From 5e45211a64149b3c659b90ff2de6fa982a5a93ed Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 4 May 2024 14:17:33 +0200 Subject: Adding upstream version 15.5. Signed-off-by: Daniel Baumann --- src/backend/commands/Makefile | 66 + src/backend/commands/aggregatecmds.c | 496 + src/backend/commands/alter.c | 1061 ++ src/backend/commands/amcmds.c | 269 + src/backend/commands/analyze.c | 3076 +++++ src/backend/commands/async.c | 2446 ++++ src/backend/commands/cluster.c | 1736 +++ src/backend/commands/collationcmds.c | 820 ++ src/backend/commands/comment.c | 459 + src/backend/commands/constraint.c | 205 + src/backend/commands/conversioncmds.c | 139 + src/backend/commands/copy.c | 798 ++ src/backend/commands/copyfrom.c | 1624 +++ src/backend/commands/copyfromparse.c | 1921 +++ src/backend/commands/copyto.c | 1310 ++ src/backend/commands/createas.c | 637 + src/backend/commands/dbcommands.c | 3285 +++++ src/backend/commands/define.c | 391 + src/backend/commands/discard.c | 78 + src/backend/commands/dropcmds.c | 493 + src/backend/commands/event_trigger.c | 2182 ++++ src/backend/commands/explain.c | 5022 ++++++++ src/backend/commands/extension.c | 3417 ++++++ src/backend/commands/foreigncmds.c | 1617 +++ src/backend/commands/functioncmds.c | 2374 ++++ src/backend/commands/indexcmds.c | 4355 +++++++ src/backend/commands/lockcmds.c | 306 + src/backend/commands/matview.c | 936 ++ src/backend/commands/opclasscmds.c | 1745 +++ src/backend/commands/operatorcmds.c | 552 + src/backend/commands/policy.c | 1285 ++ src/backend/commands/portalcmds.c | 496 + src/backend/commands/prepare.c | 729 ++ src/backend/commands/proclang.c | 239 + src/backend/commands/publicationcmds.c | 2006 +++ src/backend/commands/schemacmds.c | 441 + src/backend/commands/seclabel.c | 581 + src/backend/commands/sequence.c | 1917 +++ src/backend/commands/statscmds.c | 898 ++ src/backend/commands/subscriptioncmds.c | 1966 +++ src/backend/commands/tablecmds.c | 19402 ++++++++++++++++++++++++++++++ 
src/backend/commands/tablespace.c | 1595 +++ src/backend/commands/trigger.c | 6664 ++++++++++ src/backend/commands/tsearchcmds.c | 1759 +++ src/backend/commands/typecmds.c | 4495 +++++++ src/backend/commands/user.c | 1645 +++ src/backend/commands/vacuum.c | 2465 ++++ src/backend/commands/vacuumparallel.c | 1074 ++ src/backend/commands/variable.c | 935 ++ src/backend/commands/view.c | 604 + 50 files changed, 95012 insertions(+) create mode 100644 src/backend/commands/Makefile create mode 100644 src/backend/commands/aggregatecmds.c create mode 100644 src/backend/commands/alter.c create mode 100644 src/backend/commands/amcmds.c create mode 100644 src/backend/commands/analyze.c create mode 100644 src/backend/commands/async.c create mode 100644 src/backend/commands/cluster.c create mode 100644 src/backend/commands/collationcmds.c create mode 100644 src/backend/commands/comment.c create mode 100644 src/backend/commands/constraint.c create mode 100644 src/backend/commands/conversioncmds.c create mode 100644 src/backend/commands/copy.c create mode 100644 src/backend/commands/copyfrom.c create mode 100644 src/backend/commands/copyfromparse.c create mode 100644 src/backend/commands/copyto.c create mode 100644 src/backend/commands/createas.c create mode 100644 src/backend/commands/dbcommands.c create mode 100644 src/backend/commands/define.c create mode 100644 src/backend/commands/discard.c create mode 100644 src/backend/commands/dropcmds.c create mode 100644 src/backend/commands/event_trigger.c create mode 100644 src/backend/commands/explain.c create mode 100644 src/backend/commands/extension.c create mode 100644 src/backend/commands/foreigncmds.c create mode 100644 src/backend/commands/functioncmds.c create mode 100644 src/backend/commands/indexcmds.c create mode 100644 src/backend/commands/lockcmds.c create mode 100644 src/backend/commands/matview.c create mode 100644 src/backend/commands/opclasscmds.c create mode 100644 src/backend/commands/operatorcmds.c create mode 
100644 src/backend/commands/policy.c create mode 100644 src/backend/commands/portalcmds.c create mode 100644 src/backend/commands/prepare.c create mode 100644 src/backend/commands/proclang.c create mode 100644 src/backend/commands/publicationcmds.c create mode 100644 src/backend/commands/schemacmds.c create mode 100644 src/backend/commands/seclabel.c create mode 100644 src/backend/commands/sequence.c create mode 100644 src/backend/commands/statscmds.c create mode 100644 src/backend/commands/subscriptioncmds.c create mode 100644 src/backend/commands/tablecmds.c create mode 100644 src/backend/commands/tablespace.c create mode 100644 src/backend/commands/trigger.c create mode 100644 src/backend/commands/tsearchcmds.c create mode 100644 src/backend/commands/typecmds.c create mode 100644 src/backend/commands/user.c create mode 100644 src/backend/commands/vacuum.c create mode 100644 src/backend/commands/vacuumparallel.c create mode 100644 src/backend/commands/variable.c create mode 100644 src/backend/commands/view.c (limited to 'src/backend/commands') diff --git a/src/backend/commands/Makefile b/src/backend/commands/Makefile new file mode 100644 index 0000000..48f7348 --- /dev/null +++ b/src/backend/commands/Makefile @@ -0,0 +1,66 @@ +#------------------------------------------------------------------------- +# +# Makefile-- +# Makefile for backend/commands +# +# IDENTIFICATION +# src/backend/commands/Makefile +# +#------------------------------------------------------------------------- + +subdir = src/backend/commands +top_builddir = ../../.. 
+include $(top_builddir)/src/Makefile.global + +OBJS = \ + aggregatecmds.o \ + alter.o \ + amcmds.o \ + analyze.o \ + async.o \ + cluster.o \ + collationcmds.o \ + comment.o \ + constraint.o \ + conversioncmds.o \ + copy.o \ + copyfrom.o \ + copyfromparse.o \ + copyto.o \ + createas.o \ + dbcommands.o \ + define.o \ + discard.o \ + dropcmds.o \ + event_trigger.o \ + explain.o \ + extension.o \ + foreigncmds.o \ + functioncmds.o \ + indexcmds.o \ + lockcmds.o \ + matview.o \ + opclasscmds.o \ + operatorcmds.o \ + policy.o \ + portalcmds.o \ + prepare.o \ + proclang.o \ + publicationcmds.o \ + schemacmds.o \ + seclabel.o \ + sequence.o \ + statscmds.o \ + subscriptioncmds.o \ + tablecmds.o \ + tablespace.o \ + trigger.o \ + tsearchcmds.o \ + typecmds.o \ + user.o \ + vacuum.o \ + vacuumparallel.o \ + variable.o \ + view.o + +include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/commands/aggregatecmds.c b/src/backend/commands/aggregatecmds.c new file mode 100644 index 0000000..010eca7 --- /dev/null +++ b/src/backend/commands/aggregatecmds.c @@ -0,0 +1,496 @@ +/*------------------------------------------------------------------------- + * + * aggregatecmds.c + * + * Routines for aggregate-manipulation commands + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/aggregatecmds.c + * + * DESCRIPTION + * The "DefineFoo" routines take the parse tree and pick out the + * appropriate arguments/flags, passing the results to the + * corresponding "FooDefine" routines (in src/catalog) that do + * the actual catalog-munging. These routines also verify permission + * of the user to execute the command. 
+ * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/htup_details.h" +#include "catalog/dependency.h" +#include "catalog/pg_aggregate.h" +#include "catalog/pg_proc.h" +#include "catalog/pg_type.h" +#include "commands/alter.h" +#include "commands/defrem.h" +#include "miscadmin.h" +#include "parser/parse_func.h" +#include "parser/parse_type.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/syscache.h" + + +static char extractModify(DefElem *defel); + + +/* + * DefineAggregate + * + * "oldstyle" signals the old (pre-8.2) style where the aggregate input type + * is specified by a BASETYPE element in the parameters. Otherwise, + * "args" is a pair, whose first element is a list of FunctionParameter structs + * defining the agg's arguments (both direct and aggregated), and whose second + * element is an Integer node with the number of direct args, or -1 if this + * isn't an ordered-set aggregate. + * "parameters" is a list of DefElem representing the agg's definition clauses. 
+ */ +ObjectAddress +DefineAggregate(ParseState *pstate, + List *name, + List *args, + bool oldstyle, + List *parameters, + bool replace) +{ + char *aggName; + Oid aggNamespace; + AclResult aclresult; + char aggKind = AGGKIND_NORMAL; + List *transfuncName = NIL; + List *finalfuncName = NIL; + List *combinefuncName = NIL; + List *serialfuncName = NIL; + List *deserialfuncName = NIL; + List *mtransfuncName = NIL; + List *minvtransfuncName = NIL; + List *mfinalfuncName = NIL; + bool finalfuncExtraArgs = false; + bool mfinalfuncExtraArgs = false; + char finalfuncModify = 0; + char mfinalfuncModify = 0; + List *sortoperatorName = NIL; + TypeName *baseType = NULL; + TypeName *transType = NULL; + TypeName *mtransType = NULL; + int32 transSpace = 0; + int32 mtransSpace = 0; + char *initval = NULL; + char *minitval = NULL; + char *parallel = NULL; + int numArgs; + int numDirectArgs = 0; + oidvector *parameterTypes; + ArrayType *allParameterTypes; + ArrayType *parameterModes; + ArrayType *parameterNames; + List *parameterDefaults; + Oid variadicArgType; + Oid transTypeId; + Oid mtransTypeId = InvalidOid; + char transTypeType; + char mtransTypeType = 0; + char proparallel = PROPARALLEL_UNSAFE; + ListCell *pl; + + /* Convert list of names to a name and namespace */ + aggNamespace = QualifiedNameGetCreationNamespace(name, &aggName); + + /* Check we have creation rights in target namespace */ + aclresult = pg_namespace_aclcheck(aggNamespace, GetUserId(), ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_SCHEMA, + get_namespace_name(aggNamespace)); + + /* Deconstruct the output of the aggr_args grammar production */ + if (!oldstyle) + { + Assert(list_length(args) == 2); + numDirectArgs = intVal(lsecond(args)); + if (numDirectArgs >= 0) + aggKind = AGGKIND_ORDERED_SET; + else + numDirectArgs = 0; + args = linitial_node(List, args); + } + + /* Examine aggregate's definition clauses */ + foreach(pl, parameters) + { + DefElem *defel = 
lfirst_node(DefElem, pl); + + /* + * sfunc1, stype1, and initcond1 are accepted as obsolete spellings + * for sfunc, stype, initcond. + */ + if (strcmp(defel->defname, "sfunc") == 0) + transfuncName = defGetQualifiedName(defel); + else if (strcmp(defel->defname, "sfunc1") == 0) + transfuncName = defGetQualifiedName(defel); + else if (strcmp(defel->defname, "finalfunc") == 0) + finalfuncName = defGetQualifiedName(defel); + else if (strcmp(defel->defname, "combinefunc") == 0) + combinefuncName = defGetQualifiedName(defel); + else if (strcmp(defel->defname, "serialfunc") == 0) + serialfuncName = defGetQualifiedName(defel); + else if (strcmp(defel->defname, "deserialfunc") == 0) + deserialfuncName = defGetQualifiedName(defel); + else if (strcmp(defel->defname, "msfunc") == 0) + mtransfuncName = defGetQualifiedName(defel); + else if (strcmp(defel->defname, "minvfunc") == 0) + minvtransfuncName = defGetQualifiedName(defel); + else if (strcmp(defel->defname, "mfinalfunc") == 0) + mfinalfuncName = defGetQualifiedName(defel); + else if (strcmp(defel->defname, "finalfunc_extra") == 0) + finalfuncExtraArgs = defGetBoolean(defel); + else if (strcmp(defel->defname, "mfinalfunc_extra") == 0) + mfinalfuncExtraArgs = defGetBoolean(defel); + else if (strcmp(defel->defname, "finalfunc_modify") == 0) + finalfuncModify = extractModify(defel); + else if (strcmp(defel->defname, "mfinalfunc_modify") == 0) + mfinalfuncModify = extractModify(defel); + else if (strcmp(defel->defname, "sortop") == 0) + sortoperatorName = defGetQualifiedName(defel); + else if (strcmp(defel->defname, "basetype") == 0) + baseType = defGetTypeName(defel); + else if (strcmp(defel->defname, "hypothetical") == 0) + { + if (defGetBoolean(defel)) + { + if (aggKind == AGGKIND_NORMAL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("only ordered-set aggregates can be hypothetical"))); + aggKind = AGGKIND_HYPOTHETICAL; + } + } + else if (strcmp(defel->defname, "stype") == 0) + transType = 
defGetTypeName(defel); + else if (strcmp(defel->defname, "stype1") == 0) + transType = defGetTypeName(defel); + else if (strcmp(defel->defname, "sspace") == 0) + transSpace = defGetInt32(defel); + else if (strcmp(defel->defname, "mstype") == 0) + mtransType = defGetTypeName(defel); + else if (strcmp(defel->defname, "msspace") == 0) + mtransSpace = defGetInt32(defel); + else if (strcmp(defel->defname, "initcond") == 0) + initval = defGetString(defel); + else if (strcmp(defel->defname, "initcond1") == 0) + initval = defGetString(defel); + else if (strcmp(defel->defname, "minitcond") == 0) + minitval = defGetString(defel); + else if (strcmp(defel->defname, "parallel") == 0) + parallel = defGetString(defel); + else + ereport(WARNING, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("aggregate attribute \"%s\" not recognized", + defel->defname))); + } + + /* + * make sure we have our required definitions + */ + if (transType == NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("aggregate stype must be specified"))); + if (transfuncName == NIL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("aggregate sfunc must be specified"))); + + /* + * if mtransType is given, mtransfuncName and minvtransfuncName must be as + * well; if not, then none of the moving-aggregate options should have + * been given. 
+ */ + if (mtransType != NULL) + { + if (mtransfuncName == NIL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("aggregate msfunc must be specified when mstype is specified"))); + if (minvtransfuncName == NIL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("aggregate minvfunc must be specified when mstype is specified"))); + } + else + { + if (mtransfuncName != NIL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("aggregate msfunc must not be specified without mstype"))); + if (minvtransfuncName != NIL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("aggregate minvfunc must not be specified without mstype"))); + if (mfinalfuncName != NIL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("aggregate mfinalfunc must not be specified without mstype"))); + if (mtransSpace != 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("aggregate msspace must not be specified without mstype"))); + if (minitval != NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("aggregate minitcond must not be specified without mstype"))); + } + + /* + * Default values for modify flags can only be determined once we know the + * aggKind. + */ + if (finalfuncModify == 0) + finalfuncModify = (aggKind == AGGKIND_NORMAL) ? AGGMODIFY_READ_ONLY : AGGMODIFY_READ_WRITE; + if (mfinalfuncModify == 0) + mfinalfuncModify = (aggKind == AGGKIND_NORMAL) ? AGGMODIFY_READ_ONLY : AGGMODIFY_READ_WRITE; + + /* + * look up the aggregate's input datatype(s). + */ + if (oldstyle) + { + /* + * Old style: use basetype parameter. This supports aggregates of + * zero or one input, with input type ANY meaning zero inputs. + * + * Historically we allowed the command to look like basetype = 'ANY' + * so we must do a case-insensitive comparison for the name ANY. Ugh. 
+ */ + Oid aggArgTypes[1]; + + if (baseType == NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("aggregate input type must be specified"))); + + if (pg_strcasecmp(TypeNameToString(baseType), "ANY") == 0) + { + numArgs = 0; + aggArgTypes[0] = InvalidOid; + } + else + { + numArgs = 1; + aggArgTypes[0] = typenameTypeId(NULL, baseType); + } + parameterTypes = buildoidvector(aggArgTypes, numArgs); + allParameterTypes = NULL; + parameterModes = NULL; + parameterNames = NULL; + parameterDefaults = NIL; + variadicArgType = InvalidOid; + } + else + { + /* + * New style: args is a list of FunctionParameters (possibly zero of + * 'em). We share functioncmds.c's code for processing them. + */ + Oid requiredResultType; + + if (baseType != NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("basetype is redundant with aggregate input type specification"))); + + numArgs = list_length(args); + interpret_function_parameter_list(pstate, + args, + InvalidOid, + OBJECT_AGGREGATE, + ¶meterTypes, + NULL, + &allParameterTypes, + ¶meterModes, + ¶meterNames, + NULL, + ¶meterDefaults, + &variadicArgType, + &requiredResultType); + /* Parameter defaults are not currently allowed by the grammar */ + Assert(parameterDefaults == NIL); + /* There shouldn't have been any OUT parameters, either */ + Assert(requiredResultType == InvalidOid); + } + + /* + * look up the aggregate's transtype. + * + * transtype can't be a pseudo-type, since we need to be able to store + * values of the transtype. However, we can allow polymorphic transtype + * in some cases (AggregateCreate will check). Also, we allow "internal" + * for functions that want to pass pointers to private data structures; + * but allow that only to superusers, since you could crash the system (or + * worse) by connecting up incompatible internal-using functions in an + * aggregate. 
+ */ + transTypeId = typenameTypeId(NULL, transType); + transTypeType = get_typtype(transTypeId); + if (transTypeType == TYPTYPE_PSEUDO && + !IsPolymorphicType(transTypeId)) + { + if (transTypeId == INTERNALOID && superuser()) + /* okay */ ; + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("aggregate transition data type cannot be %s", + format_type_be(transTypeId)))); + } + + if (serialfuncName && deserialfuncName) + { + /* + * Serialization is only needed/allowed for transtype INTERNAL. + */ + if (transTypeId != INTERNALOID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("serialization functions may be specified only when the aggregate transition data type is %s", + format_type_be(INTERNALOID)))); + } + else if (serialfuncName || deserialfuncName) + { + /* + * Cannot specify one function without the other. + */ + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("must specify both or neither of serialization and deserialization functions"))); + } + + /* + * If a moving-aggregate transtype is specified, look that up. Same + * restrictions as for transtype. + */ + if (mtransType) + { + mtransTypeId = typenameTypeId(NULL, mtransType); + mtransTypeType = get_typtype(mtransTypeId); + if (mtransTypeType == TYPTYPE_PSEUDO && + !IsPolymorphicType(mtransTypeId)) + { + if (mtransTypeId == INTERNALOID && superuser()) + /* okay */ ; + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("aggregate transition data type cannot be %s", + format_type_be(mtransTypeId)))); + } + } + + /* + * If we have an initval, and it's not for a pseudotype (particularly a + * polymorphic type), make sure it's acceptable to the type's input + * function. We will store the initval as text, because the input + * function isn't necessarily immutable (consider "now" for timestamp), + * and we want to use the runtime not creation-time interpretation of the + * value. 
However, if it's an incorrect value it seems much more + * user-friendly to complain at CREATE AGGREGATE time. + */ + if (initval && transTypeType != TYPTYPE_PSEUDO) + { + Oid typinput, + typioparam; + + getTypeInputInfo(transTypeId, &typinput, &typioparam); + (void) OidInputFunctionCall(typinput, initval, typioparam, -1); + } + + /* + * Likewise for moving-aggregate initval. + */ + if (minitval && mtransTypeType != TYPTYPE_PSEUDO) + { + Oid typinput, + typioparam; + + getTypeInputInfo(mtransTypeId, &typinput, &typioparam); + (void) OidInputFunctionCall(typinput, minitval, typioparam, -1); + } + + if (parallel) + { + if (strcmp(parallel, "safe") == 0) + proparallel = PROPARALLEL_SAFE; + else if (strcmp(parallel, "restricted") == 0) + proparallel = PROPARALLEL_RESTRICTED; + else if (strcmp(parallel, "unsafe") == 0) + proparallel = PROPARALLEL_UNSAFE; + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("parameter \"parallel\" must be SAFE, RESTRICTED, or UNSAFE"))); + } + + /* + * Most of the argument-checking is done inside of AggregateCreate + */ + return AggregateCreate(aggName, /* aggregate name */ + aggNamespace, /* namespace */ + replace, + aggKind, + numArgs, + numDirectArgs, + parameterTypes, + PointerGetDatum(allParameterTypes), + PointerGetDatum(parameterModes), + PointerGetDatum(parameterNames), + parameterDefaults, + variadicArgType, + transfuncName, /* step function name */ + finalfuncName, /* final function name */ + combinefuncName, /* combine function name */ + serialfuncName, /* serial function name */ + deserialfuncName, /* deserial function name */ + mtransfuncName, /* fwd trans function name */ + minvtransfuncName, /* inv trans function name */ + mfinalfuncName, /* final function name */ + finalfuncExtraArgs, + mfinalfuncExtraArgs, + finalfuncModify, + mfinalfuncModify, + sortoperatorName, /* sort operator name */ + transTypeId, /* transition data type */ + transSpace, /* transition space */ + mtransTypeId, /* transition data type 
*/ + mtransSpace, /* transition space */ + initval, /* initial condition */ + minitval, /* initial condition */ + proparallel); /* parallel safe? */ +} + +/* + * Convert the string form of [m]finalfunc_modify to the catalog representation + */ +static char +extractModify(DefElem *defel) +{ + char *val = defGetString(defel); + + if (strcmp(val, "read_only") == 0) + return AGGMODIFY_READ_ONLY; + if (strcmp(val, "shareable") == 0) + return AGGMODIFY_SHAREABLE; + if (strcmp(val, "read_write") == 0) + return AGGMODIFY_READ_WRITE; + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("parameter \"%s\" must be READ_ONLY, SHAREABLE, or READ_WRITE", + defel->defname))); + return 0; /* keep compiler quiet */ +} diff --git a/src/backend/commands/alter.c b/src/backend/commands/alter.c new file mode 100644 index 0000000..5456b82 --- /dev/null +++ b/src/backend/commands/alter.c @@ -0,0 +1,1061 @@ +/*------------------------------------------------------------------------- + * + * alter.c + * Drivers for generic alter commands + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/alter.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/htup_details.h" +#include "access/relation.h" +#include "access/sysattr.h" +#include "access/table.h" +#include "catalog/dependency.h" +#include "catalog/indexing.h" +#include "catalog/namespace.h" +#include "catalog/objectaccess.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_conversion.h" +#include "catalog/pg_event_trigger.h" +#include "catalog/pg_foreign_data_wrapper.h" +#include "catalog/pg_foreign_server.h" +#include "catalog/pg_language.h" +#include "catalog/pg_largeobject.h" +#include "catalog/pg_largeobject_metadata.h" +#include "catalog/pg_namespace.h" +#include "catalog/pg_opclass.h" +#include 
"catalog/pg_opfamily.h" +#include "catalog/pg_proc.h" +#include "catalog/pg_statistic_ext.h" +#include "catalog/pg_subscription.h" +#include "catalog/pg_ts_config.h" +#include "catalog/pg_ts_dict.h" +#include "catalog/pg_ts_parser.h" +#include "catalog/pg_ts_template.h" +#include "commands/alter.h" +#include "commands/collationcmds.h" +#include "commands/conversioncmds.h" +#include "commands/dbcommands.h" +#include "commands/defrem.h" +#include "commands/event_trigger.h" +#include "commands/extension.h" +#include "commands/policy.h" +#include "commands/proclang.h" +#include "commands/publicationcmds.h" +#include "commands/schemacmds.h" +#include "commands/subscriptioncmds.h" +#include "commands/tablecmds.h" +#include "commands/tablespace.h" +#include "commands/trigger.h" +#include "commands/typecmds.h" +#include "commands/user.h" +#include "miscadmin.h" +#include "parser/parse_func.h" +#include "rewrite/rewriteDefine.h" +#include "tcop/utility.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" +#include "utils/syscache.h" + +static Oid AlterObjectNamespace_internal(Relation rel, Oid objid, Oid nspOid); + +/* + * Raise an error to the effect that an object of the given name is already + * present in the given namespace. 
+ */ +static void +report_name_conflict(Oid classId, const char *name) +{ + char *msgfmt; + + switch (classId) + { + case EventTriggerRelationId: + msgfmt = gettext_noop("event trigger \"%s\" already exists"); + break; + case ForeignDataWrapperRelationId: + msgfmt = gettext_noop("foreign-data wrapper \"%s\" already exists"); + break; + case ForeignServerRelationId: + msgfmt = gettext_noop("server \"%s\" already exists"); + break; + case LanguageRelationId: + msgfmt = gettext_noop("language \"%s\" already exists"); + break; + case PublicationRelationId: + msgfmt = gettext_noop("publication \"%s\" already exists"); + break; + case SubscriptionRelationId: + msgfmt = gettext_noop("subscription \"%s\" already exists"); + break; + default: + elog(ERROR, "unsupported object class %u", classId); + break; + } + + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg(msgfmt, name))); +} + +static void +report_namespace_conflict(Oid classId, const char *name, Oid nspOid) +{ + char *msgfmt; + + Assert(OidIsValid(nspOid)); + + switch (classId) + { + case ConversionRelationId: + Assert(OidIsValid(nspOid)); + msgfmt = gettext_noop("conversion \"%s\" already exists in schema \"%s\""); + break; + case StatisticExtRelationId: + Assert(OidIsValid(nspOid)); + msgfmt = gettext_noop("statistics object \"%s\" already exists in schema \"%s\""); + break; + case TSParserRelationId: + Assert(OidIsValid(nspOid)); + msgfmt = gettext_noop("text search parser \"%s\" already exists in schema \"%s\""); + break; + case TSDictionaryRelationId: + Assert(OidIsValid(nspOid)); + msgfmt = gettext_noop("text search dictionary \"%s\" already exists in schema \"%s\""); + break; + case TSTemplateRelationId: + Assert(OidIsValid(nspOid)); + msgfmt = gettext_noop("text search template \"%s\" already exists in schema \"%s\""); + break; + case TSConfigRelationId: + Assert(OidIsValid(nspOid)); + msgfmt = gettext_noop("text search configuration \"%s\" already exists in schema \"%s\""); + break; + default: + 
elog(ERROR, "unsupported object class %u", classId); + break; + } + + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg(msgfmt, name, get_namespace_name(nspOid)))); +} + +/* + * AlterObjectRename_internal + * + * Generic function to rename the given object, for simple cases (won't + * work for tables, nor other cases where we need to do more than change + * the name column of a single catalog entry). + * + * rel: catalog relation containing object (RowExclusiveLock'd by caller) + * objectId: OID of object to be renamed + * new_name: CString representation of new name + */ +static void +AlterObjectRename_internal(Relation rel, Oid objectId, const char *new_name) +{ + Oid classId = RelationGetRelid(rel); + int oidCacheId = get_object_catcache_oid(classId); + int nameCacheId = get_object_catcache_name(classId); + AttrNumber Anum_name = get_object_attnum_name(classId); + AttrNumber Anum_namespace = get_object_attnum_namespace(classId); + AttrNumber Anum_owner = get_object_attnum_owner(classId); + HeapTuple oldtup; + HeapTuple newtup; + Datum datum; + bool isnull; + Oid namespaceId; + Oid ownerId; + char *old_name; + AclResult aclresult; + Datum *values; + bool *nulls; + bool *replaces; + NameData nameattrdata; + + oldtup = SearchSysCache1(oidCacheId, ObjectIdGetDatum(objectId)); + if (!HeapTupleIsValid(oldtup)) + elog(ERROR, "cache lookup failed for object %u of catalog \"%s\"", + objectId, RelationGetRelationName(rel)); + + datum = heap_getattr(oldtup, Anum_name, + RelationGetDescr(rel), &isnull); + Assert(!isnull); + old_name = NameStr(*(DatumGetName(datum))); + + /* Get OID of namespace */ + if (Anum_namespace > 0) + { + datum = heap_getattr(oldtup, Anum_namespace, + RelationGetDescr(rel), &isnull); + Assert(!isnull); + namespaceId = DatumGetObjectId(datum); + } + else + namespaceId = InvalidOid; + + /* Permission checks ... 
superusers can always do it */ + if (!superuser()) + { + /* Fail if object does not have an explicit owner */ + if (Anum_owner <= 0) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to rename %s", + getObjectDescriptionOids(classId, objectId)))); + + /* Otherwise, must be owner of the existing object */ + datum = heap_getattr(oldtup, Anum_owner, + RelationGetDescr(rel), &isnull); + Assert(!isnull); + ownerId = DatumGetObjectId(datum); + + if (!has_privs_of_role(GetUserId(), DatumGetObjectId(ownerId))) + aclcheck_error(ACLCHECK_NOT_OWNER, get_object_type(classId, objectId), + old_name); + + /* User must have CREATE privilege on the namespace */ + if (OidIsValid(namespaceId)) + { + aclresult = pg_namespace_aclcheck(namespaceId, GetUserId(), + ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_SCHEMA, + get_namespace_name(namespaceId)); + } + } + + /* + * Check for duplicate name (more friendly than unique-index failure). + * Since this is just a friendliness check, we can just skip it in cases + * where there isn't suitable support. 
+ */ + if (classId == ProcedureRelationId) + { + Form_pg_proc proc = (Form_pg_proc) GETSTRUCT(oldtup); + + IsThereFunctionInNamespace(new_name, proc->pronargs, + &proc->proargtypes, proc->pronamespace); + } + else if (classId == CollationRelationId) + { + Form_pg_collation coll = (Form_pg_collation) GETSTRUCT(oldtup); + + IsThereCollationInNamespace(new_name, coll->collnamespace); + } + else if (classId == OperatorClassRelationId) + { + Form_pg_opclass opc = (Form_pg_opclass) GETSTRUCT(oldtup); + + IsThereOpClassInNamespace(new_name, opc->opcmethod, + opc->opcnamespace); + } + else if (classId == OperatorFamilyRelationId) + { + Form_pg_opfamily opf = (Form_pg_opfamily) GETSTRUCT(oldtup); + + IsThereOpFamilyInNamespace(new_name, opf->opfmethod, + opf->opfnamespace); + } + else if (classId == SubscriptionRelationId) + { + if (SearchSysCacheExists2(SUBSCRIPTIONNAME, MyDatabaseId, + CStringGetDatum(new_name))) + report_name_conflict(classId, new_name); + + /* Also enforce regression testing naming rules, if enabled */ +#ifdef ENFORCE_REGRESSION_TEST_NAME_RESTRICTIONS + if (strncmp(new_name, "regress_", 8) != 0) + elog(WARNING, "subscriptions created by regression test cases should have names starting with \"regress_\""); +#endif + } + else if (nameCacheId >= 0) + { + if (OidIsValid(namespaceId)) + { + if (SearchSysCacheExists2(nameCacheId, + CStringGetDatum(new_name), + ObjectIdGetDatum(namespaceId))) + report_namespace_conflict(classId, new_name, namespaceId); + } + else + { + if (SearchSysCacheExists1(nameCacheId, + CStringGetDatum(new_name))) + report_name_conflict(classId, new_name); + } + } + + /* Build modified tuple */ + values = palloc0(RelationGetNumberOfAttributes(rel) * sizeof(Datum)); + nulls = palloc0(RelationGetNumberOfAttributes(rel) * sizeof(bool)); + replaces = palloc0(RelationGetNumberOfAttributes(rel) * sizeof(bool)); + namestrcpy(&nameattrdata, new_name); + values[Anum_name - 1] = NameGetDatum(&nameattrdata); + replaces[Anum_name - 1] = true; + 
newtup = heap_modify_tuple(oldtup, RelationGetDescr(rel), + values, nulls, replaces); + + /* Perform actual update */ + CatalogTupleUpdate(rel, &oldtup->t_self, newtup); + + InvokeObjectPostAlterHook(classId, objectId, 0); + + /* Release memory */ + pfree(values); + pfree(nulls); + pfree(replaces); + heap_freetuple(newtup); + + ReleaseSysCache(oldtup); +} + +/* + * Executes an ALTER OBJECT / RENAME TO statement. Based on the object + * type, the function appropriate to that type is executed. + * + * Return value is the address of the renamed object. + */ +ObjectAddress +ExecRenameStmt(RenameStmt *stmt) +{ + switch (stmt->renameType) + { + case OBJECT_TABCONSTRAINT: + case OBJECT_DOMCONSTRAINT: + return RenameConstraint(stmt); + + case OBJECT_DATABASE: + return RenameDatabase(stmt->subname, stmt->newname); + + case OBJECT_ROLE: + return RenameRole(stmt->subname, stmt->newname); + + case OBJECT_SCHEMA: + return RenameSchema(stmt->subname, stmt->newname); + + case OBJECT_TABLESPACE: + return RenameTableSpace(stmt->subname, stmt->newname); + + case OBJECT_TABLE: + case OBJECT_SEQUENCE: + case OBJECT_VIEW: + case OBJECT_MATVIEW: + case OBJECT_INDEX: + case OBJECT_FOREIGN_TABLE: + return RenameRelation(stmt); + + case OBJECT_COLUMN: + case OBJECT_ATTRIBUTE: + return renameatt(stmt); + + case OBJECT_RULE: + return RenameRewriteRule(stmt->relation, stmt->subname, + stmt->newname); + + case OBJECT_TRIGGER: + return renametrig(stmt); + + case OBJECT_POLICY: + return rename_policy(stmt); + + case OBJECT_DOMAIN: + case OBJECT_TYPE: + return RenameType(stmt); + + case OBJECT_AGGREGATE: + case OBJECT_COLLATION: + case OBJECT_CONVERSION: + case OBJECT_EVENT_TRIGGER: + case OBJECT_FDW: + case OBJECT_FOREIGN_SERVER: + case OBJECT_FUNCTION: + case OBJECT_OPCLASS: + case OBJECT_OPFAMILY: + case OBJECT_LANGUAGE: + case OBJECT_PROCEDURE: + case OBJECT_ROUTINE: + case OBJECT_STATISTIC_EXT: + case OBJECT_TSCONFIGURATION: + case OBJECT_TSDICTIONARY: + case OBJECT_TSPARSER: + case 
OBJECT_TSTEMPLATE: + case OBJECT_PUBLICATION: + case OBJECT_SUBSCRIPTION: + { + ObjectAddress address; + Relation catalog; + Relation relation; + + address = get_object_address(stmt->renameType, + stmt->object, + &relation, + AccessExclusiveLock, false); + Assert(relation == NULL); + + catalog = table_open(address.classId, RowExclusiveLock); + AlterObjectRename_internal(catalog, + address.objectId, + stmt->newname); + table_close(catalog, RowExclusiveLock); + + return address; + } + + default: + elog(ERROR, "unrecognized rename stmt type: %d", + (int) stmt->renameType); + return InvalidObjectAddress; /* keep compiler happy */ + } +} + +/* + * Executes an ALTER OBJECT / [NO] DEPENDS ON EXTENSION statement. + * + * Return value is the address of the altered object. refAddress is an output + * argument which, if not null, receives the address of the object that the + * altered object now depends on. + */ +ObjectAddress +ExecAlterObjectDependsStmt(AlterObjectDependsStmt *stmt, ObjectAddress *refAddress) +{ + ObjectAddress address; + ObjectAddress refAddr; + Relation rel; + + address = + get_object_address_rv(stmt->objectType, stmt->relation, (List *) stmt->object, + &rel, AccessExclusiveLock, false); + + /* + * Verify that the user is entitled to run the command. + * + * We don't check any privileges on the extension, because that's not + * needed. The object owner is stipulating, by running this command, that + * the extension owner can drop the object whenever they feel like it, + * which is not considered a problem. + */ + check_object_ownership(GetUserId(), + stmt->objectType, address, stmt->object, rel); + + /* + * If a relation was involved, it would have been opened and locked. We + * don't need the relation here, but we'll retain the lock until commit. 
+ */ + if (rel) + table_close(rel, NoLock); + + refAddr = get_object_address(OBJECT_EXTENSION, (Node *) stmt->extname, + &rel, AccessExclusiveLock, false); + Assert(rel == NULL); + if (refAddress) + *refAddress = refAddr; + + if (stmt->remove) + { + deleteDependencyRecordsForSpecific(address.classId, address.objectId, + DEPENDENCY_AUTO_EXTENSION, + refAddr.classId, refAddr.objectId); + } + else + { + List *currexts; + + /* Avoid duplicates */ + currexts = getAutoExtensionsOfObject(address.classId, + address.objectId); + if (!list_member_oid(currexts, refAddr.objectId)) + recordDependencyOn(&address, &refAddr, DEPENDENCY_AUTO_EXTENSION); + } + + return address; +} + +/* + * Executes an ALTER OBJECT / SET SCHEMA statement. Based on the object + * type, the function appropriate to that type is executed. + * + * Return value is that of the altered object. + * + * oldSchemaAddr is an output argument which, if not NULL, is set to the object + * address of the original schema. + */ +ObjectAddress +ExecAlterObjectSchemaStmt(AlterObjectSchemaStmt *stmt, + ObjectAddress *oldSchemaAddr) +{ + ObjectAddress address; + Oid oldNspOid; + + switch (stmt->objectType) + { + case OBJECT_EXTENSION: + address = AlterExtensionNamespace(strVal(stmt->object), stmt->newschema, + oldSchemaAddr ? &oldNspOid : NULL); + break; + + case OBJECT_FOREIGN_TABLE: + case OBJECT_SEQUENCE: + case OBJECT_TABLE: + case OBJECT_VIEW: + case OBJECT_MATVIEW: + address = AlterTableNamespace(stmt, + oldSchemaAddr ? &oldNspOid : NULL); + break; + + case OBJECT_DOMAIN: + case OBJECT_TYPE: + address = AlterTypeNamespace(castNode(List, stmt->object), stmt->newschema, + stmt->objectType, + oldSchemaAddr ? 
&oldNspOid : NULL); + break; + + /* generic code path */ + case OBJECT_AGGREGATE: + case OBJECT_COLLATION: + case OBJECT_CONVERSION: + case OBJECT_FUNCTION: + case OBJECT_OPERATOR: + case OBJECT_OPCLASS: + case OBJECT_OPFAMILY: + case OBJECT_PROCEDURE: + case OBJECT_ROUTINE: + case OBJECT_STATISTIC_EXT: + case OBJECT_TSCONFIGURATION: + case OBJECT_TSDICTIONARY: + case OBJECT_TSPARSER: + case OBJECT_TSTEMPLATE: + { + Relation catalog; + Relation relation; + Oid classId; + Oid nspOid; + + address = get_object_address(stmt->objectType, + stmt->object, + &relation, + AccessExclusiveLock, + false); + Assert(relation == NULL); + classId = address.classId; + catalog = table_open(classId, RowExclusiveLock); + nspOid = LookupCreationNamespace(stmt->newschema); + + oldNspOid = AlterObjectNamespace_internal(catalog, address.objectId, + nspOid); + table_close(catalog, RowExclusiveLock); + } + break; + + default: + elog(ERROR, "unrecognized AlterObjectSchemaStmt type: %d", + (int) stmt->objectType); + return InvalidObjectAddress; /* keep compiler happy */ + } + + if (oldSchemaAddr) + ObjectAddressSet(*oldSchemaAddr, NamespaceRelationId, oldNspOid); + + return address; +} + +/* + * Change an object's namespace given its classOid and object Oid. + * + * Objects that don't have a namespace should be ignored. + * + * This function is currently used only by ALTER EXTENSION SET SCHEMA, + * so it only needs to cover object types that can be members of an + * extension, and it doesn't have to deal with certain special cases + * such as not wanting to process array types --- those should never + * be direct members of an extension anyway. Nonetheless, we insist + * on listing all OCLASS types in the switch. + * + * Returns the OID of the object's previous namespace, or InvalidOid if + * object doesn't have a schema. 
+ */ +Oid +AlterObjectNamespace_oid(Oid classId, Oid objid, Oid nspOid, + ObjectAddresses *objsMoved) +{ + Oid oldNspOid = InvalidOid; + ObjectAddress dep; + + dep.classId = classId; + dep.objectId = objid; + dep.objectSubId = 0; + + switch (getObjectClass(&dep)) + { + case OCLASS_CLASS: + { + Relation rel; + + rel = relation_open(objid, AccessExclusiveLock); + oldNspOid = RelationGetNamespace(rel); + + AlterTableNamespaceInternal(rel, oldNspOid, nspOid, objsMoved); + + relation_close(rel, NoLock); + break; + } + + case OCLASS_TYPE: + oldNspOid = AlterTypeNamespace_oid(objid, nspOid, objsMoved); + break; + + case OCLASS_PROC: + case OCLASS_COLLATION: + case OCLASS_CONVERSION: + case OCLASS_OPERATOR: + case OCLASS_OPCLASS: + case OCLASS_OPFAMILY: + case OCLASS_STATISTIC_EXT: + case OCLASS_TSPARSER: + case OCLASS_TSDICT: + case OCLASS_TSTEMPLATE: + case OCLASS_TSCONFIG: + { + Relation catalog; + + catalog = table_open(classId, RowExclusiveLock); + + oldNspOid = AlterObjectNamespace_internal(catalog, objid, + nspOid); + + table_close(catalog, RowExclusiveLock); + } + break; + + case OCLASS_CAST: + case OCLASS_CONSTRAINT: + case OCLASS_DEFAULT: + case OCLASS_LANGUAGE: + case OCLASS_LARGEOBJECT: + case OCLASS_AM: + case OCLASS_AMOP: + case OCLASS_AMPROC: + case OCLASS_REWRITE: + case OCLASS_TRIGGER: + case OCLASS_SCHEMA: + case OCLASS_ROLE: + case OCLASS_DATABASE: + case OCLASS_TBLSPACE: + case OCLASS_FDW: + case OCLASS_FOREIGN_SERVER: + case OCLASS_USER_MAPPING: + case OCLASS_DEFACL: + case OCLASS_EXTENSION: + case OCLASS_EVENT_TRIGGER: + case OCLASS_PARAMETER_ACL: + case OCLASS_POLICY: + case OCLASS_PUBLICATION: + case OCLASS_PUBLICATION_NAMESPACE: + case OCLASS_PUBLICATION_REL: + case OCLASS_SUBSCRIPTION: + case OCLASS_TRANSFORM: + /* ignore object types that don't have schema-qualified names */ + break; + + /* + * There's intentionally no default: case here; we want the + * compiler to warn if a new OCLASS hasn't been handled above. 
+ */ + } + + return oldNspOid; +} + +/* + * Generic function to change the namespace of a given object, for simple + * cases (won't work for tables, nor other cases where we need to do more + * than change the namespace column of a single catalog entry). + * + * rel: catalog relation containing object (RowExclusiveLock'd by caller) + * objid: OID of object to change the namespace of + * nspOid: OID of new namespace + * + * Returns the OID of the object's previous namespace. + */ +static Oid +AlterObjectNamespace_internal(Relation rel, Oid objid, Oid nspOid) +{ + Oid classId = RelationGetRelid(rel); + int oidCacheId = get_object_catcache_oid(classId); + int nameCacheId = get_object_catcache_name(classId); + AttrNumber Anum_name = get_object_attnum_name(classId); + AttrNumber Anum_namespace = get_object_attnum_namespace(classId); + AttrNumber Anum_owner = get_object_attnum_owner(classId); + Oid oldNspOid; + Datum name, + namespace; + bool isnull; + HeapTuple tup, + newtup; + Datum *values; + bool *nulls; + bool *replaces; + + tup = SearchSysCacheCopy1(oidCacheId, ObjectIdGetDatum(objid)); + if (!HeapTupleIsValid(tup)) /* should not happen */ + elog(ERROR, "cache lookup failed for object %u of catalog \"%s\"", + objid, RelationGetRelationName(rel)); + + name = heap_getattr(tup, Anum_name, RelationGetDescr(rel), &isnull); + Assert(!isnull); + namespace = heap_getattr(tup, Anum_namespace, RelationGetDescr(rel), + &isnull); + Assert(!isnull); + oldNspOid = DatumGetObjectId(namespace); + + /* + * If the object is already in the correct namespace, we don't need to do + * anything except fire the object access hook. + */ + if (oldNspOid == nspOid) + { + InvokeObjectPostAlterHook(classId, objid, 0); + return oldNspOid; + } + + /* Check basic namespace related issues */ + CheckSetNamespace(oldNspOid, nspOid); + + /* Permission checks ... 
superusers can always do it */ + if (!superuser()) + { + Datum owner; + Oid ownerId; + AclResult aclresult; + + /* Fail if object does not have an explicit owner */ + if (Anum_owner <= 0) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to set schema of %s", + getObjectDescriptionOids(classId, objid)))); + + /* Otherwise, must be owner of the existing object */ + owner = heap_getattr(tup, Anum_owner, RelationGetDescr(rel), &isnull); + Assert(!isnull); + ownerId = DatumGetObjectId(owner); + + if (!has_privs_of_role(GetUserId(), ownerId)) + aclcheck_error(ACLCHECK_NOT_OWNER, get_object_type(classId, objid), + NameStr(*(DatumGetName(name)))); + + /* User must have CREATE privilege on new namespace */ + aclresult = pg_namespace_aclcheck(nspOid, GetUserId(), ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_SCHEMA, + get_namespace_name(nspOid)); + } + + /* + * Check for duplicate name (more friendly than unique-index failure). + * Since this is just a friendliness check, we can just skip it in cases + * where there isn't suitable support. 
+ */ + if (classId == ProcedureRelationId) + { + Form_pg_proc proc = (Form_pg_proc) GETSTRUCT(tup); + + IsThereFunctionInNamespace(NameStr(proc->proname), proc->pronargs, + &proc->proargtypes, nspOid); + } + else if (classId == CollationRelationId) + { + Form_pg_collation coll = (Form_pg_collation) GETSTRUCT(tup); + + IsThereCollationInNamespace(NameStr(coll->collname), nspOid); + } + else if (classId == OperatorClassRelationId) + { + Form_pg_opclass opc = (Form_pg_opclass) GETSTRUCT(tup); + + IsThereOpClassInNamespace(NameStr(opc->opcname), + opc->opcmethod, nspOid); + } + else if (classId == OperatorFamilyRelationId) + { + Form_pg_opfamily opf = (Form_pg_opfamily) GETSTRUCT(tup); + + IsThereOpFamilyInNamespace(NameStr(opf->opfname), + opf->opfmethod, nspOid); + } + else if (nameCacheId >= 0 && + SearchSysCacheExists2(nameCacheId, name, + ObjectIdGetDatum(nspOid))) + report_namespace_conflict(classId, + NameStr(*(DatumGetName(name))), + nspOid); + + /* Build modified tuple */ + values = palloc0(RelationGetNumberOfAttributes(rel) * sizeof(Datum)); + nulls = palloc0(RelationGetNumberOfAttributes(rel) * sizeof(bool)); + replaces = palloc0(RelationGetNumberOfAttributes(rel) * sizeof(bool)); + values[Anum_namespace - 1] = ObjectIdGetDatum(nspOid); + replaces[Anum_namespace - 1] = true; + newtup = heap_modify_tuple(tup, RelationGetDescr(rel), + values, nulls, replaces); + + /* Perform actual update */ + CatalogTupleUpdate(rel, &tup->t_self, newtup); + + /* Release memory */ + pfree(values); + pfree(nulls); + pfree(replaces); + + /* update dependencies to point to the new schema */ + changeDependencyFor(classId, objid, + NamespaceRelationId, oldNspOid, nspOid); + + InvokeObjectPostAlterHook(classId, objid, 0); + + return oldNspOid; +} + +/* + * Executes an ALTER OBJECT / OWNER TO statement. Based on the object + * type, the function appropriate to that type is executed. 
+ */ +ObjectAddress +ExecAlterOwnerStmt(AlterOwnerStmt *stmt) +{ + Oid newowner = get_rolespec_oid(stmt->newowner, false); + + switch (stmt->objectType) + { + case OBJECT_DATABASE: + return AlterDatabaseOwner(strVal(stmt->object), newowner); + + case OBJECT_SCHEMA: + return AlterSchemaOwner(strVal(stmt->object), newowner); + + case OBJECT_TYPE: + case OBJECT_DOMAIN: /* same as TYPE */ + return AlterTypeOwner(castNode(List, stmt->object), newowner, stmt->objectType); + break; + + case OBJECT_FDW: + return AlterForeignDataWrapperOwner(strVal(stmt->object), + newowner); + + case OBJECT_FOREIGN_SERVER: + return AlterForeignServerOwner(strVal(stmt->object), + newowner); + + case OBJECT_EVENT_TRIGGER: + return AlterEventTriggerOwner(strVal(stmt->object), + newowner); + + case OBJECT_PUBLICATION: + return AlterPublicationOwner(strVal(stmt->object), + newowner); + + case OBJECT_SUBSCRIPTION: + return AlterSubscriptionOwner(strVal(stmt->object), + newowner); + + /* Generic cases */ + case OBJECT_AGGREGATE: + case OBJECT_COLLATION: + case OBJECT_CONVERSION: + case OBJECT_FUNCTION: + case OBJECT_LANGUAGE: + case OBJECT_LARGEOBJECT: + case OBJECT_OPERATOR: + case OBJECT_OPCLASS: + case OBJECT_OPFAMILY: + case OBJECT_PROCEDURE: + case OBJECT_ROUTINE: + case OBJECT_STATISTIC_EXT: + case OBJECT_TABLESPACE: + case OBJECT_TSDICTIONARY: + case OBJECT_TSCONFIGURATION: + { + Relation catalog; + Relation relation; + Oid classId; + ObjectAddress address; + + address = get_object_address(stmt->objectType, + stmt->object, + &relation, + AccessExclusiveLock, + false); + Assert(relation == NULL); + classId = address.classId; + + /* + * XXX - get_object_address returns Oid of pg_largeobject + * catalog for OBJECT_LARGEOBJECT because of historical + * reasons. Fix up it here. 
+ */ + if (classId == LargeObjectRelationId) + classId = LargeObjectMetadataRelationId; + + catalog = table_open(classId, RowExclusiveLock); + + AlterObjectOwner_internal(catalog, address.objectId, newowner); + table_close(catalog, RowExclusiveLock); + + return address; + } + break; + + default: + elog(ERROR, "unrecognized AlterOwnerStmt type: %d", + (int) stmt->objectType); + return InvalidObjectAddress; /* keep compiler happy */ + } +} + +/* + * Generic function to change the ownership of a given object, for simple + * cases (won't work for tables, nor other cases where we need to do more than + * change the ownership column of a single catalog entry). + * + * rel: catalog relation containing object (RowExclusiveLock'd by caller) + * objectId: OID of object to change the ownership of + * new_ownerId: OID of new object owner + */ +void +AlterObjectOwner_internal(Relation rel, Oid objectId, Oid new_ownerId) +{ + Oid classId = RelationGetRelid(rel); + AttrNumber Anum_oid = get_object_attnum_oid(classId); + AttrNumber Anum_owner = get_object_attnum_owner(classId); + AttrNumber Anum_namespace = get_object_attnum_namespace(classId); + AttrNumber Anum_acl = get_object_attnum_acl(classId); + AttrNumber Anum_name = get_object_attnum_name(classId); + HeapTuple oldtup; + Datum datum; + bool isnull; + Oid old_ownerId; + Oid namespaceId = InvalidOid; + + oldtup = get_catalog_object_by_oid(rel, Anum_oid, objectId); + if (oldtup == NULL) + elog(ERROR, "cache lookup failed for object %u of catalog \"%s\"", + objectId, RelationGetRelationName(rel)); + + datum = heap_getattr(oldtup, Anum_owner, + RelationGetDescr(rel), &isnull); + Assert(!isnull); + old_ownerId = DatumGetObjectId(datum); + + if (Anum_namespace != InvalidAttrNumber) + { + datum = heap_getattr(oldtup, Anum_namespace, + RelationGetDescr(rel), &isnull); + Assert(!isnull); + namespaceId = DatumGetObjectId(datum); + } + + if (old_ownerId != new_ownerId) + { + AttrNumber nattrs; + HeapTuple newtup; + Datum *values; + 
bool *nulls; + bool *replaces; + + /* Superusers can bypass permission checks */ + if (!superuser()) + { + /* must be owner */ + if (!has_privs_of_role(GetUserId(), old_ownerId)) + { + char *objname; + char namebuf[NAMEDATALEN]; + + if (Anum_name != InvalidAttrNumber) + { + datum = heap_getattr(oldtup, Anum_name, + RelationGetDescr(rel), &isnull); + Assert(!isnull); + objname = NameStr(*DatumGetName(datum)); + } + else + { + snprintf(namebuf, sizeof(namebuf), "%u", objectId); + objname = namebuf; + } + aclcheck_error(ACLCHECK_NOT_OWNER, get_object_type(classId, objectId), + objname); + } + /* Must be able to become new owner */ + check_is_member_of_role(GetUserId(), new_ownerId); + + /* New owner must have CREATE privilege on namespace */ + if (OidIsValid(namespaceId)) + { + AclResult aclresult; + + aclresult = pg_namespace_aclcheck(namespaceId, new_ownerId, + ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_SCHEMA, + get_namespace_name(namespaceId)); + } + } + + /* Build a modified tuple */ + nattrs = RelationGetNumberOfAttributes(rel); + values = palloc0(nattrs * sizeof(Datum)); + nulls = palloc0(nattrs * sizeof(bool)); + replaces = palloc0(nattrs * sizeof(bool)); + values[Anum_owner - 1] = ObjectIdGetDatum(new_ownerId); + replaces[Anum_owner - 1] = true; + + /* + * Determine the modified ACL for the new owner. This is only + * necessary when the ACL is non-null. 
+ */ + if (Anum_acl != InvalidAttrNumber) + { + datum = heap_getattr(oldtup, + Anum_acl, RelationGetDescr(rel), &isnull); + if (!isnull) + { + Acl *newAcl; + + newAcl = aclnewowner(DatumGetAclP(datum), + old_ownerId, new_ownerId); + values[Anum_acl - 1] = PointerGetDatum(newAcl); + replaces[Anum_acl - 1] = true; + } + } + + newtup = heap_modify_tuple(oldtup, RelationGetDescr(rel), + values, nulls, replaces); + + /* Perform actual update */ + CatalogTupleUpdate(rel, &newtup->t_self, newtup); + + /* Update owner dependency reference */ + if (classId == LargeObjectMetadataRelationId) + classId = LargeObjectRelationId; + changeDependencyOnOwner(classId, objectId, new_ownerId); + + /* Release memory */ + pfree(values); + pfree(nulls); + pfree(replaces); + } + + InvokeObjectPostAlterHook(classId, objectId, 0); +} diff --git a/src/backend/commands/amcmds.c b/src/backend/commands/amcmds.c new file mode 100644 index 0000000..914cfa4 --- /dev/null +++ b/src/backend/commands/amcmds.c @@ -0,0 +1,269 @@ +/*------------------------------------------------------------------------- + * + * amcmds.c + * Routines for SQL commands that manipulate access methods. 
+ * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/amcmds.c + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/htup_details.h" +#include "access/table.h" +#include "catalog/catalog.h" +#include "catalog/dependency.h" +#include "catalog/indexing.h" +#include "catalog/objectaccess.h" +#include "catalog/pg_am.h" +#include "catalog/pg_proc.h" +#include "catalog/pg_type.h" +#include "commands/defrem.h" +#include "miscadmin.h" +#include "parser/parse_func.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" +#include "utils/syscache.h" + + +static Oid lookup_am_handler_func(List *handler_name, char amtype); +static const char *get_am_type_string(char amtype); + + +/* + * CreateAccessMethod + * Registers a new access method. + */ +ObjectAddress +CreateAccessMethod(CreateAmStmt *stmt) +{ + Relation rel; + ObjectAddress myself; + ObjectAddress referenced; + Oid amoid; + Oid amhandler; + bool nulls[Natts_pg_am]; + Datum values[Natts_pg_am]; + HeapTuple tup; + + rel = table_open(AccessMethodRelationId, RowExclusiveLock); + + /* Must be superuser */ + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to create access method \"%s\"", + stmt->amname), + errhint("Must be superuser to create an access method."))); + + /* Check if name is used */ + amoid = GetSysCacheOid1(AMNAME, Anum_pg_am_oid, + CStringGetDatum(stmt->amname)); + if (OidIsValid(amoid)) + { + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("access method \"%s\" already exists", + stmt->amname))); + } + + /* + * Get the handler function oid, verifying the AM type while at it. + */ + amhandler = lookup_am_handler_func(stmt->handler_name, stmt->amtype); + + /* + * Insert tuple into pg_am. 
+ */ + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + + amoid = GetNewOidWithIndex(rel, AmOidIndexId, Anum_pg_am_oid); + values[Anum_pg_am_oid - 1] = ObjectIdGetDatum(amoid); + values[Anum_pg_am_amname - 1] = + DirectFunctionCall1(namein, CStringGetDatum(stmt->amname)); + values[Anum_pg_am_amhandler - 1] = ObjectIdGetDatum(amhandler); + values[Anum_pg_am_amtype - 1] = CharGetDatum(stmt->amtype); + + tup = heap_form_tuple(RelationGetDescr(rel), values, nulls); + + CatalogTupleInsert(rel, tup); + heap_freetuple(tup); + + myself.classId = AccessMethodRelationId; + myself.objectId = amoid; + myself.objectSubId = 0; + + /* Record dependency on handler function */ + referenced.classId = ProcedureRelationId; + referenced.objectId = amhandler; + referenced.objectSubId = 0; + + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + + recordDependencyOnCurrentExtension(&myself, false); + + InvokeObjectPostCreateHook(AccessMethodRelationId, amoid, 0); + + table_close(rel, RowExclusiveLock); + + return myself; +} + +/* + * get_am_type_oid + * Worker for various get_am_*_oid variants + * + * If missing_ok is false, throw an error if access method not found. If + * true, just return InvalidOid. + * + * If amtype is not '\0', an error is raised if the AM found is not of the + * given type. 
+ */ +static Oid +get_am_type_oid(const char *amname, char amtype, bool missing_ok) +{ + HeapTuple tup; + Oid oid = InvalidOid; + + tup = SearchSysCache1(AMNAME, CStringGetDatum(amname)); + if (HeapTupleIsValid(tup)) + { + Form_pg_am amform = (Form_pg_am) GETSTRUCT(tup); + + if (amtype != '\0' && + amform->amtype != amtype) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("access method \"%s\" is not of type %s", + NameStr(amform->amname), + get_am_type_string(amtype)))); + + oid = amform->oid; + ReleaseSysCache(tup); + } + + if (!OidIsValid(oid) && !missing_ok) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("access method \"%s\" does not exist", amname))); + return oid; +} + +/* + * get_index_am_oid - given an access method name, look up its OID + * and verify it corresponds to an index AM. + */ +Oid +get_index_am_oid(const char *amname, bool missing_ok) +{ + return get_am_type_oid(amname, AMTYPE_INDEX, missing_ok); +} + +/* + * get_table_am_oid - given an access method name, look up its OID + * and verify it corresponds to an table AM. + */ +Oid +get_table_am_oid(const char *amname, bool missing_ok) +{ + return get_am_type_oid(amname, AMTYPE_TABLE, missing_ok); +} + +/* + * get_am_oid - given an access method name, look up its OID. + * The type is not checked. + */ +Oid +get_am_oid(const char *amname, bool missing_ok) +{ + return get_am_type_oid(amname, '\0', missing_ok); +} + +/* + * get_am_name - given an access method OID, look up its name. + */ +char * +get_am_name(Oid amOid) +{ + HeapTuple tup; + char *result = NULL; + + tup = SearchSysCache1(AMOID, ObjectIdGetDatum(amOid)); + if (HeapTupleIsValid(tup)) + { + Form_pg_am amform = (Form_pg_am) GETSTRUCT(tup); + + result = pstrdup(NameStr(amform->amname)); + ReleaseSysCache(tup); + } + return result; +} + +/* + * Convert single-character access method type into string for error reporting. 
+ */ +static const char * +get_am_type_string(char amtype) +{ + switch (amtype) + { + case AMTYPE_INDEX: + return "INDEX"; + case AMTYPE_TABLE: + return "TABLE"; + default: + /* shouldn't happen */ + elog(ERROR, "invalid access method type '%c'", amtype); + return NULL; /* keep compiler quiet */ + } +} + +/* + * Convert a handler function name to an Oid. If the return type of the + * function doesn't match the given AM type, an error is raised. + * + * This function either return valid function Oid or throw an error. + */ +static Oid +lookup_am_handler_func(List *handler_name, char amtype) +{ + Oid handlerOid; + Oid funcargtypes[1] = {INTERNALOID}; + Oid expectedType = InvalidOid; + + if (handler_name == NIL) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("handler function is not specified"))); + + /* handlers have one argument of type internal */ + handlerOid = LookupFuncName(handler_name, 1, funcargtypes, false); + + /* check that handler has the correct return type */ + switch (amtype) + { + case AMTYPE_INDEX: + expectedType = INDEX_AM_HANDLEROID; + break; + case AMTYPE_TABLE: + expectedType = TABLE_AM_HANDLEROID; + break; + default: + elog(ERROR, "unrecognized access method type \"%c\"", amtype); + } + + if (get_func_rettype(handlerOid) != expectedType) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("function %s must return type %s", + get_func_name(handlerOid), + format_type_extended(expectedType, -1, 0)))); + + return handlerOid; +} diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c new file mode 100644 index 0000000..01efdd5 --- /dev/null +++ b/src/backend/commands/analyze.c @@ -0,0 +1,3076 @@ +/*------------------------------------------------------------------------- + * + * analyze.c + * the Postgres statistics generator + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * 
IDENTIFICATION + * src/backend/commands/analyze.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include + +#include "access/detoast.h" +#include "access/genam.h" +#include "access/multixact.h" +#include "access/relation.h" +#include "access/sysattr.h" +#include "access/table.h" +#include "access/tableam.h" +#include "access/transam.h" +#include "access/tupconvert.h" +#include "access/visibilitymap.h" +#include "access/xact.h" +#include "catalog/catalog.h" +#include "catalog/index.h" +#include "catalog/indexing.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_inherits.h" +#include "catalog/pg_namespace.h" +#include "catalog/pg_statistic_ext.h" +#include "commands/dbcommands.h" +#include "commands/progress.h" +#include "commands/tablecmds.h" +#include "commands/vacuum.h" +#include "common/pg_prng.h" +#include "executor/executor.h" +#include "foreign/fdwapi.h" +#include "miscadmin.h" +#include "nodes/nodeFuncs.h" +#include "parser/parse_oper.h" +#include "parser/parse_relation.h" +#include "pgstat.h" +#include "postmaster/autovacuum.h" +#include "statistics/extended_stats_internal.h" +#include "statistics/statistics.h" +#include "storage/bufmgr.h" +#include "storage/lmgr.h" +#include "storage/proc.h" +#include "storage/procarray.h" +#include "utils/acl.h" +#include "utils/attoptcache.h" +#include "utils/builtins.h" +#include "utils/datum.h" +#include "utils/fmgroids.h" +#include "utils/guc.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/pg_rusage.h" +#include "utils/sampling.h" +#include "utils/sortsupport.h" +#include "utils/spccache.h" +#include "utils/syscache.h" +#include "utils/timestamp.h" + + +/* Per-index data for ANALYZE */ +typedef struct AnlIndexData +{ + IndexInfo *indexInfo; /* BuildIndexInfo result */ + double tupleFract; /* fraction of rows for partial index */ + VacAttrStats **vacattrstats; /* index attrs to analyze */ + int attr_cnt; +} 
AnlIndexData; + + +/* Default statistics target (GUC parameter) */ +int default_statistics_target = 100; + +/* A few variables that don't seem worth passing around as parameters */ +static MemoryContext anl_context = NULL; +static BufferAccessStrategy vac_strategy; + + +static void do_analyze_rel(Relation onerel, + VacuumParams *params, List *va_cols, + AcquireSampleRowsFunc acquirefunc, BlockNumber relpages, + bool inh, bool in_outer_xact, int elevel); +static void compute_index_stats(Relation onerel, double totalrows, + AnlIndexData *indexdata, int nindexes, + HeapTuple *rows, int numrows, + MemoryContext col_context); +static VacAttrStats *examine_attribute(Relation onerel, int attnum, + Node *index_expr); +static int acquire_sample_rows(Relation onerel, int elevel, + HeapTuple *rows, int targrows, + double *totalrows, double *totaldeadrows); +static int compare_rows(const void *a, const void *b, void *arg); +static int acquire_inherited_sample_rows(Relation onerel, int elevel, + HeapTuple *rows, int targrows, + double *totalrows, double *totaldeadrows); +static void update_attstats(Oid relid, bool inh, + int natts, VacAttrStats **vacattrstats); +static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull); +static Datum ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull); + + +/* + * analyze_rel() -- analyze one relation + * + * relid identifies the relation to analyze. If relation is supplied, use + * the name therein for reporting any failure to open/lock the rel; do not + * use it once we've successfully opened the rel, since it might be stale. 
+ */ +void +analyze_rel(Oid relid, RangeVar *relation, + VacuumParams *params, List *va_cols, bool in_outer_xact, + BufferAccessStrategy bstrategy) +{ + Relation onerel; + int elevel; + AcquireSampleRowsFunc acquirefunc = NULL; + BlockNumber relpages = 0; + + /* Select logging level */ + if (params->options & VACOPT_VERBOSE) + elevel = INFO; + else + elevel = DEBUG2; + + /* Set up static variables */ + vac_strategy = bstrategy; + + /* + * Check for user-requested abort. + */ + CHECK_FOR_INTERRUPTS(); + + /* + * Open the relation, getting ShareUpdateExclusiveLock to ensure that two + * ANALYZEs don't run on it concurrently. (This also locks out a + * concurrent VACUUM, which doesn't matter much at the moment but might + * matter if we ever try to accumulate stats on dead tuples.) If the rel + * has been dropped since we last saw it, we don't need to process it. + * + * Make sure to generate only logs for ANALYZE in this case. + */ + onerel = vacuum_open_relation(relid, relation, params->options & ~(VACOPT_VACUUM), + params->log_min_duration >= 0, + ShareUpdateExclusiveLock); + + /* leave if relation could not be opened or locked */ + if (!onerel) + return; + + /* + * Check if relation needs to be skipped based on ownership. This check + * happens also when building the relation list to analyze for a manual + * operation, and needs to be done additionally here as ANALYZE could + * happen across multiple transactions where relation ownership could have + * changed in-between. Make sure to generate only logs for ANALYZE in + * this case. + */ + if (!vacuum_is_relation_owner(RelationGetRelid(onerel), + onerel->rd_rel, + params->options & VACOPT_ANALYZE)) + { + relation_close(onerel, ShareUpdateExclusiveLock); + return; + } + + /* + * Silently ignore tables that are temp tables of other backends --- + * trying to analyze these is rather pointless, since their contents are + * probably not up-to-date on disk. 
(We don't throw a warning here; it + * would just lead to chatter during a database-wide ANALYZE.) + */ + if (RELATION_IS_OTHER_TEMP(onerel)) + { + relation_close(onerel, ShareUpdateExclusiveLock); + return; + } + + /* + * We can ANALYZE any table except pg_statistic. See update_attstats + */ + if (RelationGetRelid(onerel) == StatisticRelationId) + { + relation_close(onerel, ShareUpdateExclusiveLock); + return; + } + + /* + * Check that it's of an analyzable relkind, and set up appropriately. + */ + if (onerel->rd_rel->relkind == RELKIND_RELATION || + onerel->rd_rel->relkind == RELKIND_MATVIEW) + { + /* Regular table, so we'll use the regular row acquisition function */ + acquirefunc = acquire_sample_rows; + /* Also get regular table's size */ + relpages = RelationGetNumberOfBlocks(onerel); + } + else if (onerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE) + { + /* + * For a foreign table, call the FDW's hook function to see whether it + * supports analysis. + */ + FdwRoutine *fdwroutine; + bool ok = false; + + fdwroutine = GetFdwRoutineForRelation(onerel, false); + + if (fdwroutine->AnalyzeForeignTable != NULL) + ok = fdwroutine->AnalyzeForeignTable(onerel, + &acquirefunc, + &relpages); + + if (!ok) + { + ereport(WARNING, + (errmsg("skipping \"%s\" --- cannot analyze this foreign table", + RelationGetRelationName(onerel)))); + relation_close(onerel, ShareUpdateExclusiveLock); + return; + } + } + else if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + /* + * For partitioned tables, we want to do the recursive ANALYZE below. + */ + } + else + { + /* No need for a WARNING if we already complained during VACUUM */ + if (!(params->options & VACOPT_VACUUM)) + ereport(WARNING, + (errmsg("skipping \"%s\" --- cannot analyze non-tables or special system tables", + RelationGetRelationName(onerel)))); + relation_close(onerel, ShareUpdateExclusiveLock); + return; + } + + /* + * OK, let's do it. First, initialize progress reporting. 
+ */ + pgstat_progress_start_command(PROGRESS_COMMAND_ANALYZE, + RelationGetRelid(onerel)); + + /* + * Do the normal non-recursive ANALYZE. We can skip this for partitioned + * tables, which don't contain any rows. + */ + if (onerel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) + do_analyze_rel(onerel, params, va_cols, acquirefunc, + relpages, false, in_outer_xact, elevel); + + /* + * If there are child tables, do recursive ANALYZE. + */ + if (onerel->rd_rel->relhassubclass) + do_analyze_rel(onerel, params, va_cols, acquirefunc, relpages, + true, in_outer_xact, elevel); + + /* + * Close source relation now, but keep lock so that no one deletes it + * before we commit. (If someone did, they'd fail to clean up the entries + * we made in pg_statistic. Also, releasing the lock before commit would + * expose us to concurrent-update failures in update_attstats.) + */ + relation_close(onerel, NoLock); + + pgstat_progress_end_command(); +} + +/* + * do_analyze_rel() -- analyze one relation, recursively or not + * + * Note that "acquirefunc" is only relevant for the non-inherited case. + * For the inherited case, acquire_inherited_sample_rows() determines the + * appropriate acquirefunc for each child table. 
+ */ +static void +do_analyze_rel(Relation onerel, VacuumParams *params, + List *va_cols, AcquireSampleRowsFunc acquirefunc, + BlockNumber relpages, bool inh, bool in_outer_xact, + int elevel) +{ + int attr_cnt, + tcnt, + i, + ind; + Relation *Irel; + int nindexes; + bool hasindex; + VacAttrStats **vacattrstats; + AnlIndexData *indexdata; + int targrows, + numrows, + minrows; + double totalrows, + totaldeadrows; + HeapTuple *rows; + PGRUsage ru0; + TimestampTz starttime = 0; + MemoryContext caller_context; + Oid save_userid; + int save_sec_context; + int save_nestlevel; + int64 AnalyzePageHit = VacuumPageHit; + int64 AnalyzePageMiss = VacuumPageMiss; + int64 AnalyzePageDirty = VacuumPageDirty; + PgStat_Counter startreadtime = 0; + PgStat_Counter startwritetime = 0; + + if (inh) + ereport(elevel, + (errmsg("analyzing \"%s.%s\" inheritance tree", + get_namespace_name(RelationGetNamespace(onerel)), + RelationGetRelationName(onerel)))); + else + ereport(elevel, + (errmsg("analyzing \"%s.%s\"", + get_namespace_name(RelationGetNamespace(onerel)), + RelationGetRelationName(onerel)))); + + /* + * Set up a working context so that we can easily free whatever junk gets + * created. + */ + anl_context = AllocSetContextCreate(CurrentMemoryContext, + "Analyze", + ALLOCSET_DEFAULT_SIZES); + caller_context = MemoryContextSwitchTo(anl_context); + + /* + * Switch to the table owner's userid, so that any index functions are run + * as that user. Also lock down security-restricted operations and + * arrange to make GUC variable changes local to this command. 
+ */ + GetUserIdAndSecContext(&save_userid, &save_sec_context); + SetUserIdAndSecContext(onerel->rd_rel->relowner, + save_sec_context | SECURITY_RESTRICTED_OPERATION); + save_nestlevel = NewGUCNestLevel(); + + /* measure elapsed time iff autovacuum logging requires it */ + if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0) + { + if (track_io_timing) + { + startreadtime = pgStatBlockReadTime; + startwritetime = pgStatBlockWriteTime; + } + + pg_rusage_init(&ru0); + if (params->log_min_duration >= 0) + starttime = GetCurrentTimestamp(); + } + + /* + * Determine which columns to analyze + * + * Note that system attributes are never analyzed, so we just reject them + * at the lookup stage. We also reject duplicate column mentions. (We + * could alternatively ignore duplicates, but analyzing a column twice + * won't work; we'd end up making a conflicting update in pg_statistic.) + */ + if (va_cols != NIL) + { + Bitmapset *unique_cols = NULL; + ListCell *le; + + vacattrstats = (VacAttrStats **) palloc(list_length(va_cols) * + sizeof(VacAttrStats *)); + tcnt = 0; + foreach(le, va_cols) + { + char *col = strVal(lfirst(le)); + + i = attnameAttNum(onerel, col, false); + if (i == InvalidAttrNumber) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" of relation \"%s\" does not exist", + col, RelationGetRelationName(onerel)))); + if (bms_is_member(i, unique_cols)) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_COLUMN), + errmsg("column \"%s\" of relation \"%s\" appears more than once", + col, RelationGetRelationName(onerel)))); + unique_cols = bms_add_member(unique_cols, i); + + vacattrstats[tcnt] = examine_attribute(onerel, i, NULL); + if (vacattrstats[tcnt] != NULL) + tcnt++; + } + attr_cnt = tcnt; + } + else + { + attr_cnt = onerel->rd_att->natts; + vacattrstats = (VacAttrStats **) + palloc(attr_cnt * sizeof(VacAttrStats *)); + tcnt = 0; + for (i = 1; i <= attr_cnt; i++) + { + vacattrstats[tcnt] = examine_attribute(onerel, i, NULL); + 
if (vacattrstats[tcnt] != NULL) + tcnt++; + } + attr_cnt = tcnt; + } + + /* + * Open all indexes of the relation, and see if there are any analyzable + * columns in the indexes. We do not analyze index columns if there was + * an explicit column list in the ANALYZE command, however. + * + * If we are doing a recursive scan, we don't want to touch the parent's + * indexes at all. If we're processing a partitioned table, we need to + * know if there are any indexes, but we don't want to process them. + */ + if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + List *idxs = RelationGetIndexList(onerel); + + Irel = NULL; + nindexes = 0; + hasindex = idxs != NIL; + list_free(idxs); + } + else if (!inh) + { + vac_open_indexes(onerel, AccessShareLock, &nindexes, &Irel); + hasindex = nindexes > 0; + } + else + { + Irel = NULL; + nindexes = 0; + hasindex = false; + } + indexdata = NULL; + if (nindexes > 0) + { + indexdata = (AnlIndexData *) palloc0(nindexes * sizeof(AnlIndexData)); + for (ind = 0; ind < nindexes; ind++) + { + AnlIndexData *thisdata = &indexdata[ind]; + IndexInfo *indexInfo; + + thisdata->indexInfo = indexInfo = BuildIndexInfo(Irel[ind]); + thisdata->tupleFract = 1.0; /* fix later if partial */ + if (indexInfo->ii_Expressions != NIL && va_cols == NIL) + { + ListCell *indexpr_item = list_head(indexInfo->ii_Expressions); + + thisdata->vacattrstats = (VacAttrStats **) + palloc(indexInfo->ii_NumIndexAttrs * sizeof(VacAttrStats *)); + tcnt = 0; + for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++) + { + int keycol = indexInfo->ii_IndexAttrNumbers[i]; + + if (keycol == 0) + { + /* Found an index expression */ + Node *indexkey; + + if (indexpr_item == NULL) /* shouldn't happen */ + elog(ERROR, "too few entries in indexprs list"); + indexkey = (Node *) lfirst(indexpr_item); + indexpr_item = lnext(indexInfo->ii_Expressions, + indexpr_item); + thisdata->vacattrstats[tcnt] = + examine_attribute(Irel[ind], i + 1, indexkey); + if (thisdata->vacattrstats[tcnt] != 
NULL) + tcnt++; + } + } + thisdata->attr_cnt = tcnt; + } + } + } + + /* + * Determine how many rows we need to sample, using the worst case from + * all analyzable columns. We use a lower bound of 100 rows to avoid + * possible overflow in Vitter's algorithm. (Note: that will also be the + * target in the corner case where there are no analyzable columns.) + */ + targrows = 100; + for (i = 0; i < attr_cnt; i++) + { + if (targrows < vacattrstats[i]->minrows) + targrows = vacattrstats[i]->minrows; + } + for (ind = 0; ind < nindexes; ind++) + { + AnlIndexData *thisdata = &indexdata[ind]; + + for (i = 0; i < thisdata->attr_cnt; i++) + { + if (targrows < thisdata->vacattrstats[i]->minrows) + targrows = thisdata->vacattrstats[i]->minrows; + } + } + + /* + * Look at extended statistics objects too, as those may define custom + * statistics target. So we may need to sample more rows and then build + * the statistics with enough detail. + */ + minrows = ComputeExtStatisticsRows(onerel, attr_cnt, vacattrstats); + + if (targrows < minrows) + targrows = minrows; + + /* + * Acquire the sample rows + */ + rows = (HeapTuple *) palloc(targrows * sizeof(HeapTuple)); + pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE, + inh ? PROGRESS_ANALYZE_PHASE_ACQUIRE_SAMPLE_ROWS_INH : + PROGRESS_ANALYZE_PHASE_ACQUIRE_SAMPLE_ROWS); + if (inh) + numrows = acquire_inherited_sample_rows(onerel, elevel, + rows, targrows, + &totalrows, &totaldeadrows); + else + numrows = (*acquirefunc) (onerel, elevel, + rows, targrows, + &totalrows, &totaldeadrows); + + /* + * Compute the statistics. Temporary results during the calculations for + * each column are stored in a child context. The calc routines are + * responsible to make sure that whatever they store into the VacAttrStats + * structure is allocated in anl_context. 
+ */ + if (numrows > 0) + { + MemoryContext col_context, + old_context; + + pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE, + PROGRESS_ANALYZE_PHASE_COMPUTE_STATS); + + col_context = AllocSetContextCreate(anl_context, + "Analyze Column", + ALLOCSET_DEFAULT_SIZES); + old_context = MemoryContextSwitchTo(col_context); + + for (i = 0; i < attr_cnt; i++) + { + VacAttrStats *stats = vacattrstats[i]; + AttributeOpts *aopt; + + stats->rows = rows; + stats->tupDesc = onerel->rd_att; + stats->compute_stats(stats, + std_fetch_func, + numrows, + totalrows); + + /* + * If the appropriate flavor of the n_distinct option is + * specified, override with the corresponding value. + */ + aopt = get_attribute_options(onerel->rd_id, stats->attr->attnum); + if (aopt != NULL) + { + float8 n_distinct; + + n_distinct = inh ? aopt->n_distinct_inherited : aopt->n_distinct; + if (n_distinct != 0.0) + stats->stadistinct = n_distinct; + } + + MemoryContextResetAndDeleteChildren(col_context); + } + + if (nindexes > 0) + compute_index_stats(onerel, totalrows, + indexdata, nindexes, + rows, numrows, + col_context); + + MemoryContextSwitchTo(old_context); + MemoryContextDelete(col_context); + + /* + * Emit the completed stats rows into pg_statistic, replacing any + * previous statistics for the target columns. (If there are stats in + * pg_statistic for columns we didn't process, we leave them alone.) + */ + update_attstats(RelationGetRelid(onerel), inh, + attr_cnt, vacattrstats); + + for (ind = 0; ind < nindexes; ind++) + { + AnlIndexData *thisdata = &indexdata[ind]; + + update_attstats(RelationGetRelid(Irel[ind]), false, + thisdata->attr_cnt, thisdata->vacattrstats); + } + + /* Build extended statistics (if there are any). */ + BuildRelationExtStatistics(onerel, inh, totalrows, numrows, rows, + attr_cnt, vacattrstats); + } + + pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE, + PROGRESS_ANALYZE_PHASE_FINALIZE_ANALYZE); + + /* + * Update pages/tuples stats in pg_class ... 
but not if we're doing + * inherited stats. + * + * We assume that VACUUM hasn't set pg_class.reltuples already, even + * during a VACUUM ANALYZE. Although VACUUM often updates pg_class, + * exceptions exist. A "VACUUM (ANALYZE, INDEX_CLEANUP OFF)" command will + * never update pg_class entries for index relations. It's also possible + * that an individual index's pg_class entry won't be updated during + * VACUUM if the index AM returns NULL from its amvacuumcleanup() routine. + */ + if (!inh) + { + BlockNumber relallvisible; + + visibilitymap_count(onerel, &relallvisible, NULL); + + /* Update pg_class for table relation */ + vac_update_relstats(onerel, + relpages, + totalrows, + relallvisible, + hasindex, + InvalidTransactionId, + InvalidMultiXactId, + NULL, NULL, + in_outer_xact); + + /* Same for indexes */ + for (ind = 0; ind < nindexes; ind++) + { + AnlIndexData *thisdata = &indexdata[ind]; + double totalindexrows; + + totalindexrows = ceil(thisdata->tupleFract * totalrows); + vac_update_relstats(Irel[ind], + RelationGetNumberOfBlocks(Irel[ind]), + totalindexrows, + 0, + false, + InvalidTransactionId, + InvalidMultiXactId, + NULL, NULL, + in_outer_xact); + } + } + else if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + /* + * Partitioned tables don't have storage, so we don't set any fields + * in their pg_class entries except for reltuples and relhasindex. + */ + vac_update_relstats(onerel, -1, totalrows, + 0, hasindex, InvalidTransactionId, + InvalidMultiXactId, + NULL, NULL, + in_outer_xact); + } + + /* + * Now report ANALYZE to the cumulative stats system. For regular tables, + * we do it only if not doing inherited stats. For partitioned tables, we + * only do it for inherited stats. (We're never called for not-inherited + * stats on partitioned tables anyway.) + * + * Reset the changes_since_analyze counter only if we analyzed all + * columns; otherwise, there is still work for auto-analyze to do. 
+ */ + if (!inh) + pgstat_report_analyze(onerel, totalrows, totaldeadrows, + (va_cols == NIL)); + else if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + pgstat_report_analyze(onerel, 0, 0, (va_cols == NIL)); + + /* + * If this isn't part of VACUUM ANALYZE, let index AMs do cleanup. + * + * Note that most index AMs perform a no-op as a matter of policy for + * amvacuumcleanup() when called in ANALYZE-only mode. The only exception + * among core index AMs is GIN/ginvacuumcleanup(). + */ + if (!(params->options & VACOPT_VACUUM)) + { + for (ind = 0; ind < nindexes; ind++) + { + IndexBulkDeleteResult *stats; + IndexVacuumInfo ivinfo; + + ivinfo.index = Irel[ind]; + ivinfo.analyze_only = true; + ivinfo.estimated_count = true; + ivinfo.message_level = elevel; + ivinfo.num_heap_tuples = onerel->rd_rel->reltuples; + ivinfo.strategy = vac_strategy; + + stats = index_vacuum_cleanup(&ivinfo, NULL); + + if (stats) + pfree(stats); + } + } + + /* Done with indexes */ + vac_close_indexes(nindexes, Irel, NoLock); + + /* Log the action if appropriate */ + if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0) + { + TimestampTz endtime = GetCurrentTimestamp(); + + if (params->log_min_duration == 0 || + TimestampDifferenceExceeds(starttime, endtime, + params->log_min_duration)) + { + long delay_in_ms; + double read_rate = 0; + double write_rate = 0; + StringInfoData buf; + + /* + * Calculate the difference in the Page Hit/Miss/Dirty that + * happened as part of the analyze by subtracting out the + * pre-analyze values which we saved above. + */ + AnalyzePageHit = VacuumPageHit - AnalyzePageHit; + AnalyzePageMiss = VacuumPageMiss - AnalyzePageMiss; + AnalyzePageDirty = VacuumPageDirty - AnalyzePageDirty; + + /* + * We do not expect an analyze to take > 25 days and it simplifies + * things a bit to use TimestampDifferenceMilliseconds. 
+ */ + delay_in_ms = TimestampDifferenceMilliseconds(starttime, endtime); + + /* + * Note that we are reporting these read/write rates in the same + * manner as VACUUM does, which means that while the 'average read + * rate' here actually corresponds to page misses and resulting + * reads which are also picked up by track_io_timing, if enabled, + * the 'average write rate' is actually talking about the rate of + * pages being dirtied, not being written out, so it's typical to + * have a non-zero 'avg write rate' while I/O timings only reports + * reads. + * + * It's not clear that an ANALYZE will ever result in + * FlushBuffer() being called, but we track and support reporting + * on I/O write time in case that changes as it's practically free + * to do so anyway. + */ + + if (delay_in_ms > 0) + { + read_rate = (double) BLCKSZ * AnalyzePageMiss / (1024 * 1024) / + (delay_in_ms / 1000.0); + write_rate = (double) BLCKSZ * AnalyzePageDirty / (1024 * 1024) / + (delay_in_ms / 1000.0); + } + + /* + * We split this up so we don't emit empty I/O timing values when + * track_io_timing isn't enabled. 
+ */ + + initStringInfo(&buf); + appendStringInfo(&buf, _("automatic analyze of table \"%s.%s.%s\"\n"), + get_database_name(MyDatabaseId), + get_namespace_name(RelationGetNamespace(onerel)), + RelationGetRelationName(onerel)); + if (track_io_timing) + { + double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000; + double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000; + + appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"), + read_ms, write_ms); + } + appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"), + read_rate, write_rate); + appendStringInfo(&buf, _("buffer usage: %lld hits, %lld misses, %lld dirtied\n"), + (long long) AnalyzePageHit, + (long long) AnalyzePageMiss, + (long long) AnalyzePageDirty); + appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0)); + + ereport(LOG, + (errmsg_internal("%s", buf.data))); + + pfree(buf.data); + } + } + + /* Roll back any GUC changes executed by index functions */ + AtEOXact_GUC(false, save_nestlevel); + + /* Restore userid and security context */ + SetUserIdAndSecContext(save_userid, save_sec_context); + + /* Restore current context and release memory */ + MemoryContextSwitchTo(caller_context); + MemoryContextDelete(anl_context); + anl_context = NULL; +} + +/* + * Compute statistics about indexes of a relation + */ +static void +compute_index_stats(Relation onerel, double totalrows, + AnlIndexData *indexdata, int nindexes, + HeapTuple *rows, int numrows, + MemoryContext col_context) +{ + MemoryContext ind_context, + old_context; + Datum values[INDEX_MAX_KEYS]; + bool isnull[INDEX_MAX_KEYS]; + int ind, + i; + + ind_context = AllocSetContextCreate(anl_context, + "Analyze Index", + ALLOCSET_DEFAULT_SIZES); + old_context = MemoryContextSwitchTo(ind_context); + + for (ind = 0; ind < nindexes; ind++) + { + AnlIndexData *thisdata = &indexdata[ind]; + IndexInfo *indexInfo = thisdata->indexInfo; + int attr_cnt = thisdata->attr_cnt; + 
TupleTableSlot *slot; + EState *estate; + ExprContext *econtext; + ExprState *predicate; + Datum *exprvals; + bool *exprnulls; + int numindexrows, + tcnt, + rowno; + double totalindexrows; + + /* Ignore index if no columns to analyze and not partial */ + if (attr_cnt == 0 && indexInfo->ii_Predicate == NIL) + continue; + + /* + * Need an EState for evaluation of index expressions and + * partial-index predicates. Create it in the per-index context to be + * sure it gets cleaned up at the bottom of the loop. + */ + estate = CreateExecutorState(); + econtext = GetPerTupleExprContext(estate); + /* Need a slot to hold the current heap tuple, too */ + slot = MakeSingleTupleTableSlot(RelationGetDescr(onerel), + &TTSOpsHeapTuple); + + /* Arrange for econtext's scan tuple to be the tuple under test */ + econtext->ecxt_scantuple = slot; + + /* Set up execution state for predicate. */ + predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate); + + /* Compute and save index expression values */ + exprvals = (Datum *) palloc(numrows * attr_cnt * sizeof(Datum)); + exprnulls = (bool *) palloc(numrows * attr_cnt * sizeof(bool)); + numindexrows = 0; + tcnt = 0; + for (rowno = 0; rowno < numrows; rowno++) + { + HeapTuple heapTuple = rows[rowno]; + + vacuum_delay_point(); + + /* + * Reset the per-tuple context each time, to reclaim any cruft + * left behind by evaluating the predicate or index expressions. + */ + ResetExprContext(econtext); + + /* Set up for predicate or expression evaluation */ + ExecStoreHeapTuple(heapTuple, slot, false); + + /* If index is partial, check predicate */ + if (predicate != NULL) + { + if (!ExecQual(predicate, econtext)) + continue; + } + numindexrows++; + + if (attr_cnt > 0) + { + /* + * Evaluate the index row to compute expression values. We + * could do this by hand, but FormIndexDatum is convenient. + */ + FormIndexDatum(indexInfo, + slot, + estate, + values, + isnull); + + /* + * Save just the columns we care about. 
We copy the values + * into ind_context from the estate's per-tuple context. + */ + for (i = 0; i < attr_cnt; i++) + { + VacAttrStats *stats = thisdata->vacattrstats[i]; + int attnum = stats->attr->attnum; + + if (isnull[attnum - 1]) + { + exprvals[tcnt] = (Datum) 0; + exprnulls[tcnt] = true; + } + else + { + exprvals[tcnt] = datumCopy(values[attnum - 1], + stats->attrtype->typbyval, + stats->attrtype->typlen); + exprnulls[tcnt] = false; + } + tcnt++; + } + } + } + + /* + * Having counted the number of rows that pass the predicate in the + * sample, we can estimate the total number of rows in the index. + */ + thisdata->tupleFract = (double) numindexrows / (double) numrows; + totalindexrows = ceil(thisdata->tupleFract * totalrows); + + /* + * Now we can compute the statistics for the expression columns. + */ + if (numindexrows > 0) + { + MemoryContextSwitchTo(col_context); + for (i = 0; i < attr_cnt; i++) + { + VacAttrStats *stats = thisdata->vacattrstats[i]; + + stats->exprvals = exprvals + i; + stats->exprnulls = exprnulls + i; + stats->rowstride = attr_cnt; + stats->compute_stats(stats, + ind_fetch_func, + numindexrows, + totalindexrows); + + MemoryContextResetAndDeleteChildren(col_context); + } + } + + /* And clean up */ + MemoryContextSwitchTo(ind_context); + + ExecDropSingleTupleTableSlot(slot); + FreeExecutorState(estate); + MemoryContextResetAndDeleteChildren(ind_context); + } + + MemoryContextSwitchTo(old_context); + MemoryContextDelete(ind_context); +} + +/* + * examine_attribute -- pre-analysis of a single column + * + * Determine whether the column is analyzable; if so, create and initialize + * a VacAttrStats struct for it. If not, return NULL. + * + * If index_expr isn't NULL, then we're trying to analyze an expression index, + * and index_expr is the expression tree representing the column's data. 
+ */ +static VacAttrStats * +examine_attribute(Relation onerel, int attnum, Node *index_expr) +{ + Form_pg_attribute attr = TupleDescAttr(onerel->rd_att, attnum - 1); + HeapTuple typtuple; + VacAttrStats *stats; + int i; + bool ok; + + /* Never analyze dropped columns */ + if (attr->attisdropped) + return NULL; + + /* Don't analyze column if user has specified not to */ + if (attr->attstattarget == 0) + return NULL; + + /* + * Create the VacAttrStats struct. Note that we only have a copy of the + * fixed fields of the pg_attribute tuple. + */ + stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats)); + stats->attr = (Form_pg_attribute) palloc(ATTRIBUTE_FIXED_PART_SIZE); + memcpy(stats->attr, attr, ATTRIBUTE_FIXED_PART_SIZE); + + /* + * When analyzing an expression index, believe the expression tree's type + * not the column datatype --- the latter might be the opckeytype storage + * type of the opclass, which is not interesting for our purposes. (Note: + * if we did anything with non-expression index columns, we'd need to + * figure out where to get the correct type info from, but for now that's + * not a problem.) It's not clear whether anyone will care about the + * typmod, but we store that too just in case. + */ + if (index_expr) + { + stats->attrtypid = exprType(index_expr); + stats->attrtypmod = exprTypmod(index_expr); + + /* + * If a collation has been specified for the index column, use that in + * preference to anything else; but if not, fall back to whatever we + * can get from the expression. 
+ */ + if (OidIsValid(onerel->rd_indcollation[attnum - 1])) + stats->attrcollid = onerel->rd_indcollation[attnum - 1]; + else + stats->attrcollid = exprCollation(index_expr); + } + else + { + stats->attrtypid = attr->atttypid; + stats->attrtypmod = attr->atttypmod; + stats->attrcollid = attr->attcollation; + } + + typtuple = SearchSysCacheCopy1(TYPEOID, + ObjectIdGetDatum(stats->attrtypid)); + if (!HeapTupleIsValid(typtuple)) + elog(ERROR, "cache lookup failed for type %u", stats->attrtypid); + stats->attrtype = (Form_pg_type) GETSTRUCT(typtuple); + stats->anl_context = anl_context; + stats->tupattnum = attnum; + + /* + * The fields describing the stats->stavalues[n] element types default to + * the type of the data being analyzed, but the type-specific typanalyze + * function can change them if it wants to store something else. + */ + for (i = 0; i < STATISTIC_NUM_SLOTS; i++) + { + stats->statypid[i] = stats->attrtypid; + stats->statyplen[i] = stats->attrtype->typlen; + stats->statypbyval[i] = stats->attrtype->typbyval; + stats->statypalign[i] = stats->attrtype->typalign; + } + + /* + * Call the type-specific typanalyze function. If none is specified, use + * std_typanalyze(). + */ + if (OidIsValid(stats->attrtype->typanalyze)) + ok = DatumGetBool(OidFunctionCall1(stats->attrtype->typanalyze, + PointerGetDatum(stats))); + else + ok = std_typanalyze(stats); + + if (!ok || stats->compute_stats == NULL || stats->minrows <= 0) + { + heap_freetuple(typtuple); + pfree(stats->attr); + pfree(stats); + return NULL; + } + + return stats; +} + +/* + * acquire_sample_rows -- acquire a random sample of rows from the table + * + * Selected rows are returned in the caller-allocated array rows[], which + * must have at least targrows entries. + * The actual number of rows selected is returned as the function result. + * We also estimate the total numbers of live and dead rows in the table, + * and return them into *totalrows and *totaldeadrows, respectively. 
+ * + * The returned list of tuples is in order by physical position in the table. + * (We will rely on this later to derive correlation estimates.) + * + * As of May 2004 we use a new two-stage method: Stage one selects up + * to targrows random blocks (or all blocks, if there aren't so many). + * Stage two scans these blocks and uses the Vitter algorithm to create + * a random sample of targrows rows (or less, if there are less in the + * sample of blocks). The two stages are executed simultaneously: each + * block is processed as soon as stage one returns its number and while + * the rows are read stage two controls which ones are to be inserted + * into the sample. + * + * Although every row has an equal chance of ending up in the final + * sample, this sampling method is not perfect: not every possible + * sample has an equal chance of being selected. For large relations + * the number of different blocks represented by the sample tends to be + * too small. We can live with that for now. Improvements are welcome. + * + * An important property of this sampling method is that because we do + * look at a statistically unbiased set of blocks, we should get + * unbiased estimates of the average numbers of live and dead rows per + * block. The previous sampling method put too much credence in the row + * density near the start of the table. 
+ */ +static int +acquire_sample_rows(Relation onerel, int elevel, + HeapTuple *rows, int targrows, + double *totalrows, double *totaldeadrows) +{ + int numrows = 0; /* # rows now in reservoir */ + double samplerows = 0; /* total # rows collected */ + double liverows = 0; /* # live rows seen */ + double deadrows = 0; /* # dead rows seen */ + double rowstoskip = -1; /* -1 means not set yet */ + uint32 randseed; /* Seed for block sampler(s) */ + BlockNumber totalblocks; + TransactionId OldestXmin; + BlockSamplerData bs; + ReservoirStateData rstate; + TupleTableSlot *slot; + TableScanDesc scan; + BlockNumber nblocks; + BlockNumber blksdone = 0; +#ifdef USE_PREFETCH + int prefetch_maximum = 0; /* blocks to prefetch if enabled */ + BlockSamplerData prefetch_bs; +#endif + + Assert(targrows > 0); + + totalblocks = RelationGetNumberOfBlocks(onerel); + + /* Need a cutoff xmin for HeapTupleSatisfiesVacuum */ + OldestXmin = GetOldestNonRemovableTransactionId(onerel); + + /* Prepare for sampling block numbers */ + randseed = pg_prng_uint32(&pg_global_prng_state); + nblocks = BlockSampler_Init(&bs, totalblocks, targrows, randseed); + +#ifdef USE_PREFETCH + prefetch_maximum = get_tablespace_maintenance_io_concurrency(onerel->rd_rel->reltablespace); + /* Create another BlockSampler, using the same seed, for prefetching */ + if (prefetch_maximum) + (void) BlockSampler_Init(&prefetch_bs, totalblocks, targrows, randseed); +#endif + + /* Report sampling block numbers */ + pgstat_progress_update_param(PROGRESS_ANALYZE_BLOCKS_TOTAL, + nblocks); + + /* Prepare for sampling rows */ + reservoir_init_selection_state(&rstate, targrows); + + scan = table_beginscan_analyze(onerel); + slot = table_slot_create(onerel, NULL); + +#ifdef USE_PREFETCH + + /* + * If we are doing prefetching, then go ahead and tell the kernel about + * the first set of pages we are going to want. 
This also moves our + * iterator out ahead of the main one being used, where we will keep it so + * that we're always pre-fetching out prefetch_maximum number of blocks + * ahead. + */ + if (prefetch_maximum) + { + for (int i = 0; i < prefetch_maximum; i++) + { + BlockNumber prefetch_block; + + if (!BlockSampler_HasMore(&prefetch_bs)) + break; + + prefetch_block = BlockSampler_Next(&prefetch_bs); + PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, prefetch_block); + } + } +#endif + + /* Outer loop over blocks to sample */ + while (BlockSampler_HasMore(&bs)) + { + bool block_accepted; + BlockNumber targblock = BlockSampler_Next(&bs); +#ifdef USE_PREFETCH + BlockNumber prefetch_targblock = InvalidBlockNumber; + + /* + * Make sure that every time the main BlockSampler is moved forward + * that our prefetch BlockSampler also gets moved forward, so that we + * always stay out ahead. + */ + if (prefetch_maximum && BlockSampler_HasMore(&prefetch_bs)) + prefetch_targblock = BlockSampler_Next(&prefetch_bs); +#endif + + vacuum_delay_point(); + + block_accepted = table_scan_analyze_next_block(scan, targblock, vac_strategy); + +#ifdef USE_PREFETCH + + /* + * When pre-fetching, after we get a block, tell the kernel about the + * next one we will want, if there's any left. + * + * We want to do this even if the table_scan_analyze_next_block() call + * above decides against analyzing the block it picked. + */ + if (prefetch_maximum && prefetch_targblock != InvalidBlockNumber) + PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, prefetch_targblock); +#endif + + /* + * Don't analyze if table_scan_analyze_next_block() indicated this + * block is unsuitable for analyzing. + */ + if (!block_accepted) + continue; + + while (table_scan_analyze_next_tuple(scan, OldestXmin, &liverows, &deadrows, slot)) + { + /* + * The first targrows sample rows are simply copied into the + * reservoir. Then we start replacing tuples in the sample until + * we reach the end of the relation. 
This algorithm is from Jeff + * Vitter's paper (see full citation in utils/misc/sampling.c). It + * works by repeatedly computing the number of tuples to skip + * before selecting a tuple, which replaces a randomly chosen + * element of the reservoir (current set of tuples). At all times + * the reservoir is a true random sample of the tuples we've + * passed over so far, so when we fall off the end of the relation + * we're done. + */ + if (numrows < targrows) + rows[numrows++] = ExecCopySlotHeapTuple(slot); + else + { + /* + * t in Vitter's paper is the number of records already + * processed. If we need to compute a new S value, we must + * use the not-yet-incremented value of samplerows as t. + */ + if (rowstoskip < 0) + rowstoskip = reservoir_get_next_S(&rstate, samplerows, targrows); + + if (rowstoskip <= 0) + { + /* + * Found a suitable tuple, so save it, replacing one old + * tuple at random + */ + int k = (int) (targrows * sampler_random_fract(&rstate.randstate)); + + Assert(k >= 0 && k < targrows); + heap_freetuple(rows[k]); + rows[k] = ExecCopySlotHeapTuple(slot); + } + + rowstoskip -= 1; + } + + samplerows += 1; + } + + pgstat_progress_update_param(PROGRESS_ANALYZE_BLOCKS_DONE, + ++blksdone); + } + + ExecDropSingleTupleTableSlot(slot); + table_endscan(scan); + + /* + * If we didn't find as many tuples as we wanted then we're done. No sort + * is needed, since they're already in order. + * + * Otherwise we need to sort the collected tuples by position + * (itempointer). It's not worth worrying about corner cases where the + * tuples are already sorted. + */ + if (numrows == targrows) + qsort_interruptible((void *) rows, numrows, sizeof(HeapTuple), + compare_rows, NULL); + + /* + * Estimate total numbers of live and dead rows in relation, extrapolating + * on the assumption that the average tuple density in pages we didn't + * scan is the same as in the pages we did scan. 
Since what we scanned is + * a random sample of the pages in the relation, this should be a good + * assumption. + */ + if (bs.m > 0) + { + *totalrows = floor((liverows / bs.m) * totalblocks + 0.5); + *totaldeadrows = floor((deadrows / bs.m) * totalblocks + 0.5); + } + else + { + *totalrows = 0.0; + *totaldeadrows = 0.0; + } + + /* + * Emit some interesting relation info + */ + ereport(elevel, + (errmsg("\"%s\": scanned %d of %u pages, " + "containing %.0f live rows and %.0f dead rows; " + "%d rows in sample, %.0f estimated total rows", + RelationGetRelationName(onerel), + bs.m, totalblocks, + liverows, deadrows, + numrows, *totalrows))); + + return numrows; +} + +/* + * Comparator for sorting rows[] array + */ +static int +compare_rows(const void *a, const void *b, void *arg) +{ + HeapTuple ha = *(const HeapTuple *) a; + HeapTuple hb = *(const HeapTuple *) b; + BlockNumber ba = ItemPointerGetBlockNumber(&ha->t_self); + OffsetNumber oa = ItemPointerGetOffsetNumber(&ha->t_self); + BlockNumber bb = ItemPointerGetBlockNumber(&hb->t_self); + OffsetNumber ob = ItemPointerGetOffsetNumber(&hb->t_self); + + if (ba < bb) + return -1; + if (ba > bb) + return 1; + if (oa < ob) + return -1; + if (oa > ob) + return 1; + return 0; +} + + +/* + * acquire_inherited_sample_rows -- acquire sample rows from inheritance tree + * + * This has the same API as acquire_sample_rows, except that rows are + * collected from all inheritance children as well as the specified table. + * We fail and return zero if there are no inheritance children, or if all + * children are foreign tables that don't support ANALYZE. 
 */
static int
acquire_inherited_sample_rows(Relation onerel, int elevel,
							  HeapTuple *rows, int targrows,
							  double *totalrows, double *totaldeadrows)
{
	List	   *tableOIDs;
	Relation   *rels;
	AcquireSampleRowsFunc *acquirefuncs;
	double	   *relblocks;
	double		totalblocks;
	int			numrows,
				nrels,
				i;
	ListCell   *lc;
	bool		has_child;

	/* Initialize output parameters to zero now, in case we exit early */
	*totalrows = 0;
	*totaldeadrows = 0;

	/*
	 * Find all members of inheritance set.  We only need AccessShareLock on
	 * the children.
	 */
	tableOIDs =
		find_all_inheritors(RelationGetRelid(onerel), AccessShareLock, NULL);

	/*
	 * Check that there's at least one descendant, else fail.  This could
	 * happen despite analyze_rel's relhassubclass check, if table once had a
	 * child but no longer does.  In that case, we can clear the
	 * relhassubclass field so as not to make the same mistake again later.
	 * (This is safe because we hold ShareUpdateExclusiveLock.)
	 */
	if (list_length(tableOIDs) < 2)
	{
		/* CCI because we already updated the pg_class row in this command */
		CommandCounterIncrement();
		SetRelationHasSubclass(RelationGetRelid(onerel), false);
		ereport(elevel,
				(errmsg("skipping analyze of \"%s.%s\" inheritance tree --- this inheritance tree contains no child tables",
						get_namespace_name(RelationGetNamespace(onerel)),
						RelationGetRelationName(onerel))));
		return 0;
	}

	/*
	 * Identify acquirefuncs to use, and count blocks in all the relations.
	 * The result could overflow BlockNumber, so we use double arithmetic.
	 */
	rels = (Relation *) palloc(list_length(tableOIDs) * sizeof(Relation));
	acquirefuncs = (AcquireSampleRowsFunc *)
		palloc(list_length(tableOIDs) * sizeof(AcquireSampleRowsFunc));
	relblocks = (double *) palloc(list_length(tableOIDs) * sizeof(double));
	totalblocks = 0;
	nrels = 0;
	has_child = false;
	foreach(lc, tableOIDs)
	{
		Oid			childOID = lfirst_oid(lc);
		Relation	childrel;
		AcquireSampleRowsFunc acquirefunc = NULL;
		BlockNumber relpages = 0;

		/* We already got the needed lock */
		childrel = table_open(childOID, NoLock);

		/* Ignore if temp table of another backend */
		if (RELATION_IS_OTHER_TEMP(childrel))
		{
			/* ... but release the lock on it */
			Assert(childrel != onerel);
			table_close(childrel, AccessShareLock);
			continue;
		}

		/* Check table type (MATVIEW can't happen, but might as well allow) */
		if (childrel->rd_rel->relkind == RELKIND_RELATION ||
			childrel->rd_rel->relkind == RELKIND_MATVIEW)
		{
			/* Regular table, so use the regular row acquisition function */
			acquirefunc = acquire_sample_rows;
			relpages = RelationGetNumberOfBlocks(childrel);
		}
		else if (childrel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
		{
			/*
			 * For a foreign table, call the FDW's hook function to see
			 * whether it supports analysis.  On success the FDW fills in
			 * acquirefunc and relpages for us.
			 */
			FdwRoutine *fdwroutine;
			bool		ok = false;

			fdwroutine = GetFdwRoutineForRelation(childrel, false);

			if (fdwroutine->AnalyzeForeignTable != NULL)
				ok = fdwroutine->AnalyzeForeignTable(childrel,
													 &acquirefunc,
													 &relpages);

			if (!ok)
			{
				/* ignore, but release the lock on it */
				Assert(childrel != onerel);
				table_close(childrel, AccessShareLock);
				continue;
			}
		}
		else
		{
			/*
			 * ignore, but release the lock on it.  don't try to unlock the
			 * passed-in relation
			 */
			Assert(childrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
			if (childrel != onerel)
				table_close(childrel, AccessShareLock);
			else
				table_close(childrel, NoLock);
			continue;
		}

		/* OK, we'll process this child */
		has_child = true;
		rels[nrels] = childrel;
		acquirefuncs[nrels] = acquirefunc;
		relblocks[nrels] = (double) relpages;
		totalblocks += (double) relpages;
		nrels++;
	}

	/*
	 * If we don't have at least one child table to consider, fail.  If the
	 * relation is a partitioned table, it's not counted as a child table.
	 */
	if (!has_child)
	{
		ereport(elevel,
				(errmsg("skipping analyze of \"%s.%s\" inheritance tree --- this inheritance tree contains no analyzable child tables",
						get_namespace_name(RelationGetNamespace(onerel)),
						RelationGetRelationName(onerel))));
		return 0;
	}

	/*
	 * Now sample rows from each relation, proportionally to its fraction of
	 * the total block count.  (This might be less than desirable if the child
	 * rels have radically different free-space percentages, but it's not
	 * clear that it's worth working harder.)
	 */
	pgstat_progress_update_param(PROGRESS_ANALYZE_CHILD_TABLES_TOTAL,
								 nrels);
	/* Each child's sample is appended consecutively into rows[] */
	numrows = 0;
	for (i = 0; i < nrels; i++)
	{
		Relation	childrel = rels[i];
		AcquireSampleRowsFunc acquirefunc = acquirefuncs[i];
		double		childblocks = relblocks[i];

		/*
		 * Report progress.  The sampling function will normally report blocks
		 * done/total, but we need to reset them to 0 here, so that they don't
		 * show stale values from the previous child until the sampling
		 * function reports again.
		 */
		{
			const int	progress_index[] = {
				PROGRESS_ANALYZE_CURRENT_CHILD_TABLE_RELID,
				PROGRESS_ANALYZE_BLOCKS_DONE,
				PROGRESS_ANALYZE_BLOCKS_TOTAL
			};
			const int64 progress_vals[] = {
				RelationGetRelid(childrel),
				0,
				0,
			};

			pgstat_progress_update_multi_param(3, progress_index, progress_vals);
		}

		if (childblocks > 0)
		{
			int			childtargrows;

			childtargrows = (int) rint(targrows * childblocks / totalblocks);
			/* Make sure we don't overrun due to roundoff error */
			childtargrows = Min(childtargrows, targrows - numrows);
			if (childtargrows > 0)
			{
				int			childrows;
				double		trows,
							tdrows;

				/* Fetch a random sample of the child's rows */
				childrows = (*acquirefunc) (childrel, elevel,
											rows + numrows, childtargrows,
											&trows, &tdrows);

				/* We may need to convert from child's rowtype to parent's */
				if (childrows > 0 &&
					!equalTupleDescs(RelationGetDescr(childrel),
									 RelationGetDescr(onerel)))
				{
					TupleConversionMap *map;

					map = convert_tuples_by_name(RelationGetDescr(childrel),
												 RelationGetDescr(onerel));
					if (map != NULL)
					{
						int			j;

						for (j = 0; j < childrows; j++)
						{
							HeapTuple	newtup;

							newtup = execute_attr_map_tuple(rows[numrows + j], map);
							heap_freetuple(rows[numrows + j]);
							rows[numrows + j] = newtup;
						}
						free_conversion_map(map);
					}
				}

				/* And add to counts */
				numrows += childrows;
				*totalrows += trows;
				*totaldeadrows += tdrows;
			}
		}

		/*
		 * Note: we cannot release the child-table locks, since we may have
		 * pointers to their TOAST tables in the sampled rows.
		 */
		table_close(childrel, NoLock);
		pgstat_progress_update_param(PROGRESS_ANALYZE_CHILD_TABLES_DONE,
									 i + 1);
	}

	return numrows;
}


/*
 * update_attstats() -- update attribute statistics for one relation
 *
 *		Statistics are stored in several places: the pg_class row for the
 *		relation has stats about the whole relation, and there is a
 *		pg_statistic row for each (non-system) attribute that has ever
 *		been analyzed.  The pg_class values are updated by VACUUM, not here.
 *
 *		pg_statistic rows are just added or updated normally.  This means
 *		that pg_statistic will probably contain some deleted rows at the
 *		completion of a vacuum cycle, unless it happens to get vacuumed last.
 *
 *		To keep things simple, we punt for pg_statistic, and don't try
 *		to compute or store rows for pg_statistic itself in pg_statistic.
 *		This could possibly be made to work, but it's not worth the trouble.
 *		Note analyze_rel() has seen to it that we won't come here when
 *		vacuuming pg_statistic itself.
 *
 *		Note: there would be a race condition here if two backends could
 *		ANALYZE the same table concurrently.  Presently, we lock that out
 *		by taking a self-exclusive lock on the relation in analyze_rel().
 */
static void
update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats)
{
	Relation	sd;
	int			attno;

	if (natts <= 0)
		return;					/* nothing to do */

	sd = table_open(StatisticRelationId, RowExclusiveLock);

	for (attno = 0; attno < natts; attno++)
	{
		VacAttrStats *stats = vacattrstats[attno];
		HeapTuple	stup,
					oldtup;
		int			i,
					k,
					n;
		Datum		values[Natts_pg_statistic];
		bool		nulls[Natts_pg_statistic];
		bool		replaces[Natts_pg_statistic];

		/* Ignore attr if we weren't able to collect stats */
		if (!stats->stats_valid)
			continue;

		/*
		 * Construct a new pg_statistic tuple.  We always build a full set of
		 * values; replaces[] is consulted only by heap_modify_tuple in the
		 * update path below, but it's simplest to set every entry.
		 */
		for (i = 0; i < Natts_pg_statistic; ++i)
		{
			nulls[i] = false;
			replaces[i] = true;
		}

		values[Anum_pg_statistic_starelid - 1] = ObjectIdGetDatum(relid);
		values[Anum_pg_statistic_staattnum - 1] = Int16GetDatum(stats->attr->attnum);
		values[Anum_pg_statistic_stainherit - 1] = BoolGetDatum(inh);
		values[Anum_pg_statistic_stanullfrac - 1] = Float4GetDatum(stats->stanullfrac);
		values[Anum_pg_statistic_stawidth - 1] = Int32GetDatum(stats->stawidth);
		values[Anum_pg_statistic_stadistinct - 1] = Float4GetDatum(stats->stadistinct);
		i = Anum_pg_statistic_stakind1 - 1;
		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
		{
			values[i++] = Int16GetDatum(stats->stakind[k]); /* stakindN */
		}
		i = Anum_pg_statistic_staop1 - 1;
		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
		{
			values[i++] = ObjectIdGetDatum(stats->staop[k]);	/* staopN */
		}
		i = Anum_pg_statistic_stacoll1 - 1;
		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
		{
			values[i++] = ObjectIdGetDatum(stats->stacoll[k]);	/* stacollN */
		}
		i = Anum_pg_statistic_stanumbers1 - 1;
		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
		{
			int			nnum = stats->numnumbers[k];

			if (nnum > 0)
			{
				Datum	   *numdatums = (Datum *) palloc(nnum * sizeof(Datum));
				ArrayType  *arry;

				for (n = 0; n < nnum; n++)
					numdatums[n] = Float4GetDatum(stats->stanumbers[k][n]);
				/* XXX knows more than it should about type float4: */
				arry = construct_array(numdatums, nnum,
									   FLOAT4OID,
									   sizeof(float4), true, TYPALIGN_INT);
				values[i++] = PointerGetDatum(arry);	/* stanumbersN */
			}
			else
			{
				/* slot unused: store a NULL */
				nulls[i] = true;
				values[i++] = (Datum) 0;
			}
		}
		i = Anum_pg_statistic_stavalues1 - 1;
		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
		{
			if (stats->numvalues[k] > 0)
			{
				ArrayType  *arry;

				arry = construct_array(stats->stavalues[k],
									   stats->numvalues[k],
									   stats->statypid[k],
									   stats->statyplen[k],
									   stats->statypbyval[k],
									   stats->statypalign[k]);
				values[i++] = PointerGetDatum(arry);	/* stavaluesN */
			}
			else
			{
				/* slot unused: store a NULL */
				nulls[i] = true;
				values[i++] = (Datum) 0;
			}
		}

		/* Is there already a pg_statistic tuple for this attribute? */
		oldtup = SearchSysCache3(STATRELATTINH,
								 ObjectIdGetDatum(relid),
								 Int16GetDatum(stats->attr->attnum),
								 BoolGetDatum(inh));

		if (HeapTupleIsValid(oldtup))
		{
			/* Yes, replace it */
			stup = heap_modify_tuple(oldtup,
									 RelationGetDescr(sd),
									 values,
									 nulls,
									 replaces);
			ReleaseSysCache(oldtup);
			CatalogTupleUpdate(sd, &stup->t_self, stup);
		}
		else
		{
			/* No, insert new tuple */
			stup = heap_form_tuple(RelationGetDescr(sd), values, nulls);
			CatalogTupleInsert(sd, stup);
		}

		heap_freetuple(stup);
	}

	table_close(sd, RowExclusiveLock);
}

/*
 * Standard fetch function for use by compute_stats subroutines.
 *
 * This exists to provide some insulation between compute_stats routines
 * and the actual storage of the sample data.
 */
static Datum
std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
{
	int			attnum = stats->tupattnum;
	HeapTuple	tuple = stats->rows[rownum];
	TupleDesc	tupDesc = stats->tupDesc;

	return heap_getattr(tuple, attnum, tupDesc, isNull);
}

/*
 * Fetch function for analyzing index expressions.
 *
 * We have not bothered to construct index tuples, instead the data is
 * just in Datum arrays.
 */
static Datum
ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
{
	int			i;

	/* exprvals and exprnulls are already offset for proper column */
	i = rownum * stats->rowstride;
	*isNull = stats->exprnulls[i];
	return stats->exprvals[i];
}


/*==========================================================================
 *
 * Code below this point represents the "standard" type-specific statistics
 * analysis algorithms.  This code can be replaced on a per-data-type basis
 * by setting a nonzero value in pg_type.typanalyze.
 *
 *==========================================================================
 */


/*
 * To avoid consuming too much memory during analysis and/or too much space
 * in the resulting pg_statistic rows, we ignore varlena datums that are wider
 * than WIDTH_THRESHOLD (after detoasting!).  This is legitimate for MCV
 * and distinct-value calculations since a wide value is unlikely to be
 * duplicated at all, much less be a most-common value.  For the same reason,
 * ignoring wide values will not affect our estimates of histogram bin
 * boundaries very much.
 */
#define WIDTH_THRESHOLD  1024

/*
 * Simple swap helpers.  NB: these are macros, so the arguments are
 * evaluated more than once -- they must be side-effect-free.
 */
#define swapInt(a,b)	do {int _tmp; _tmp=a; a=b; b=_tmp;} while(0)
#define swapDatum(a,b)	do {Datum _tmp; _tmp=a; a=b; b=_tmp;} while(0)

/*
 * Extra information used by the default analysis routines
 */
typedef struct
{
	int			count;			/* # of duplicates */
	int			first;			/* values[] index of first occurrence */
} ScalarMCVItem;

typedef struct
{
	SortSupport ssup;
	int		   *tupnoLink;
} CompareScalarsContext;


/* Forward declarations for the standard stats algorithms and helpers */
static void compute_trivial_stats(VacAttrStatsP stats,
								  AnalyzeAttrFetchFunc fetchfunc,
								  int samplerows,
								  double totalrows);
static void compute_distinct_stats(VacAttrStatsP stats,
								   AnalyzeAttrFetchFunc fetchfunc,
								   int samplerows,
								   double totalrows);
static void compute_scalar_stats(VacAttrStatsP stats,
								 AnalyzeAttrFetchFunc fetchfunc,
								 int samplerows,
								 double totalrows);
static int	compare_scalars(const void *a, const void *b, void *arg);
static int	compare_mcvs(const void *a, const void *b, void *arg);
static int	analyze_mcv_list(int *mcv_counts,
							 int num_mcv,
							 double stadistinct,
							 double stanullfrac,
							 int samplerows,
							 double totalrows);


/*
 * std_typanalyze -- the default type-specific typanalyze function
 *
 * Chooses one of the compute_*_stats routines depending on what operators
 * are available for the column's type, and sets the minimum number of
 * sample rows that routine wants to see.
 */
bool
std_typanalyze(VacAttrStats *stats)
{
	Form_pg_attribute attr = stats->attr;
	Oid			ltopr;
	Oid			eqopr;
	StdAnalyzeData *mystats;

	/* If the attstattarget column is negative, use the default value */
	/* NB: it is okay to scribble on stats->attr since it's a copy */
	if (attr->attstattarget < 0)
		attr->attstattarget = default_statistics_target;

	/* Look for default "<" and "=" operators for column's type */
	get_sort_group_operators(stats->attrtypid,
							 false, false, false,
							 &ltopr, &eqopr, NULL,
							 NULL);

	/* Save the operator info for compute_stats routines */
	mystats = (StdAnalyzeData *) palloc(sizeof(StdAnalyzeData));
	mystats->eqopr = eqopr;
	/* resolve the "=" operator's implementation function once, up front */
	mystats->eqfunc = OidIsValid(eqopr) ? get_opcode(eqopr) : InvalidOid;
	mystats->ltopr = ltopr;
	stats->extra_data = mystats;

	/*
	 * Determine which standard statistics algorithm to use
	 */
	if (OidIsValid(eqopr) && OidIsValid(ltopr))
	{
		/* Seems to be a scalar datatype */
		stats->compute_stats = compute_scalar_stats;
		/*--------------------
		 * The following choice of minrows is based on the paper
		 * "Random sampling for histogram construction: how much is enough?"
		 * by Surajit Chaudhuri, Rajeev Motwani and Vivek Narasayya, in
		 * Proceedings of ACM SIGMOD International Conference on Management
		 * of Data, 1998, Pages 436-447.  Their Corollary 1 to Theorem 5
		 * says that for table size n, histogram size k, maximum relative
		 * error in bin size f, and error probability gamma, the minimum
		 * random sample size is
		 *		r = 4 * k * ln(2*n/gamma) / f^2
		 * Taking f = 0.5, gamma = 0.01, n = 10^6 rows, we obtain
		 *		r = 305.82 * k
		 * Note that because of the log function, the dependence on n is
		 * quite weak; even at n = 10^12, a 300*k sample gives <= 0.66
		 * bin size error with probability 0.99.  So there's no real need to
		 * scale for n, which is a good thing because we don't necessarily
		 * know it at this point.
		 *--------------------
		 */
		stats->minrows = 300 * attr->attstattarget;
	}
	else if (OidIsValid(eqopr))
	{
		/* We can still recognize distinct values */
		stats->compute_stats = compute_distinct_stats;
		/* Might as well use the same minrows as above */
		stats->minrows = 300 * attr->attstattarget;
	}
	else
	{
		/* Can't do much but the trivial stuff */
		stats->compute_stats = compute_trivial_stats;
		/* Might as well use the same minrows as above */
		stats->minrows = 300 * attr->attstattarget;
	}

	return true;
}


/*
 * compute_trivial_stats() -- compute very basic column statistics
 *
 * We use this when we cannot find a hash "=" operator for the datatype.
 *
 * We determine the fraction of non-null rows and the average datum width.
+ */ +static void +compute_trivial_stats(VacAttrStatsP stats, + AnalyzeAttrFetchFunc fetchfunc, + int samplerows, + double totalrows) +{ + int i; + int null_cnt = 0; + int nonnull_cnt = 0; + double total_width = 0; + bool is_varlena = (!stats->attrtype->typbyval && + stats->attrtype->typlen == -1); + bool is_varwidth = (!stats->attrtype->typbyval && + stats->attrtype->typlen < 0); + + for (i = 0; i < samplerows; i++) + { + Datum value; + bool isnull; + + vacuum_delay_point(); + + value = fetchfunc(stats, i, &isnull); + + /* Check for null/nonnull */ + if (isnull) + { + null_cnt++; + continue; + } + nonnull_cnt++; + + /* + * If it's a variable-width field, add up widths for average width + * calculation. Note that if the value is toasted, we use the toasted + * width. We don't bother with this calculation if it's a fixed-width + * type. + */ + if (is_varlena) + { + total_width += VARSIZE_ANY(DatumGetPointer(value)); + } + else if (is_varwidth) + { + /* must be cstring */ + total_width += strlen(DatumGetCString(value)) + 1; + } + } + + /* We can only compute average width if we found some non-null values. */ + if (nonnull_cnt > 0) + { + stats->stats_valid = true; + /* Do the simple null-frac and width stats */ + stats->stanullfrac = (double) null_cnt / (double) samplerows; + if (is_varwidth) + stats->stawidth = total_width / (double) nonnull_cnt; + else + stats->stawidth = stats->attrtype->typlen; + stats->stadistinct = 0.0; /* "unknown" */ + } + else if (null_cnt > 0) + { + /* We found only nulls; assume the column is entirely null */ + stats->stats_valid = true; + stats->stanullfrac = 1.0; + if (is_varwidth) + stats->stawidth = 0; /* "unknown" */ + else + stats->stawidth = stats->attrtype->typlen; + stats->stadistinct = 0.0; /* "unknown" */ + } +} + + +/* + * compute_distinct_stats() -- compute column statistics including ndistinct + * + * We use this when we can find only an "=" operator for the datatype. 
 *
 * We determine the fraction of non-null rows, the average width, the
 * most common values, and the (estimated) number of distinct values.
 *
 * The most common values are determined by brute force: we keep a list
 * of previously seen values, ordered by number of times seen, as we scan
 * the samples.  A newly seen value is inserted just after the last
 * multiply-seen value, causing the bottommost (oldest) singly-seen value
 * to drop off the list.  The accuracy of this method, and also its cost,
 * depend mainly on the length of the list we are willing to keep.
 */
static void
compute_distinct_stats(VacAttrStatsP stats,
					   AnalyzeAttrFetchFunc fetchfunc,
					   int samplerows,
					   double totalrows)
{
	int			i;
	int			null_cnt = 0;
	int			nonnull_cnt = 0;
	int			toowide_cnt = 0;
	double		total_width = 0;
	bool		is_varlena = (!stats->attrtype->typbyval &&
							  stats->attrtype->typlen == -1);
	bool		is_varwidth = (!stats->attrtype->typbyval &&
							   stats->attrtype->typlen < 0);
	FmgrInfo	f_cmpeq;
	typedef struct
	{
		Datum		value;		/* tracked sample value */
		int			count;		/* # of times seen so far */
	} TrackItem;
	TrackItem  *track;
	int			track_cnt,
				track_max;
	int			num_mcv = stats->attr->attstattarget;
	StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data;

	/*
	 * We track up to 2*n values for an n-element MCV list; but at least 10
	 */
	track_max = 2 * num_mcv;
	if (track_max < 10)
		track_max = 10;
	track = (TrackItem *) palloc(track_max * sizeof(TrackItem));
	track_cnt = 0;

	fmgr_info(mystats->eqfunc, &f_cmpeq);

	for (i = 0; i < samplerows; i++)
	{
		Datum		value;
		bool		isnull;
		bool		match;
		int			firstcount1,
					j;

		vacuum_delay_point();

		value = fetchfunc(stats, i, &isnull);

		/* Check for null/nonnull */
		if (isnull)
		{
			null_cnt++;
			continue;
		}
		nonnull_cnt++;

		/*
		 * If it's a variable-width field, add up widths for average width
		 * calculation.  Note that if the value is toasted, we use the toasted
		 * width.  We don't bother with this calculation if it's a fixed-width
		 * type.
		 */
		if (is_varlena)
		{
			total_width += VARSIZE_ANY(DatumGetPointer(value));

			/*
			 * If the value is toasted, we want to detoast it just once to
			 * avoid repeated detoastings and resultant excess memory usage
			 * during the comparisons.  Also, check to see if the value is
			 * excessively wide, and if so don't detoast at all --- just
			 * ignore the value.
			 */
			if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
			{
				toowide_cnt++;
				continue;
			}
			value = PointerGetDatum(PG_DETOAST_DATUM(value));
		}
		else if (is_varwidth)
		{
			/* must be cstring */
			total_width += strlen(DatumGetCString(value)) + 1;
		}

		/*
		 * See if the value matches anything we're already tracking.  As we
		 * scan, remember in firstcount1 the index of the first singly-seen
		 * entry preceding the match point; that's where a new value will be
		 * inserted, per the algorithm described in the header comment.
		 */
		match = false;
		firstcount1 = track_cnt;
		for (j = 0; j < track_cnt; j++)
		{
			if (DatumGetBool(FunctionCall2Coll(&f_cmpeq,
											   stats->attrcollid,
											   value, track[j].value)))
			{
				match = true;
				break;
			}
			if (j < firstcount1 && track[j].count == 1)
				firstcount1 = j;
		}

		if (match)
		{
			/* Found a match */
			track[j].count++;
			/* This value may now need to "bubble up" in the track list */
			while (j > 0 && track[j].count > track[j - 1].count)
			{
				swapDatum(track[j].value, track[j - 1].value);
				swapInt(track[j].count, track[j - 1].count);
				j--;
			}
		}
		else
		{
			/*
			 * No match.  Insert at head of count-1 list; the shift below
			 * drops the oldest singly-seen entry when the list is full.
			 */
			if (track_cnt < track_max)
				track_cnt++;
			for (j = track_cnt - 1; j > firstcount1; j--)
			{
				track[j].value = track[j - 1].value;
				track[j].count = track[j - 1].count;
			}
			if (firstcount1 < track_cnt)
			{
				track[firstcount1].value = value;
				track[firstcount1].count = 1;
			}
		}
	}

	/* We can only compute real stats if we found some non-null values. */
	if (nonnull_cnt > 0)
	{
		int			nmultiple,
					summultiple;

		stats->stats_valid = true;
		/* Do the simple null-frac and width stats */
		stats->stanullfrac = (double) null_cnt / (double) samplerows;
		if (is_varwidth)
			stats->stawidth = total_width / (double) nonnull_cnt;
		else
			stats->stawidth = stats->attrtype->typlen;

		/* Count the number of values we found multiple times */
		summultiple = 0;
		for (nmultiple = 0; nmultiple < track_cnt; nmultiple++)
		{
			if (track[nmultiple].count == 1)
				break;
			summultiple += track[nmultiple].count;
		}

		if (nmultiple == 0)
		{
			/*
			 * If we found no repeated non-null values, assume it's a unique
			 * column; but be sure to discount for any nulls we found.
			 */
			stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);
		}
		else if (track_cnt < track_max && toowide_cnt == 0 &&
				 nmultiple == track_cnt)
		{
			/*
			 * Our track list includes every value in the sample, and every
			 * value appeared more than once.  Assume the column has just
			 * these values.  (This case is meant to address columns with
			 * small, fixed sets of possible values, such as boolean or enum
			 * columns.  If there are any values that appear just once in the
			 * sample, including too-wide values, we should assume that that's
			 * not what we're dealing with.)
			 */
			stats->stadistinct = track_cnt;
		}
		else
		{
			/*----------
			 * Estimate the number of distinct values using the estimator
			 * proposed by Haas and Stokes in IBM Research Report RJ 10025:
			 *		n*d / (n - f1 + f1*n/N)
			 * where f1 is the number of distinct values that occurred
			 * exactly once in our sample of n rows (from a total of N),
			 * and d is the total number of distinct values in the sample.
			 * This is their Duj1 estimator; the other estimators they
			 * recommend are considerably more complex, and are numerically
			 * very unstable when n is much smaller than N.
			 *
			 * In this calculation, we consider only non-nulls.  We used to
			 * include rows with null values in the n and N counts, but that
			 * leads to inaccurate answers in columns with many nulls, and
			 * it's intuitively bogus anyway considering the desired result is
			 * the number of distinct non-null values.
			 *
			 * We assume (not very reliably!) that all the multiply-occurring
			 * values are reflected in the final track[] list, and the other
			 * nonnull values all appeared but once.  (XXX this usually
			 * results in a drastic overestimate of ndistinct.  Can we do
			 * any better?)
			 *----------
			 */
			int			f1 = nonnull_cnt - summultiple;
			int			d = f1 + nmultiple;
			double		n = samplerows - null_cnt;
			double		N = totalrows * (1.0 - stats->stanullfrac);
			double		stadistinct;

			/* N == 0 shouldn't happen, but just in case ... */
			if (N > 0)
				stadistinct = (n * d) / ((n - f1) + f1 * n / N);
			else
				stadistinct = 0;

			/* Clamp to sane range in case of roundoff error */
			if (stadistinct < d)
				stadistinct = d;
			if (stadistinct > N)
				stadistinct = N;
			/* And round to integer */
			stats->stadistinct = floor(stadistinct + 0.5);
		}

		/*
		 * If we estimated the number of distinct values at more than 10% of
		 * the total row count (a very arbitrary limit), then assume that
		 * stadistinct should scale with the row count rather than be a fixed
		 * value.
		 */
		if (stats->stadistinct > 0.1 * totalrows)
			stats->stadistinct = -(stats->stadistinct / totalrows);

		/*
		 * Decide how many values are worth storing as most-common values. If
		 * we are able to generate a complete MCV list (all the values in the
		 * sample will fit, and we think these are all the ones in the table),
		 * then do so.  Otherwise, store only those values that are
		 * significantly more common than the values not in the list.
		 *
		 * Note: the first of these cases is meant to address columns with
		 * small, fixed sets of possible values, such as boolean or enum
		 * columns.  If we can *completely* represent the column population by
		 * an MCV list that will fit into the stats target, then we should do
		 * so and thus provide the planner with complete information.  But if
		 * the MCV list is not complete, it's generally worth being more
		 * selective, and not just filling it all the way up to the stats
		 * target.
		 */
		if (track_cnt < track_max && toowide_cnt == 0 &&
			stats->stadistinct > 0 &&
			track_cnt <= num_mcv)
		{
			/* Track list includes all values seen, and all will fit */
			num_mcv = track_cnt;
		}
		else
		{
			int		   *mcv_counts;

			/* Incomplete list; decide how many values are worth keeping */
			if (num_mcv > track_cnt)
				num_mcv = track_cnt;

			if (num_mcv > 0)
			{
				mcv_counts = (int *) palloc(num_mcv * sizeof(int));
				for (i = 0; i < num_mcv; i++)
					mcv_counts[i] = track[i].count;

				num_mcv = analyze_mcv_list(mcv_counts, num_mcv,
										   stats->stadistinct,
										   stats->stanullfrac,
										   samplerows, totalrows);
			}
		}

		/* Generate MCV slot entry */
		if (num_mcv > 0)
		{
			MemoryContext old_context;
			Datum	   *mcv_values;
			float4	   *mcv_freqs;

			/* Must copy the target values into anl_context */
			old_context = MemoryContextSwitchTo(stats->anl_context);
			mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum));
			mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4));
			for (i = 0; i < num_mcv; i++)
			{
				mcv_values[i] = datumCopy(track[i].value,
										  stats->attrtype->typbyval,
										  stats->attrtype->typlen);
				mcv_freqs[i] = (double) track[i].count / (double) samplerows;
			}
			MemoryContextSwitchTo(old_context);

			stats->stakind[0] = STATISTIC_KIND_MCV;
			stats->staop[0] = mystats->eqopr;
			stats->stacoll[0] = stats->attrcollid;
			stats->stanumbers[0] = mcv_freqs;
			stats->numnumbers[0] = num_mcv;
			stats->stavalues[0] = mcv_values;
			stats->numvalues[0] = num_mcv;

			/*
			 * Accept the defaults for stats->statypid and others.
They have + * been set before we were called (see vacuum.h) + */ + } + } + else if (null_cnt > 0) + { + /* We found only nulls; assume the column is entirely null */ + stats->stats_valid = true; + stats->stanullfrac = 1.0; + if (is_varwidth) + stats->stawidth = 0; /* "unknown" */ + else + stats->stawidth = stats->attrtype->typlen; + stats->stadistinct = 0.0; /* "unknown" */ + } + + /* We don't need to bother cleaning up any of our temporary palloc's */ +} + + +/* + * compute_scalar_stats() -- compute column statistics + * + * We use this when we can find "=" and "<" operators for the datatype. + * + * We determine the fraction of non-null rows, the average width, the + * most common values, the (estimated) number of distinct values, the + * distribution histogram, and the correlation of physical to logical order. + * + * The desired stats can be determined fairly easily after sorting the + * data values into order. + */ +static void +compute_scalar_stats(VacAttrStatsP stats, + AnalyzeAttrFetchFunc fetchfunc, + int samplerows, + double totalrows) +{ + int i; + int null_cnt = 0; + int nonnull_cnt = 0; + int toowide_cnt = 0; + double total_width = 0; + bool is_varlena = (!stats->attrtype->typbyval && + stats->attrtype->typlen == -1); + bool is_varwidth = (!stats->attrtype->typbyval && + stats->attrtype->typlen < 0); + double corr_xysum; + SortSupportData ssup; + ScalarItem *values; + int values_cnt = 0; + int *tupnoLink; + ScalarMCVItem *track; + int track_cnt = 0; + int num_mcv = stats->attr->attstattarget; + int num_bins = stats->attr->attstattarget; + StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data; + + values = (ScalarItem *) palloc(samplerows * sizeof(ScalarItem)); + tupnoLink = (int *) palloc(samplerows * sizeof(int)); + track = (ScalarMCVItem *) palloc(num_mcv * sizeof(ScalarMCVItem)); + + memset(&ssup, 0, sizeof(ssup)); + ssup.ssup_cxt = CurrentMemoryContext; + ssup.ssup_collation = stats->attrcollid; + ssup.ssup_nulls_first = false; + + /* + * 
For now, don't perform abbreviated key conversion, because full values + * are required for MCV slot generation. Supporting that optimization + * would necessitate teaching compare_scalars() to call a tie-breaker. + */ + ssup.abbreviate = false; + + PrepareSortSupportFromOrderingOp(mystats->ltopr, &ssup); + + /* Initial scan to find sortable values */ + for (i = 0; i < samplerows; i++) + { + Datum value; + bool isnull; + + vacuum_delay_point(); + + value = fetchfunc(stats, i, &isnull); + + /* Check for null/nonnull */ + if (isnull) + { + null_cnt++; + continue; + } + nonnull_cnt++; + + /* + * If it's a variable-width field, add up widths for average width + * calculation. Note that if the value is toasted, we use the toasted + * width. We don't bother with this calculation if it's a fixed-width + * type. + */ + if (is_varlena) + { + total_width += VARSIZE_ANY(DatumGetPointer(value)); + + /* + * If the value is toasted, we want to detoast it just once to + * avoid repeated detoastings and resultant excess memory usage + * during the comparisons. Also, check to see if the value is + * excessively wide, and if so don't detoast at all --- just + * ignore the value. + */ + if (toast_raw_datum_size(value) > WIDTH_THRESHOLD) + { + toowide_cnt++; + continue; + } + value = PointerGetDatum(PG_DETOAST_DATUM(value)); + } + else if (is_varwidth) + { + /* must be cstring */ + total_width += strlen(DatumGetCString(value)) + 1; + } + + /* Add it to the list to be sorted */ + values[values_cnt].value = value; + values[values_cnt].tupno = values_cnt; + tupnoLink[values_cnt] = values_cnt; + values_cnt++; + } + + /* We can only compute real stats if we found some sortable values. 
*/ + if (values_cnt > 0) + { + int ndistinct, /* # distinct values in sample */ + nmultiple, /* # that appear multiple times */ + num_hist, + dups_cnt; + int slot_idx = 0; + CompareScalarsContext cxt; + + /* Sort the collected values */ + cxt.ssup = &ssup; + cxt.tupnoLink = tupnoLink; + qsort_interruptible((void *) values, values_cnt, sizeof(ScalarItem), + compare_scalars, (void *) &cxt); + + /* + * Now scan the values in order, find the most common ones, and also + * accumulate ordering-correlation statistics. + * + * To determine which are most common, we first have to count the + * number of duplicates of each value. The duplicates are adjacent in + * the sorted list, so a brute-force approach is to compare successive + * datum values until we find two that are not equal. However, that + * requires N-1 invocations of the datum comparison routine, which are + * completely redundant with work that was done during the sort. (The + * sort algorithm must at some point have compared each pair of items + * that are adjacent in the sorted order; otherwise it could not know + * that it's ordered the pair correctly.) We exploit this by having + * compare_scalars remember the highest tupno index that each + * ScalarItem has been found equal to. At the end of the sort, a + * ScalarItem's tupnoLink will still point to itself if and only if it + * is the last item of its group of duplicates (since the group will + * be ordered by tupno). + */ + corr_xysum = 0; + ndistinct = 0; + nmultiple = 0; + dups_cnt = 0; + for (i = 0; i < values_cnt; i++) + { + int tupno = values[i].tupno; + + corr_xysum += ((double) i) * ((double) tupno); + dups_cnt++; + if (tupnoLink[tupno] == tupno) + { + /* Reached end of duplicates of this value */ + ndistinct++; + if (dups_cnt > 1) + { + nmultiple++; + if (track_cnt < num_mcv || + dups_cnt > track[track_cnt - 1].count) + { + /* + * Found a new item for the mcv list; find its + * position, bubbling down old items if needed. 
Loop + * invariant is that j points at an empty/ replaceable + * slot. + */ + int j; + + if (track_cnt < num_mcv) + track_cnt++; + for (j = track_cnt - 1; j > 0; j--) + { + if (dups_cnt <= track[j - 1].count) + break; + track[j].count = track[j - 1].count; + track[j].first = track[j - 1].first; + } + track[j].count = dups_cnt; + track[j].first = i + 1 - dups_cnt; + } + } + dups_cnt = 0; + } + } + + stats->stats_valid = true; + /* Do the simple null-frac and width stats */ + stats->stanullfrac = (double) null_cnt / (double) samplerows; + if (is_varwidth) + stats->stawidth = total_width / (double) nonnull_cnt; + else + stats->stawidth = stats->attrtype->typlen; + + if (nmultiple == 0) + { + /* + * If we found no repeated non-null values, assume it's a unique + * column; but be sure to discount for any nulls we found. + */ + stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac); + } + else if (toowide_cnt == 0 && nmultiple == ndistinct) + { + /* + * Every value in the sample appeared more than once. Assume the + * column has just these values. (This case is meant to address + * columns with small, fixed sets of possible values, such as + * boolean or enum columns. If there are any values that appear + * just once in the sample, including too-wide values, we should + * assume that that's not what we're dealing with.) + */ + stats->stadistinct = ndistinct; + } + else + { + /*---------- + * Estimate the number of distinct values using the estimator + * proposed by Haas and Stokes in IBM Research Report RJ 10025: + * n*d / (n - f1 + f1*n/N) + * where f1 is the number of distinct values that occurred + * exactly once in our sample of n rows (from a total of N), + * and d is the total number of distinct values in the sample. + * This is their Duj1 estimator; the other estimators they + * recommend are considerably more complex, and are numerically + * very unstable when n is much smaller than N. + * + * In this calculation, we consider only non-nulls. 
We used to + * include rows with null values in the n and N counts, but that + * leads to inaccurate answers in columns with many nulls, and + * it's intuitively bogus anyway considering the desired result is + * the number of distinct non-null values. + * + * Overwidth values are assumed to have been distinct. + *---------- + */ + int f1 = ndistinct - nmultiple + toowide_cnt; + int d = f1 + nmultiple; + double n = samplerows - null_cnt; + double N = totalrows * (1.0 - stats->stanullfrac); + double stadistinct; + + /* N == 0 shouldn't happen, but just in case ... */ + if (N > 0) + stadistinct = (n * d) / ((n - f1) + f1 * n / N); + else + stadistinct = 0; + + /* Clamp to sane range in case of roundoff error */ + if (stadistinct < d) + stadistinct = d; + if (stadistinct > N) + stadistinct = N; + /* And round to integer */ + stats->stadistinct = floor(stadistinct + 0.5); + } + + /* + * If we estimated the number of distinct values at more than 10% of + * the total row count (a very arbitrary limit), then assume that + * stadistinct should scale with the row count rather than be a fixed + * value. + */ + if (stats->stadistinct > 0.1 * totalrows) + stats->stadistinct = -(stats->stadistinct / totalrows); + + /* + * Decide how many values are worth storing as most-common values. If + * we are able to generate a complete MCV list (all the values in the + * sample will fit, and we think these are all the ones in the table), + * then do so. Otherwise, store only those values that are + * significantly more common than the values not in the list. + * + * Note: the first of these cases is meant to address columns with + * small, fixed sets of possible values, such as boolean or enum + * columns. If we can *completely* represent the column population by + * an MCV list that will fit into the stats target, then we should do + * so and thus provide the planner with complete information. 
But if + * the MCV list is not complete, it's generally worth being more + * selective, and not just filling it all the way up to the stats + * target. + */ + if (track_cnt == ndistinct && toowide_cnt == 0 && + stats->stadistinct > 0 && + track_cnt <= num_mcv) + { + /* Track list includes all values seen, and all will fit */ + num_mcv = track_cnt; + } + else + { + int *mcv_counts; + + /* Incomplete list; decide how many values are worth keeping */ + if (num_mcv > track_cnt) + num_mcv = track_cnt; + + if (num_mcv > 0) + { + mcv_counts = (int *) palloc(num_mcv * sizeof(int)); + for (i = 0; i < num_mcv; i++) + mcv_counts[i] = track[i].count; + + num_mcv = analyze_mcv_list(mcv_counts, num_mcv, + stats->stadistinct, + stats->stanullfrac, + samplerows, totalrows); + } + } + + /* Generate MCV slot entry */ + if (num_mcv > 0) + { + MemoryContext old_context; + Datum *mcv_values; + float4 *mcv_freqs; + + /* Must copy the target values into anl_context */ + old_context = MemoryContextSwitchTo(stats->anl_context); + mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum)); + mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4)); + for (i = 0; i < num_mcv; i++) + { + mcv_values[i] = datumCopy(values[track[i].first].value, + stats->attrtype->typbyval, + stats->attrtype->typlen); + mcv_freqs[i] = (double) track[i].count / (double) samplerows; + } + MemoryContextSwitchTo(old_context); + + stats->stakind[slot_idx] = STATISTIC_KIND_MCV; + stats->staop[slot_idx] = mystats->eqopr; + stats->stacoll[slot_idx] = stats->attrcollid; + stats->stanumbers[slot_idx] = mcv_freqs; + stats->numnumbers[slot_idx] = num_mcv; + stats->stavalues[slot_idx] = mcv_values; + stats->numvalues[slot_idx] = num_mcv; + + /* + * Accept the defaults for stats->statypid and others. They have + * been set before we were called (see vacuum.h) + */ + slot_idx++; + } + + /* + * Generate a histogram slot entry if there are at least two distinct + * values not accounted for in the MCV list. 
(This ensures the + * histogram won't collapse to empty or a singleton.) + */ + num_hist = ndistinct - num_mcv; + if (num_hist > num_bins) + num_hist = num_bins + 1; + if (num_hist >= 2) + { + MemoryContext old_context; + Datum *hist_values; + int nvals; + int pos, + posfrac, + delta, + deltafrac; + + /* Sort the MCV items into position order to speed next loop */ + qsort_interruptible((void *) track, num_mcv, sizeof(ScalarMCVItem), + compare_mcvs, NULL); + + /* + * Collapse out the MCV items from the values[] array. + * + * Note we destroy the values[] array here... but we don't need it + * for anything more. We do, however, still need values_cnt. + * nvals will be the number of remaining entries in values[]. + */ + if (num_mcv > 0) + { + int src, + dest; + int j; + + src = dest = 0; + j = 0; /* index of next interesting MCV item */ + while (src < values_cnt) + { + int ncopy; + + if (j < num_mcv) + { + int first = track[j].first; + + if (src >= first) + { + /* advance past this MCV item */ + src = first + track[j].count; + j++; + continue; + } + ncopy = first - src; + } + else + ncopy = values_cnt - src; + memmove(&values[dest], &values[src], + ncopy * sizeof(ScalarItem)); + src += ncopy; + dest += ncopy; + } + nvals = dest; + } + else + nvals = values_cnt; + Assert(nvals >= num_hist); + + /* Must copy the target values into anl_context */ + old_context = MemoryContextSwitchTo(stats->anl_context); + hist_values = (Datum *) palloc(num_hist * sizeof(Datum)); + + /* + * The object of this loop is to copy the first and last values[] + * entries along with evenly-spaced values in between. So the + * i'th value is values[(i * (nvals - 1)) / (num_hist - 1)]. But + * computing that subscript directly risks integer overflow when + * the stats target is more than a couple thousand. Instead we + * add (nvals - 1) / (num_hist - 1) to pos at each step, tracking + * the integral and fractional parts of the sum separately. 
+ */ + delta = (nvals - 1) / (num_hist - 1); + deltafrac = (nvals - 1) % (num_hist - 1); + pos = posfrac = 0; + + for (i = 0; i < num_hist; i++) + { + hist_values[i] = datumCopy(values[pos].value, + stats->attrtype->typbyval, + stats->attrtype->typlen); + pos += delta; + posfrac += deltafrac; + if (posfrac >= (num_hist - 1)) + { + /* fractional part exceeds 1, carry to integer part */ + pos++; + posfrac -= (num_hist - 1); + } + } + + MemoryContextSwitchTo(old_context); + + stats->stakind[slot_idx] = STATISTIC_KIND_HISTOGRAM; + stats->staop[slot_idx] = mystats->ltopr; + stats->stacoll[slot_idx] = stats->attrcollid; + stats->stavalues[slot_idx] = hist_values; + stats->numvalues[slot_idx] = num_hist; + + /* + * Accept the defaults for stats->statypid and others. They have + * been set before we were called (see vacuum.h) + */ + slot_idx++; + } + + /* Generate a correlation entry if there are multiple values */ + if (values_cnt > 1) + { + MemoryContext old_context; + float4 *corrs; + double corr_xsum, + corr_x2sum; + + /* Must copy the target values into anl_context */ + old_context = MemoryContextSwitchTo(stats->anl_context); + corrs = (float4 *) palloc(sizeof(float4)); + MemoryContextSwitchTo(old_context); + + /*---------- + * Since we know the x and y value sets are both + * 0, 1, ..., values_cnt-1 + * we have sum(x) = sum(y) = + * (values_cnt-1)*values_cnt / 2 + * and sum(x^2) = sum(y^2) = + * (values_cnt-1)*values_cnt*(2*values_cnt-1) / 6. 
+ *---------- + */ + corr_xsum = ((double) (values_cnt - 1)) * + ((double) values_cnt) / 2.0; + corr_x2sum = ((double) (values_cnt - 1)) * + ((double) values_cnt) * (double) (2 * values_cnt - 1) / 6.0; + + /* And the correlation coefficient reduces to */ + corrs[0] = (values_cnt * corr_xysum - corr_xsum * corr_xsum) / + (values_cnt * corr_x2sum - corr_xsum * corr_xsum); + + stats->stakind[slot_idx] = STATISTIC_KIND_CORRELATION; + stats->staop[slot_idx] = mystats->ltopr; + stats->stacoll[slot_idx] = stats->attrcollid; + stats->stanumbers[slot_idx] = corrs; + stats->numnumbers[slot_idx] = 1; + slot_idx++; + } + } + else if (nonnull_cnt > 0) + { + /* We found some non-null values, but they were all too wide */ + Assert(nonnull_cnt == toowide_cnt); + stats->stats_valid = true; + /* Do the simple null-frac and width stats */ + stats->stanullfrac = (double) null_cnt / (double) samplerows; + if (is_varwidth) + stats->stawidth = total_width / (double) nonnull_cnt; + else + stats->stawidth = stats->attrtype->typlen; + /* Assume all too-wide values are distinct, so it's a unique column */ + stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac); + } + else if (null_cnt > 0) + { + /* We found only nulls; assume the column is entirely null */ + stats->stats_valid = true; + stats->stanullfrac = 1.0; + if (is_varwidth) + stats->stawidth = 0; /* "unknown" */ + else + stats->stawidth = stats->attrtype->typlen; + stats->stadistinct = 0.0; /* "unknown" */ + } + + /* We don't need to bother cleaning up any of our temporary palloc's */ +} + +/* + * Comparator for sorting ScalarItems + * + * Aside from sorting the items, we update the tupnoLink[] array + * whenever two ScalarItems are found to contain equal datums. The array + * is indexed by tupno; for each ScalarItem, it contains the highest + * tupno that that item's datum has been found to be equal to. This allows + * us to avoid additional comparisons in compute_scalar_stats(). 
+ */ +static int +compare_scalars(const void *a, const void *b, void *arg) +{ + Datum da = ((const ScalarItem *) a)->value; + int ta = ((const ScalarItem *) a)->tupno; + Datum db = ((const ScalarItem *) b)->value; + int tb = ((const ScalarItem *) b)->tupno; + CompareScalarsContext *cxt = (CompareScalarsContext *) arg; + int compare; + + compare = ApplySortComparator(da, false, db, false, cxt->ssup); + if (compare != 0) + return compare; + + /* + * The two datums are equal, so update cxt->tupnoLink[]. + */ + if (cxt->tupnoLink[ta] < tb) + cxt->tupnoLink[ta] = tb; + if (cxt->tupnoLink[tb] < ta) + cxt->tupnoLink[tb] = ta; + + /* + * For equal datums, sort by tupno + */ + return ta - tb; +} + +/* + * Comparator for sorting ScalarMCVItems by position + */ +static int +compare_mcvs(const void *a, const void *b, void *arg) +{ + int da = ((const ScalarMCVItem *) a)->first; + int db = ((const ScalarMCVItem *) b)->first; + + return da - db; +} + +/* + * Analyze the list of common values in the sample and decide how many are + * worth storing in the table's MCV list. + * + * mcv_counts is assumed to be a list of the counts of the most common values + * seen in the sample, starting with the most common. The return value is the + * number that are significantly more common than the values not in the list, + * and which are therefore deemed worth storing in the table's MCV list. + */ +static int +analyze_mcv_list(int *mcv_counts, + int num_mcv, + double stadistinct, + double stanullfrac, + int samplerows, + double totalrows) +{ + double ndistinct_table; + double sumcount; + int i; + + /* + * If the entire table was sampled, keep the whole list. This also + * protects us against division by zero in the code below. 
+ */ + if (samplerows == totalrows || totalrows <= 1.0) + return num_mcv; + + /* Re-extract the estimated number of distinct nonnull values in table */ + ndistinct_table = stadistinct; + if (ndistinct_table < 0) + ndistinct_table = -ndistinct_table * totalrows; + + /* + * Exclude the least common values from the MCV list, if they are not + * significantly more common than the estimated selectivity they would + * have if they weren't in the list. All non-MCV values are assumed to be + * equally common, after taking into account the frequencies of all the + * values in the MCV list and the number of nulls (c.f. eqsel()). + * + * Here sumcount tracks the total count of all but the last (least common) + * value in the MCV list, allowing us to determine the effect of excluding + * that value from the list. + * + * Note that we deliberately do this by removing values from the full + * list, rather than starting with an empty list and adding values, + * because the latter approach can fail to add any values if all the most + * common values have around the same frequency and make up the majority + * of the table, so that the overall average frequency of all values is + * roughly the same as that of the common values. This would lead to any + * uncommon values being significantly overestimated. + */ + sumcount = 0.0; + for (i = 0; i < num_mcv - 1; i++) + sumcount += mcv_counts[i]; + + while (num_mcv > 0) + { + double selec, + otherdistinct, + N, + n, + K, + variance, + stddev; + + /* + * Estimated selectivity the least common value would have if it + * wasn't in the MCV list (c.f. eqsel()). + */ + selec = 1.0 - sumcount / samplerows - stanullfrac; + if (selec < 0.0) + selec = 0.0; + if (selec > 1.0) + selec = 1.0; + otherdistinct = ndistinct_table - (num_mcv - 1); + if (otherdistinct > 1) + selec /= otherdistinct; + + /* + * If the value is kept in the MCV list, its population frequency is + * assumed to equal its sample frequency. 
We use the lower end of a + * textbook continuity-corrected Wald-type confidence interval to + * determine if that is significantly more common than the non-MCV + * frequency --- specifically we assume the population frequency is + * highly likely to be within around 2 standard errors of the sample + * frequency, which equates to an interval of 2 standard deviations + * either side of the sample count, plus an additional 0.5 for the + * continuity correction. Since we are sampling without replacement, + * this is a hypergeometric distribution. + * + * XXX: Empirically, this approach seems to work quite well, but it + * may be worth considering more advanced techniques for estimating + * the confidence interval of the hypergeometric distribution. + */ + N = totalrows; + n = samplerows; + K = N * mcv_counts[num_mcv - 1] / n; + variance = n * K * (N - K) * (N - n) / (N * N * (N - 1)); + stddev = sqrt(variance); + + if (mcv_counts[num_mcv - 1] > selec * samplerows + 2 * stddev + 0.5) + { + /* + * The value is significantly more common than the non-MCV + * selectivity would suggest. Keep it, and all the other more + * common values in the list. 
+ */ + break; + } + else + { + /* Discard this value and consider the next least common value */ + num_mcv--; + if (num_mcv == 0) + break; + sumcount -= mcv_counts[num_mcv - 1]; + } + } + return num_mcv; +} diff --git a/src/backend/commands/async.c b/src/backend/commands/async.c new file mode 100644 index 0000000..3e1b92d --- /dev/null +++ b/src/backend/commands/async.c @@ -0,0 +1,2446 @@ +/*------------------------------------------------------------------------- + * + * async.c + * Asynchronous notification: NOTIFY, LISTEN, UNLISTEN + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/commands/async.c + * + *------------------------------------------------------------------------- + */ + +/*------------------------------------------------------------------------- + * Async Notification Model as of 9.0: + * + * 1. Multiple backends on same machine. Multiple backends listening on + * several channels. (Channels are also called "conditions" in other + * parts of the code.) + * + * 2. There is one central queue in disk-based storage (directory pg_notify/), + * with actively-used pages mapped into shared memory by the slru.c module. + * All notification messages are placed in the queue and later read out + * by listening backends. + * + * There is no central knowledge of which backend listens on which channel; + * every backend has its own list of interesting channels. + * + * Although there is only one queue, notifications are treated as being + * database-local; this is done by including the sender's database OID + * in each notification message. Listening backends ignore messages + * that don't match their database OID. This is important because it + * ensures senders and receivers have the same database encoding and won't + * misinterpret non-ASCII text in the channel name or payload string. 
+ * + * Since notifications are not expected to survive database crashes, + * we can simply clean out the pg_notify data at any reboot, and there + * is no need for WAL support or fsync'ing. + * + * 3. Every backend that is listening on at least one channel registers by + * entering its PID into the array in AsyncQueueControl. It then scans all + * incoming notifications in the central queue and first compares the + * database OID of the notification with its own database OID and then + * compares the notified channel with the list of channels that it listens + * to. In case there is a match it delivers the notification event to its + * frontend. Non-matching events are simply skipped. + * + * 4. The NOTIFY statement (routine Async_Notify) stores the notification in + * a backend-local list which will not be processed until transaction end. + * + * Duplicate notifications from the same transaction are sent out as one + * notification only. This is done to save work when for example a trigger + * on a 2 million row table fires a notification for each row that has been + * changed. If the application needs to receive every single notification + * that has been sent, it can easily add some unique string into the extra + * payload parameter. + * + * When the transaction is ready to commit, PreCommit_Notify() adds the + * pending notifications to the head of the queue. The head pointer of the + * queue always points to the next free position and a position is just a + * page number and the offset in that page. This is done before marking the + * transaction as committed in clog. If we run into problems writing the + * notifications, we can still call elog(ERROR, ...) and the transaction + * will roll back. + * + * Once we have put all of the notifications into the queue, we return to + * CommitTransaction() which will then do the actual transaction commit. + * + * After commit we are called another time (AtCommit_Notify()). 
Here we + * make any actual updates to the effective listen state (listenChannels). + * Then we signal any backends that may be interested in our messages + * (including our own backend, if listening). This is done by + * SignalBackends(), which scans the list of listening backends and sends a + * PROCSIG_NOTIFY_INTERRUPT signal to every listening backend (we don't + * know which backend is listening on which channel so we must signal them + * all). We can exclude backends that are already up to date, though, and + * we can also exclude backends that are in other databases (unless they + * are way behind and should be kicked to make them advance their + * pointers). + * + * Finally, after we are out of the transaction altogether and about to go + * idle, we scan the queue for messages that need to be sent to our + * frontend (which might be notifies from other backends, or self-notifies + * from our own). This step is not part of the CommitTransaction sequence + * for two important reasons. First, we could get errors while sending + * data to our frontend, and it's really bad for errors to happen in + * post-commit cleanup. Second, in cases where a procedure issues commits + * within a single frontend command, we don't want to send notifies to our + * frontend until the command is done; but notifies to other backends + * should go out immediately after each commit. + * + * 5. Upon receipt of a PROCSIG_NOTIFY_INTERRUPT signal, the signal handler + * sets the process's latch, which triggers the event to be processed + * immediately if this backend is idle (i.e., it is waiting for a frontend + * command and is not within a transaction block. C.f. + * ProcessClientReadInterrupt()). Otherwise the handler may only set a + * flag, which will cause the processing to occur just before we next go + * idle. + * + * Inbound-notify processing consists of reading all of the notifications + * that have arrived since scanning last time. 
We read every notification + * until we reach either a notification from an uncommitted transaction or + * the head pointer's position. + * + * 6. To avoid SLRU wraparound and limit disk space consumption, the tail + * pointer needs to be advanced so that old pages can be truncated. + * This is relatively expensive (notably, it requires an exclusive lock), + * so we don't want to do it often. We make sending backends do this work + * if they advanced the queue head into a new page, but only once every + * QUEUE_CLEANUP_DELAY pages. + * + * An application that listens on the same channel it notifies will get + * NOTIFY messages for its own NOTIFYs. These can be ignored, if not useful, + * by comparing be_pid in the NOTIFY message to the application's own backend's + * PID. (As of FE/BE protocol 2.0, the backend's PID is provided to the + * frontend during startup.) The above design guarantees that notifies from + * other backends will never be missed by ignoring self-notifies. + * + * The amount of shared memory used for notify management (NUM_NOTIFY_BUFFERS) + * can be varied without affecting anything but performance. The maximum + * amount of notification data that can be queued at one time is determined + * by slru.c's wraparound limit; see QUEUE_MAX_PAGE below. 
+ *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include +#include +#include + +#include "access/parallel.h" +#include "access/slru.h" +#include "access/transam.h" +#include "access/xact.h" +#include "catalog/pg_database.h" +#include "commands/async.h" +#include "common/hashfn.h" +#include "funcapi.h" +#include "libpq/libpq.h" +#include "libpq/pqformat.h" +#include "miscadmin.h" +#include "storage/ipc.h" +#include "storage/lmgr.h" +#include "storage/proc.h" +#include "storage/procarray.h" +#include "storage/procsignal.h" +#include "storage/sinval.h" +#include "tcop/tcopprot.h" +#include "utils/builtins.h" +#include "utils/memutils.h" +#include "utils/ps_status.h" +#include "utils/snapmgr.h" +#include "utils/timestamp.h" + + +/* + * Maximum size of a NOTIFY payload, including terminating NULL. This + * must be kept small enough so that a notification message fits on one + * SLRU page. The magic fudge factor here is noncritical as long as it's + * more than AsyncQueueEntryEmptySize --- we make it significantly bigger + * than that, so changes in that data structure won't affect user-visible + * restrictions. + */ +#define NOTIFY_PAYLOAD_MAX_LENGTH (BLCKSZ - NAMEDATALEN - 128) + +/* + * Struct representing an entry in the global notify queue + * + * This struct declaration has the maximal length, but in a real queue entry + * the data area is only big enough for the actual channel and payload strings + * (each null-terminated). AsyncQueueEntryEmptySize is the minimum possible + * entry size, if both channel and payload strings are empty (but note it + * doesn't include alignment padding). + * + * The "length" field should always be rounded up to the next QUEUEALIGN + * multiple so that all fields are properly aligned. 
+ */ +typedef struct AsyncQueueEntry +{ + int length; /* total allocated length of entry */ + Oid dboid; /* sender's database OID */ + TransactionId xid; /* sender's XID */ + int32 srcPid; /* sender's PID */ + char data[NAMEDATALEN + NOTIFY_PAYLOAD_MAX_LENGTH]; +} AsyncQueueEntry; + +/* Currently, no field of AsyncQueueEntry requires more than int alignment */ +#define QUEUEALIGN(len) INTALIGN(len) + +#define AsyncQueueEntryEmptySize (offsetof(AsyncQueueEntry, data) + 2) + +/* + * Struct describing a queue position, and assorted macros for working with it + */ +typedef struct QueuePosition +{ + int page; /* SLRU page number */ + int offset; /* byte offset within page */ +} QueuePosition; + +#define QUEUE_POS_PAGE(x) ((x).page) +#define QUEUE_POS_OFFSET(x) ((x).offset) + +#define SET_QUEUE_POS(x,y,z) \ + do { \ + (x).page = (y); \ + (x).offset = (z); \ + } while (0) + +#define QUEUE_POS_EQUAL(x,y) \ + ((x).page == (y).page && (x).offset == (y).offset) + +#define QUEUE_POS_IS_ZERO(x) \ + ((x).page == 0 && (x).offset == 0) + +/* choose logically smaller QueuePosition */ +#define QUEUE_POS_MIN(x,y) \ + (asyncQueuePagePrecedes((x).page, (y).page) ? (x) : \ + (x).page != (y).page ? (y) : \ + (x).offset < (y).offset ? (x) : (y)) + +/* choose logically larger QueuePosition */ +#define QUEUE_POS_MAX(x,y) \ + (asyncQueuePagePrecedes((x).page, (y).page) ? (y) : \ + (x).page != (y).page ? (x) : \ + (x).offset > (y).offset ? (x) : (y)) + +/* + * Parameter determining how often we try to advance the tail pointer: + * we do that after every QUEUE_CLEANUP_DELAY pages of NOTIFY data. This is + * also the distance by which a backend in another database needs to be + * behind before we'll decide we need to wake it up to advance its pointer. + * + * Resist the temptation to make this really large. While that would save + * work in some places, it would add cost in others. 
In particular, this + * should likely be less than NUM_NOTIFY_BUFFERS, to ensure that backends + * catch up before the pages they'll need to read fall out of SLRU cache. + */ +#define QUEUE_CLEANUP_DELAY 4 + +/* + * Struct describing a listening backend's status + */ +typedef struct QueueBackendStatus +{ + int32 pid; /* either a PID or InvalidPid */ + Oid dboid; /* backend's database OID, or InvalidOid */ + BackendId nextListener; /* id of next listener, or InvalidBackendId */ + QueuePosition pos; /* backend has read queue up to here */ +} QueueBackendStatus; + +/* + * Shared memory state for LISTEN/NOTIFY (excluding its SLRU stuff) + * + * The AsyncQueueControl structure is protected by the NotifyQueueLock and + * NotifyQueueTailLock. + * + * When holding NotifyQueueLock in SHARED mode, backends may only inspect + * their own entries as well as the head and tail pointers. Consequently we + * can allow a backend to update its own record while holding only SHARED lock + * (since no other backend will inspect it). + * + * When holding NotifyQueueLock in EXCLUSIVE mode, backends can inspect the + * entries of other backends and also change the head pointer. When holding + * both NotifyQueueLock and NotifyQueueTailLock in EXCLUSIVE mode, backends + * can change the tail pointers. + * + * NotifySLRULock is used as the control lock for the pg_notify SLRU buffers. + * In order to avoid deadlocks, whenever we need multiple locks, we first get + * NotifyQueueTailLock, then NotifyQueueLock, and lastly NotifySLRULock. + * + * Each backend uses the backend[] array entry with index equal to its + * BackendId (which can range from 1 to MaxBackends). We rely on this to make + * SendProcSignal fast. + * + * The backend[] array entries for actively-listening backends are threaded + * together using firstListener and the nextListener links, so that we can + * scan them without having to iterate over inactive entries. 
We keep this
+ * list in order by BackendId so that the scan is cache-friendly when there
+ * are many active entries.
+ */
+typedef struct AsyncQueueControl
+{
+	QueuePosition head;			/* head points to the next free location */
+	QueuePosition tail;			/* tail must be <= the queue position of every
+								 * listening backend */
+	int			stopPage;		/* oldest unrecycled page; must be <=
+								 * tail.page */
+	BackendId	firstListener;	/* id of first listener, or InvalidBackendId */
+	TimestampTz lastQueueFillWarn;	/* time of last queue-full msg */
+	QueueBackendStatus backend[FLEXIBLE_ARRAY_MEMBER];
+	/* backend[0] is not used; used entries are from [1] to [MaxBackends] */
+} AsyncQueueControl;
+
+static AsyncQueueControl *asyncQueueControl;
+
+/* Shorthand accessors for fields of the shared AsyncQueueControl struct */
+#define QUEUE_HEAD					(asyncQueueControl->head)
+#define QUEUE_TAIL					(asyncQueueControl->tail)
+#define QUEUE_STOP_PAGE				(asyncQueueControl->stopPage)
+#define QUEUE_FIRST_LISTENER		(asyncQueueControl->firstListener)
+#define QUEUE_BACKEND_PID(i)		(asyncQueueControl->backend[i].pid)
+#define QUEUE_BACKEND_DBOID(i)		(asyncQueueControl->backend[i].dboid)
+#define QUEUE_NEXT_LISTENER(i)		(asyncQueueControl->backend[i].nextListener)
+#define QUEUE_BACKEND_POS(i)		(asyncQueueControl->backend[i].pos)
+
+/*
+ * The SLRU buffer area through which we access the notification queue
+ */
+static SlruCtlData NotifyCtlData;
+
+#define NotifyCtl					(&NotifyCtlData)
+#define QUEUE_PAGESIZE				BLCKSZ
+#define QUEUE_FULL_WARN_INTERVAL	5000	/* warn at most once every 5s */
+
+/*
+ * Use segments 0000 through FFFF.  Each contains SLRU_PAGES_PER_SEGMENT pages
+ * which gives us the pages from 0 to SLRU_PAGES_PER_SEGMENT * 0x10000 - 1.
+ * We could use as many segments as SlruScanDirectory() allows, but this gives
+ * us so much space already that it doesn't seem worth the trouble.
+ *
+ * The most data we can have in the queue at a time is QUEUE_MAX_PAGE/2
+ * pages, because more than that would confuse slru.c into thinking there
+ * was a wraparound condition.  With the default BLCKSZ this means there
+ * can be up to 8GB of queued-and-not-read data.
+ *
+ * Note: it's possible to redefine QUEUE_MAX_PAGE with a smaller multiple of
+ * SLRU_PAGES_PER_SEGMENT, for easier testing of queue-full behaviour.
+ */
+#define QUEUE_MAX_PAGE			(SLRU_PAGES_PER_SEGMENT * 0x10000 - 1)
+
+/*
+ * listenChannels identifies the channels we are actually listening to
+ * (ie, have committed a LISTEN on).  It is a simple list of channel names,
+ * allocated in TopMemoryContext.
+ */
+static List *listenChannels = NIL;	/* list of C strings */
+
+/*
+ * State for pending LISTEN/UNLISTEN actions consists of an ordered list of
+ * all actions requested in the current transaction.  As explained above,
+ * we don't actually change listenChannels until we reach transaction commit.
+ *
+ * The list is kept in CurTransactionContext.  In subtransactions, each
+ * subtransaction has its own list in its own CurTransactionContext, but
+ * successful subtransactions attach their lists to their parent's list.
+ * Failed subtransactions simply discard their lists.
+ */
+typedef enum
+{
+	LISTEN_LISTEN,
+	LISTEN_UNLISTEN,
+	LISTEN_UNLISTEN_ALL
+} ListenActionKind;
+
+typedef struct
+{
+	ListenActionKind action;
+	char		channel[FLEXIBLE_ARRAY_MEMBER]; /* nul-terminated string */
+} ListenAction;
+
+typedef struct ActionList
+{
+	int			nestingLevel;	/* current transaction nesting depth */
+	List	   *actions;		/* list of ListenAction structs */
+	struct ActionList *upper;	/* details for upper transaction levels */
+} ActionList;
+
+static ActionList *pendingActions = NULL;
+
+/*
+ * State for outbound notifies consists of a list of all channels+payloads
+ * NOTIFYed in the current transaction.  We do not actually perform a NOTIFY
+ * until and unless the transaction commits.  pendingNotifies is NULL if no
+ * NOTIFYs have been done in the current (sub) transaction.
+ *
+ * We discard duplicate notify events issued in the same transaction.
+ * Hence, in addition to the list proper (which we need to track the order
+ * of the events, since we guarantee to deliver them in order), we build a
+ * hash table which we can probe to detect duplicates.  Since building the
+ * hash table is somewhat expensive, we do so only once we have at least
+ * MIN_HASHABLE_NOTIFIES events queued in the current (sub) transaction;
+ * before that we just scan the events linearly.
+ *
+ * The list is kept in CurTransactionContext.  In subtransactions, each
+ * subtransaction has its own list in its own CurTransactionContext, but
+ * successful subtransactions add their entries to their parent's list.
+ * Failed subtransactions simply discard their lists.  Since these lists
+ * are independent, there may be notify events in a subtransaction's list
+ * that duplicate events in some ancestor (sub) transaction; we get rid of
+ * the dups when merging the subtransaction's list into its parent's.
+ *
+ * Note: the action and notify lists do not interact within a transaction.
+ * In particular, if a transaction does NOTIFY and then LISTEN on the same
+ * condition name, it will get a self-notify at commit.  This is a bit odd
+ * but is consistent with our historical behavior.
+ */
+typedef struct Notification
+{
+	uint16		channel_len;	/* length of channel-name string */
+	uint16		payload_len;	/* length of payload string */
+	/* null-terminated channel name, then null-terminated payload follow */
+	char		data[FLEXIBLE_ARRAY_MEMBER];
+} Notification;
+
+typedef struct NotificationList
+{
+	int			nestingLevel;	/* current transaction nesting depth */
+	List	   *events;			/* list of Notification structs */
+	HTAB	   *hashtab;		/* hash of NotificationHash structs, or NULL */
+	struct NotificationList *upper; /* details for upper transaction levels */
+} NotificationList;
+
+#define MIN_HASHABLE_NOTIFIES 16	/* threshold to build hashtab */
+
+/* Dup-detection hash table entry: just a pointer to the Notification */
+typedef struct NotificationHash
+{
+	Notification *event;		/* => the actual Notification struct */
+} NotificationHash;
+
+static NotificationList *pendingNotifies = NULL;
+
+/*
+ * Inbound notifications are initially processed by HandleNotifyInterrupt(),
+ * called from inside a signal handler.  That just sets the
+ * notifyInterruptPending flag and sets the process
+ * latch.  ProcessNotifyInterrupt() will then be called whenever it's safe to
+ * actually deal with the interrupt.
+ */
+volatile sig_atomic_t notifyInterruptPending = false;
+
+/* True if we've registered an on_shmem_exit cleanup */
+static bool unlistenExitRegistered = false;
+
+/* True if we're currently registered as a listener in asyncQueueControl */
+static bool amRegisteredListener = false;
+
+/* have we advanced to a page that's a multiple of QUEUE_CLEANUP_DELAY?
 */
+static bool tryAdvanceTail = false;
+
+/* GUC parameter */
+bool		Trace_notify = false;
+
+/* local function prototypes */
+static int	asyncQueuePageDiff(int p, int q);
+static bool asyncQueuePagePrecedes(int p, int q);
+static void queue_listen(ListenActionKind action, const char *channel);
+static void Async_UnlistenOnExit(int code, Datum arg);
+static void Exec_ListenPreCommit(void);
+static void Exec_ListenCommit(const char *channel);
+static void Exec_UnlistenCommit(const char *channel);
+static void Exec_UnlistenAllCommit(void);
+static bool IsListeningOn(const char *channel);
+static void asyncQueueUnregister(void);
+static bool asyncQueueIsFull(void);
+static bool asyncQueueAdvance(volatile QueuePosition *position, int entryLength);
+static void asyncQueueNotificationToEntry(Notification *n, AsyncQueueEntry *qe);
+static ListCell *asyncQueueAddEntries(ListCell *nextNotify);
+static double asyncQueueUsage(void);
+static void asyncQueueFillWarning(void);
+static void SignalBackends(void);
+static void asyncQueueReadAllNotifications(void);
+static bool asyncQueueProcessPageEntries(volatile QueuePosition *current,
+										 QueuePosition stop,
+										 char *page_buffer,
+										 Snapshot snapshot);
+static void asyncQueueAdvanceTail(void);
+static void ProcessIncomingNotify(bool flush);
+static bool AsyncExistsPendingNotify(Notification *n);
+static void AddEventToPendingNotifies(Notification *n);
+static uint32 notification_hash(const void *key, Size keysize);
+static int	notification_match(const void *key1, const void *key2, Size keysize);
+static void ClearPendingActionsAndNotifies(void);
+
+/*
+ * Compute the difference between two queue page numbers (i.e., p - q),
+ * accounting for wraparound.
+ */
+static int
+asyncQueuePageDiff(int p, int q)
+{
+	int			diff;
+
+	/*
+	 * We have to compare modulo (QUEUE_MAX_PAGE+1)/2.  Both inputs should be
+	 * in the range 0..QUEUE_MAX_PAGE.
+	 */
+	Assert(p >= 0 && p <= QUEUE_MAX_PAGE);
+	Assert(q >= 0 && q <= QUEUE_MAX_PAGE);
+
+	diff = p - q;
+	if (diff >= ((QUEUE_MAX_PAGE + 1) / 2))
+		diff -= QUEUE_MAX_PAGE + 1;
+	else if (diff < -((QUEUE_MAX_PAGE + 1) / 2))
+		diff += QUEUE_MAX_PAGE + 1;
+	return diff;
+}
+
+/*
+ * Is p < q, accounting for wraparound?
+ *
+ * Since asyncQueueIsFull() blocks creation of a page that could precede any
+ * extant page, we need not assess entries within a page.
+ */
+static bool
+asyncQueuePagePrecedes(int p, int q)
+{
+	return asyncQueuePageDiff(p, q) < 0;
+}
+
+/*
+ * Report space needed for our shared memory area
+ */
+Size
+AsyncShmemSize(void)
+{
+	Size		size;
+
+	/* This had better match AsyncShmemInit */
+	size = mul_size(MaxBackends + 1, sizeof(QueueBackendStatus));
+	size = add_size(size, offsetof(AsyncQueueControl, backend));
+
+	size = add_size(size, SimpleLruShmemSize(NUM_NOTIFY_BUFFERS, 0));
+
+	return size;
+}
+
+/*
+ * Initialize our shared memory area
+ */
+void
+AsyncShmemInit(void)
+{
+	bool		found;
+	Size		size;
+
+	/*
+	 * Create or attach to the AsyncQueueControl structure.
+	 *
+	 * The used entries in the backend[] array run from 1 to MaxBackends; the
+	 * zero'th entry is unused but must be allocated.
+	 */
+	size = mul_size(MaxBackends + 1, sizeof(QueueBackendStatus));
+	size = add_size(size, offsetof(AsyncQueueControl, backend));
+
+	asyncQueueControl = (AsyncQueueControl *)
+		ShmemInitStruct("Async Queue Control", size, &found);
+
+	if (!found)
+	{
+		/* First time through, so initialize it */
+		SET_QUEUE_POS(QUEUE_HEAD, 0, 0);
+		SET_QUEUE_POS(QUEUE_TAIL, 0, 0);
+		QUEUE_STOP_PAGE = 0;
+		QUEUE_FIRST_LISTENER = InvalidBackendId;
+		asyncQueueControl->lastQueueFillWarn = 0;
+		/* zero'th entry won't be used, but let's initialize it anyway */
+		for (int i = 0; i <= MaxBackends; i++)
+		{
+			QUEUE_BACKEND_PID(i) = InvalidPid;
+			QUEUE_BACKEND_DBOID(i) = InvalidOid;
+			QUEUE_NEXT_LISTENER(i) = InvalidBackendId;
+			SET_QUEUE_POS(QUEUE_BACKEND_POS(i), 0, 0);
+		}
+	}
+
+	/*
+	 * Set up SLRU management of the pg_notify data.
+	 */
+	NotifyCtl->PagePrecedes = asyncQueuePagePrecedes;
+	SimpleLruInit(NotifyCtl, "Notify", NUM_NOTIFY_BUFFERS, 0,
+				  NotifySLRULock, "pg_notify", LWTRANCHE_NOTIFY_BUFFER,
+				  SYNC_HANDLER_NONE);
+
+	if (!found)
+	{
+		/*
+		 * During start or reboot, clean out the pg_notify directory.
+		 */
+		(void) SlruScanDirectory(NotifyCtl, SlruScanDirCbDeleteAll, NULL);
+	}
+}
+
+
+/*
+ * pg_notify -
+ *	  SQL function to send a notification event
+ */
+Datum
+pg_notify(PG_FUNCTION_ARGS)
+{
+	const char *channel;
+	const char *payload;
+
+	/* A NULL channel or payload argument is treated as an empty string */
+	if (PG_ARGISNULL(0))
+		channel = "";
+	else
+		channel = text_to_cstring(PG_GETARG_TEXT_PP(0));
+
+	if (PG_ARGISNULL(1))
+		payload = "";
+	else
+		payload = text_to_cstring(PG_GETARG_TEXT_PP(1));
+
+	/* For NOTIFY as a statement, this is checked in ProcessUtility */
+	PreventCommandDuringRecovery("NOTIFY");
+
+	Async_Notify(channel, payload);
+
+	PG_RETURN_VOID();
+}
+
+
+/*
+ * Async_Notify
+ *
+ *		This is executed by the SQL notify command.
+ *
+ *		Adds the message to the list of pending notifies.
+ *		Actual notification happens during transaction commit.
+ *		^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ */
+void
+Async_Notify(const char *channel, const char *payload)
+{
+	int			my_level = GetCurrentTransactionNestLevel();
+	size_t		channel_len;
+	size_t		payload_len;
+	Notification *n;
+	MemoryContext oldcontext;
+
+	if (IsParallelWorker())
+		elog(ERROR, "cannot send notifications from a parallel worker");
+
+	if (Trace_notify)
+		elog(DEBUG1, "Async_Notify(%s)", channel);
+
+	channel_len = channel ? strlen(channel) : 0;
+	payload_len = payload ? strlen(payload) : 0;
+
+	/* a channel name must be specified */
+	if (channel_len == 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("channel name cannot be empty")));
+
+	/* enforce length limits */
+	if (channel_len >= NAMEDATALEN)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("channel name too long")));
+
+	if (payload_len >= NOTIFY_PAYLOAD_MAX_LENGTH)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("payload string too long")));
+
+	/*
+	 * We must construct the Notification entry, even if we end up not using
+	 * it, in order to compare it cheaply to existing list entries.
+	 *
+	 * The notification list needs to live until end of transaction, so store
+	 * it in the transaction context.
+	 */
+	oldcontext = MemoryContextSwitchTo(CurTransactionContext);
+
+	/* the "+ 2" covers the two nul terminators stored in data[] */
+	n = (Notification *) palloc(offsetof(Notification, data) +
+								channel_len + payload_len + 2);
+	n->channel_len = channel_len;
+	n->payload_len = payload_len;
+	strcpy(n->data, channel);
+	if (payload)
+		strcpy(n->data + channel_len + 1, payload);
+	else
+		n->data[channel_len + 1] = '\0';
+
+	if (pendingNotifies == NULL || my_level > pendingNotifies->nestingLevel)
+	{
+		NotificationList *notifies;
+
+		/*
+		 * First notify event in current (sub)xact. Note that we allocate the
+		 * NotificationList in TopTransactionContext; the nestingLevel might
+		 * get changed later by AtSubCommit_Notify.
+		 */
+		notifies = (NotificationList *)
+			MemoryContextAlloc(TopTransactionContext,
+							   sizeof(NotificationList));
+		notifies->nestingLevel = my_level;
+		notifies->events = list_make1(n);
+		/* We certainly don't need a hashtable yet */
+		notifies->hashtab = NULL;
+		notifies->upper = pendingNotifies;
+		pendingNotifies = notifies;
+	}
+	else
+	{
+		/* Now check for duplicates */
+		if (AsyncExistsPendingNotify(n))
+		{
+			/* It's a dup, so forget it */
+			pfree(n);
+			MemoryContextSwitchTo(oldcontext);
+			return;
+		}
+
+		/* Append more events to existing list */
+		AddEventToPendingNotifies(n);
+	}
+
+	MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * queue_listen
+ *		Common code for listen, unlisten, unlisten all commands.
+ *
+ *		Adds the request to the list of pending actions.
+ *		Actual update of the listenChannels list happens during transaction
+ *		commit.
+ */
+static void
+queue_listen(ListenActionKind action, const char *channel)
+{
+	MemoryContext oldcontext;
+	ListenAction *actrec;
+	int			my_level = GetCurrentTransactionNestLevel();
+
+	/*
+	 * Unlike Async_Notify, we don't try to collapse out duplicates. It would
+	 * be too complicated to ensure we get the right interactions of
+	 * conflicting LISTEN/UNLISTEN/UNLISTEN_ALL, and it's unlikely that there
+	 * would be any performance benefit anyway in sane applications.
+	 */
+	oldcontext = MemoryContextSwitchTo(CurTransactionContext);
+
+	/* space for terminating null is included in sizeof(ListenAction) */
+	actrec = (ListenAction *) palloc(offsetof(ListenAction, channel) +
+									 strlen(channel) + 1);
+	actrec->action = action;
+	strcpy(actrec->channel, channel);
+
+	if (pendingActions == NULL || my_level > pendingActions->nestingLevel)
+	{
+		ActionList *actions;
+
+		/*
+		 * First action in current sub(xact). Note that we allocate the
+		 * ActionList in TopTransactionContext; the nestingLevel might get
+		 * changed later by AtSubCommit_Notify.
+		 */
+		actions = (ActionList *)
+			MemoryContextAlloc(TopTransactionContext, sizeof(ActionList));
+		actions->nestingLevel = my_level;
+		actions->actions = list_make1(actrec);
+		actions->upper = pendingActions;
+		pendingActions = actions;
+	}
+	else
+		pendingActions->actions = lappend(pendingActions->actions, actrec);
+
+	MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Async_Listen
+ *
+ *		This is executed by the SQL listen command.
+ */
+void
+Async_Listen(const char *channel)
+{
+	if (Trace_notify)
+		elog(DEBUG1, "Async_Listen(%s,%d)", channel, MyProcPid);
+
+	queue_listen(LISTEN_LISTEN, channel);
+}
+
+/*
+ * Async_Unlisten
+ *
+ *		This is executed by the SQL unlisten command.
+ */
+void
+Async_Unlisten(const char *channel)
+{
+	if (Trace_notify)
+		elog(DEBUG1, "Async_Unlisten(%s,%d)", channel, MyProcPid);
+
+	/* If we couldn't possibly be listening, no need to queue anything */
+	if (pendingActions == NULL && !unlistenExitRegistered)
+		return;
+
+	queue_listen(LISTEN_UNLISTEN, channel);
+}
+
+/*
+ * Async_UnlistenAll
+ *
+ *		This is invoked by UNLISTEN * command, and also at backend exit.
+ */
+void
+Async_UnlistenAll(void)
+{
+	if (Trace_notify)
+		elog(DEBUG1, "Async_UnlistenAll(%d)", MyProcPid);
+
+	/* If we couldn't possibly be listening, no need to queue anything */
+	if (pendingActions == NULL && !unlistenExitRegistered)
+		return;
+
+	queue_listen(LISTEN_UNLISTEN_ALL, "");
+}
+
+/*
+ * SQL function: return a set of the channel names this backend is actively
+ * listening to.
+ *
+ * Note: this coding relies on the fact that the listenChannels list cannot
+ * change within a transaction.
+ */
+Datum
+pg_listening_channels(PG_FUNCTION_ARGS)
+{
+	FuncCallContext *funcctx;
+
+	/* stuff done only on the first call of the function */
+	if (SRF_IS_FIRSTCALL())
+	{
+		/* create a function context for cross-call persistence */
+		funcctx = SRF_FIRSTCALL_INIT();
+	}
+
+	/* stuff done on every call of the function */
+	funcctx = SRF_PERCALL_SETUP();
+
+	/* emit one row per listenChannels entry, in list order */
+	if (funcctx->call_cntr < list_length(listenChannels))
+	{
+		char	   *channel = (char *) list_nth(listenChannels,
+												funcctx->call_cntr);
+
+		SRF_RETURN_NEXT(funcctx, CStringGetTextDatum(channel));
+	}
+
+	SRF_RETURN_DONE(funcctx);
+}
+
+/*
+ * Async_UnlistenOnExit
+ *
+ * This is executed at backend exit if we have done any LISTENs in this
+ * backend.  It might not be necessary anymore, if the user UNLISTENed
+ * everything, but we don't try to detect that case.
+ */
+static void
+Async_UnlistenOnExit(int code, Datum arg)
+{
+	Exec_UnlistenAllCommit();
+	asyncQueueUnregister();
+}
+
+/*
+ * AtPrepare_Notify
+ *
+ *		This is called at the prepare phase of a two-phase
+ *		transaction.  Save the state for possible commit later.
+ */
+void
+AtPrepare_Notify(void)
+{
+	/* It's not allowed to have any pending LISTEN/UNLISTEN/NOTIFY actions */
+	if (pendingActions || pendingNotifies)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot PREPARE a transaction that has executed LISTEN, UNLISTEN, or NOTIFY")));
+}
+
+/*
+ * PreCommit_Notify
+ *
+ *		This is called at transaction commit, before actually committing to
+ *		clog.
+ *
+ *		If there are pending LISTEN actions, make sure we are listed in the
+ *		shared-memory listener array.  This must happen before commit to
+ *		ensure we don't miss any notifies from transactions that commit
+ *		just after ours.
+ *
+ *		If there are outbound notify requests in the pendingNotifies list,
+ *		add them to the global queue.  We do that before commit so that
+ *		we can still throw error if we run out of queue space.
+ */
+void
+PreCommit_Notify(void)
+{
+	ListCell   *p;
+
+	if (!pendingActions && !pendingNotifies)
+		return;					/* no relevant statements in this xact */
+
+	if (Trace_notify)
+		elog(DEBUG1, "PreCommit_Notify");
+
+	/* Preflight for any pending listen/unlisten actions */
+	if (pendingActions != NULL)
+	{
+		foreach(p, pendingActions->actions)
+		{
+			ListenAction *actrec = (ListenAction *) lfirst(p);
+
+			switch (actrec->action)
+			{
+				case LISTEN_LISTEN:
+					Exec_ListenPreCommit();
+					break;
+				case LISTEN_UNLISTEN:
+					/* there is no Exec_UnlistenPreCommit() */
+					break;
+				case LISTEN_UNLISTEN_ALL:
+					/* there is no Exec_UnlistenAllPreCommit() */
+					break;
+			}
+		}
+	}
+
+	/* Queue any pending notifies (must happen after the above) */
+	if (pendingNotifies)
+	{
+		ListCell   *nextNotify;
+
+		/*
+		 * Make sure that we have an XID assigned to the current transaction.
+		 * GetCurrentTransactionId is cheap if we already have an XID, but not
+		 * so cheap if we don't, and we'd prefer not to do that work while
+		 * holding NotifyQueueLock.
+		 */
+		(void) GetCurrentTransactionId();
+
+		/*
+		 * Serialize writers by acquiring a special lock that we hold till
+		 * after commit.  This ensures that queue entries appear in commit
+		 * order, and in particular that there are never uncommitted queue
+		 * entries ahead of committed ones, so an uncommitted transaction
+		 * can't block delivery of deliverable notifications.
+		 *
+		 * We use a heavyweight lock so that it'll automatically be released
+		 * after either commit or abort.  This also allows deadlocks to be
+		 * detected, though really a deadlock shouldn't be possible here.
+		 *
+		 * The lock is on "database 0", which is pretty ugly but it doesn't
+		 * seem worth inventing a special locktag category just for this.
+		 * (Historical note: before PG 9.0, a similar lock on "database 0" was
+		 * used by the flatfiles mechanism.)
+		 */
+		LockSharedObject(DatabaseRelationId, InvalidOid, 0,
+						 AccessExclusiveLock);
+
+		/* Now push the notifications into the queue */
+		nextNotify = list_head(pendingNotifies->events);
+		while (nextNotify != NULL)
+		{
+			/*
+			 * Add the pending notifications to the queue.  We acquire and
+			 * release NotifyQueueLock once per page, which might be overkill
+			 * but it does allow readers to get in while we're doing this.
+			 *
+			 * A full queue is very uncommon and should really not happen,
+			 * given that we have so much space available in the SLRU pages.
+			 * Nevertheless we need to deal with this possibility.  Note that
+			 * when we get here we are in the process of committing our
+			 * transaction, but we have not yet committed to clog, so at this
+			 * point in time we can still roll the transaction back.
+			 */
+			LWLockAcquire(NotifyQueueLock, LW_EXCLUSIVE);
+			asyncQueueFillWarning();
+			if (asyncQueueIsFull())
+				ereport(ERROR,
+						(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+						 errmsg("too many notifications in the NOTIFY queue")));
+			nextNotify = asyncQueueAddEntries(nextNotify);
+			LWLockRelease(NotifyQueueLock);
+		}
+
+		/* Note that we don't clear pendingNotifies; AtCommit_Notify will. */
+	}
+}
+
+/*
+ * AtCommit_Notify
+ *
+ *		This is called at transaction commit, after committing to clog.
+ *
+ *		Update listenChannels and clear transaction-local state.
+ *
+ *		If we issued any notifications in the transaction, send signals to
+ *		listening backends (possibly including ourselves) to process them.
+ *		Also, if we filled enough queue pages with new notifies, try to
+ *		advance the queue tail pointer.
+ */
+void
+AtCommit_Notify(void)
+{
+	ListCell   *p;
+
+	/*
+	 * Allow transactions that have not executed LISTEN/UNLISTEN/NOTIFY to
+	 * return as soon as possible
+	 */
+	if (!pendingActions && !pendingNotifies)
+		return;
+
+	if (Trace_notify)
+		elog(DEBUG1, "AtCommit_Notify");
+
+	/* Perform any pending listen/unlisten actions */
+	if (pendingActions != NULL)
+	{
+		foreach(p, pendingActions->actions)
+		{
+			ListenAction *actrec = (ListenAction *) lfirst(p);
+
+			switch (actrec->action)
+			{
+				case LISTEN_LISTEN:
+					Exec_ListenCommit(actrec->channel);
+					break;
+				case LISTEN_UNLISTEN:
+					Exec_UnlistenCommit(actrec->channel);
+					break;
+				case LISTEN_UNLISTEN_ALL:
+					Exec_UnlistenAllCommit();
+					break;
+			}
+		}
+	}
+
+	/* If no longer listening to anything, get out of listener array */
+	if (amRegisteredListener && listenChannels == NIL)
+		asyncQueueUnregister();
+
+	/*
+	 * Send signals to listening backends.  We need do this only if there are
+	 * pending notifies, which were previously added to the shared queue by
+	 * PreCommit_Notify().
+	 */
+	if (pendingNotifies != NULL)
+		SignalBackends();
+
+	/*
+	 * If it's time to try to advance the global tail pointer, do that.
+	 *
+	 * (It might seem odd to do this in the sender, when more than likely the
+	 * listeners won't yet have read the messages we just sent.  However,
+	 * there's less contention if only the sender does it, and there is little
+	 * need for urgency in advancing the global tail.  So this typically will
+	 * be clearing out messages that were sent some time ago.)
+	 */
+	if (tryAdvanceTail)
+	{
+		tryAdvanceTail = false;
+		asyncQueueAdvanceTail();
+	}
+
+	/* And clean up */
+	ClearPendingActionsAndNotifies();
+}
+
+/*
+ * Exec_ListenPreCommit --- subroutine for PreCommit_Notify
+ *
+ * This function must make sure we are ready to catch any incoming messages.
+ */
+static void
+Exec_ListenPreCommit(void)
+{
+	QueuePosition head;
+	QueuePosition max;
+	BackendId	prevListener;
+
+	/*
+	 * Nothing to do if we are already listening to something, nor if we
+	 * already ran this routine in this transaction.
+	 */
+	if (amRegisteredListener)
+		return;
+
+	if (Trace_notify)
+		elog(DEBUG1, "Exec_ListenPreCommit(%d)", MyProcPid);
+
+	/*
+	 * Before registering, make sure we will unlisten before dying. (Note:
+	 * this action does not get undone if we abort later.)
+	 */
+	if (!unlistenExitRegistered)
+	{
+		before_shmem_exit(Async_UnlistenOnExit, 0);
+		unlistenExitRegistered = true;
+	}
+
+	/*
+	 * This is our first LISTEN, so establish our pointer.
+	 *
+	 * We set our pointer to the global tail pointer and then move it forward
+	 * over already-committed notifications.  This ensures we cannot miss any
+	 * not-yet-committed notifications.  We might get a few more but that
+	 * doesn't hurt.
+	 *
+	 * In some scenarios there might be a lot of committed notifications that
+	 * have not yet been pruned away (because some backend is being lazy about
+	 * reading them).  To reduce our startup time, we can look at other
+	 * backends and adopt the maximum "pos" pointer of any backend that's in
+	 * our database; any notifications it's already advanced over are surely
+	 * committed and need not be re-examined by us.  (We must consider only
+	 * backends connected to our DB, because others will not have bothered to
+	 * check committed-ness of notifications in our DB.)
+	 *
+	 * We need exclusive lock here so we can look at other backends' entries
+	 * and manipulate the list links.
+	 */
+	LWLockAcquire(NotifyQueueLock, LW_EXCLUSIVE);
+	/* snapshot current queue state while we hold the lock */
+	head = QUEUE_HEAD;
+	max = QUEUE_TAIL;
+	prevListener = InvalidBackendId;
+	for (BackendId i = QUEUE_FIRST_LISTENER; i > 0; i = QUEUE_NEXT_LISTENER(i))
+	{
+		if (QUEUE_BACKEND_DBOID(i) == MyDatabaseId)
+			max = QUEUE_POS_MAX(max, QUEUE_BACKEND_POS(i));
+		/* Also find last listening backend before this one */
+		if (i < MyBackendId)
+			prevListener = i;
+	}
+	QUEUE_BACKEND_POS(MyBackendId) = max;
+	QUEUE_BACKEND_PID(MyBackendId) = MyProcPid;
+	QUEUE_BACKEND_DBOID(MyBackendId) = MyDatabaseId;
+	/* Insert backend into list of listeners at correct position */
+	if (prevListener > 0)
+	{
+		QUEUE_NEXT_LISTENER(MyBackendId) = QUEUE_NEXT_LISTENER(prevListener);
+		QUEUE_NEXT_LISTENER(prevListener) = MyBackendId;
+	}
+	else
+	{
+		QUEUE_NEXT_LISTENER(MyBackendId) = QUEUE_FIRST_LISTENER;
+		QUEUE_FIRST_LISTENER = MyBackendId;
+	}
+	LWLockRelease(NotifyQueueLock);
+
+	/* Now we are listed in the global array, so remember we're listening */
+	amRegisteredListener = true;
+
+	/*
+	 * Try to move our pointer forward as far as possible.  This will skip
+	 * over already-committed notifications, which we want to do because they
+	 * might be quite stale.  Note that we are not yet listening on anything,
+	 * so we won't deliver such notifications to our frontend.  Also, although
+	 * our transaction might have executed NOTIFY, those message(s) aren't
+	 * queued yet so we won't skip them here.
+	 */
+	if (!QUEUE_POS_EQUAL(max, head))
+		asyncQueueReadAllNotifications();
+}
+
+/*
+ * Exec_ListenCommit --- subroutine for AtCommit_Notify
+ *
+ * Add the channel to the list of channels we are listening on.
+ */
+static void
+Exec_ListenCommit(const char *channel)
+{
+	MemoryContext oldcontext;
+
+	/* Do nothing if we are already listening on this channel */
+	if (IsListeningOn(channel))
+		return;
+
+	/*
+	 * Add the new channel name to listenChannels.
+	 *
+	 * XXX It is theoretically possible to get an out-of-memory failure here,
+	 * which would be bad because we already committed.  For the moment it
+	 * doesn't seem worth trying to guard against that, but maybe improve this
+	 * later.
+	 */
+	oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+	listenChannels = lappend(listenChannels, pstrdup(channel));
+	MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Exec_UnlistenCommit --- subroutine for AtCommit_Notify
+ *
+ * Remove the specified channel name from listenChannels.
+ * Only the first matching entry is removed; entries are unique since
+ * Exec_ListenCommit refuses to add duplicates.
+ */
+static void
+Exec_UnlistenCommit(const char *channel)
+{
+	ListCell   *q;
+
+	if (Trace_notify)
+		elog(DEBUG1, "Exec_UnlistenCommit(%s,%d)", channel, MyProcPid);
+
+	foreach(q, listenChannels)
+	{
+		char	   *lchan = (char *) lfirst(q);
+
+		if (strcmp(lchan, channel) == 0)
+		{
+			listenChannels = foreach_delete_current(listenChannels, q);
+			pfree(lchan);
+			break;
+		}
+	}
+
+	/*
+	 * We do not complain about unlistening something not being listened;
+	 * should we?
+	 */
+}
+
+/*
+ * Exec_UnlistenAllCommit --- subroutine for AtCommit_Notify
+ *
+ *		Unlisten on all channels for this backend.
+ */
+static void
+Exec_UnlistenAllCommit(void)
+{
+	if (Trace_notify)
+		elog(DEBUG1, "Exec_UnlistenAllCommit(%d)", MyProcPid);
+
+	list_free_deep(listenChannels);
+	listenChannels = NIL;
+}
+
+/*
+ * Test whether we are actively listening on the given channel name.
+ *
+ * Note: this function is executed for every notification found in the queue.
+ * Perhaps it is worth further optimization, eg convert the list to a sorted
+ * array so we can binary-search it.  In practice the list is likely to be
+ * fairly short, though.
+ */
+static bool
+IsListeningOn(const char *channel)
+{
+	ListCell   *p;
+
+	foreach(p, listenChannels)
+	{
+		char	   *lchan = (char *) lfirst(p);
+
+		if (strcmp(lchan, channel) == 0)
+			return true;
+	}
+	return false;
+}
+
+/*
+ * Remove our entry from the listeners array when we are no longer listening
+ * on any channel.  NB: must not fail if we're already not listening.
+ */
+static void
+asyncQueueUnregister(void)
+{
+	Assert(listenChannels == NIL);	/* else caller error */
+
+	if (!amRegisteredListener)	/* nothing to do */
+		return;
+
+	/*
+	 * Need exclusive lock here to manipulate list links.
+	 */
+	LWLockAcquire(NotifyQueueLock, LW_EXCLUSIVE);
+	/* Mark our entry as invalid */
+	QUEUE_BACKEND_PID(MyBackendId) = InvalidPid;
+	QUEUE_BACKEND_DBOID(MyBackendId) = InvalidOid;
+	/* and remove it from the list */
+	if (QUEUE_FIRST_LISTENER == MyBackendId)
+		QUEUE_FIRST_LISTENER = QUEUE_NEXT_LISTENER(MyBackendId);
+	else
+	{
+		for (BackendId i = QUEUE_FIRST_LISTENER; i > 0; i = QUEUE_NEXT_LISTENER(i))
+		{
+			if (QUEUE_NEXT_LISTENER(i) == MyBackendId)
+			{
+				QUEUE_NEXT_LISTENER(i) = QUEUE_NEXT_LISTENER(MyBackendId);
+				break;
+			}
+		}
+	}
+	QUEUE_NEXT_LISTENER(MyBackendId) = InvalidBackendId;
+	LWLockRelease(NotifyQueueLock);
+
+	/* mark ourselves as no longer listed in the global array */
+	amRegisteredListener = false;
+}
+
+/*
+ * Test whether there is room to insert more notification messages.
+ *
+ * Caller must hold at least shared NotifyQueueLock.
+ */
+static bool
+asyncQueueIsFull(void)
+{
+	int			nexthead;
+	int			boundary;
+
+	/*
+	 * The queue is full if creating a new head page would create a page that
+	 * logically precedes the current global tail pointer, ie, the head
+	 * pointer would wrap around compared to the tail.  We cannot create such
+	 * a head page for fear of confusing slru.c.  For safety we round the tail
+	 * pointer back to a segment boundary (truncation logic in
+	 * asyncQueueAdvanceTail does not do this, so doing it here is optional).
+	 *
+	 * Note that this test is *not* dependent on how much space there is on
+	 * the current head page.  This is necessary because asyncQueueAddEntries
+	 * might try to create the next head page in any case.
+	 */
+	nexthead = QUEUE_POS_PAGE(QUEUE_HEAD) + 1;
+	if (nexthead > QUEUE_MAX_PAGE)
+		nexthead = 0;			/* wrap around */
+	boundary = QUEUE_STOP_PAGE;
+	boundary -= boundary % SLRU_PAGES_PER_SEGMENT;
+	return asyncQueuePagePrecedes(nexthead, boundary);
+}
+
+/*
+ * Advance the QueuePosition to the next entry, assuming that the current
+ * entry is of length entryLength.  If we jump to a new page the function
+ * returns true, else false.
+ */
+static bool
+asyncQueueAdvance(volatile QueuePosition *position, int entryLength)
+{
+	int			pageno = QUEUE_POS_PAGE(*position);
+	int			offset = QUEUE_POS_OFFSET(*position);
+	bool		pageJump = false;
+
+	/*
+	 * Move to the next writing position: First jump over what we have just
+	 * written or read.
+	 */
+	offset += entryLength;
+	Assert(offset <= QUEUE_PAGESIZE);	/* entries never cross page bounds */
+
+	/*
+	 * In a second step check if another entry can possibly be written to the
+	 * page. If so, stay here, we have reached the next position. If not, then
+	 * we need to move on to the next page.
+	 */
+	if (offset + QUEUEALIGN(AsyncQueueEntryEmptySize) > QUEUE_PAGESIZE)
+	{
+		pageno++;
+		if (pageno > QUEUE_MAX_PAGE)
+			pageno = 0;			/* wrap around */
+		offset = 0;
+		pageJump = true;
+	}
+
+	SET_QUEUE_POS(*position, pageno, offset);
+	return pageJump;
+}
+
+/*
+ * Fill the AsyncQueueEntry at *qe with an outbound notification message.
+ */
+static void
+asyncQueueNotificationToEntry(Notification *n, AsyncQueueEntry *qe)
+{
+	size_t		channellen = n->channel_len;
+	size_t		payloadlen = n->payload_len;
+	int			entryLength;
+
+	Assert(channellen < NAMEDATALEN);
+	Assert(payloadlen < NOTIFY_PAYLOAD_MAX_LENGTH);
+
+	/* The terminators are already included in AsyncQueueEntryEmptySize */
+	entryLength = AsyncQueueEntryEmptySize + payloadlen + channellen;
+	entryLength = QUEUEALIGN(entryLength);
+	qe->length = entryLength;
+	qe->dboid = MyDatabaseId;
+	qe->xid = GetCurrentTransactionId();
+	qe->srcPid = MyProcPid;
+	memcpy(qe->data, n->data, channellen + payloadlen + 2);
+}
+
+/*
+ * Add pending notifications to the queue.
+ *
+ * We go page by page here, i.e. we stop once we have to go to a new page but
+ * we will be called again and then fill that next page. If an entry does not
+ * fit into the current page, we write a dummy entry with an InvalidOid as the
+ * database OID in order to fill the page. So every page is always used up to
+ * the last byte which simplifies reading the page later.
+ *
+ * We are passed the list cell (in pendingNotifies->events) containing the next
+ * notification to write and return the first still-unwritten cell back.
+ * Eventually we will return NULL indicating all is done.
+ *
+ * We are holding NotifyQueueLock already from the caller and grab
+ * NotifySLRULock locally in this function.
+ */
+static ListCell *
+asyncQueueAddEntries(ListCell *nextNotify)
+{
+	AsyncQueueEntry qe;
+	QueuePosition queue_head;
+	int			pageno;
+	int			offset;
+	int			slotno;
+
+	/* We hold both NotifyQueueLock and NotifySLRULock during this operation */
+	LWLockAcquire(NotifySLRULock, LW_EXCLUSIVE);
+
+	/*
+	 * We work with a local copy of QUEUE_HEAD, which we write back to shared
+	 * memory upon exiting.  The reason for this is that if we have to advance
+	 * to a new page, SimpleLruZeroPage might fail (out of disk space, for
+	 * instance), and we must not advance QUEUE_HEAD if it does.  (Otherwise,
+	 * subsequent insertions would try to put entries into a page that slru.c
+	 * thinks doesn't exist yet.)  So, use a local position variable.  Note
+	 * that if we do fail, any already-inserted queue entries are forgotten;
+	 * this is okay, since they'd be useless anyway after our transaction
+	 * rolls back.
+	 */
+	queue_head = QUEUE_HEAD;
+
+	/*
+	 * If this is the first write since the postmaster started, we need to
+	 * initialize the first page of the async SLRU.  Otherwise, the current
+	 * page should be initialized already, so just fetch it.
+	 *
+	 * (We could also take the first path when the SLRU position has just
+	 * wrapped around, but re-zeroing the page is harmless in that case.)
+	 */
+	pageno = QUEUE_POS_PAGE(queue_head);
+	if (QUEUE_POS_IS_ZERO(queue_head))
+		slotno = SimpleLruZeroPage(NotifyCtl, pageno);
+	else
+		slotno = SimpleLruReadPage(NotifyCtl, pageno, true,
+								   InvalidTransactionId);
+
+	/* Note we mark the page dirty before writing in it */
+	NotifyCtl->shared->page_dirty[slotno] = true;
+
+	while (nextNotify != NULL)
+	{
+		Notification *n = (Notification *) lfirst(nextNotify);
+
+		/* Construct a valid queue entry in local variable qe */
+		asyncQueueNotificationToEntry(n, &qe);
+
+		offset = QUEUE_POS_OFFSET(queue_head);
+
+		/* Check whether the entry really fits on the current page */
+		if (offset + qe.length <= QUEUE_PAGESIZE)
+		{
+			/* OK, so advance nextNotify past this item */
+			nextNotify = lnext(pendingNotifies->events, nextNotify);
+		}
+		else
+		{
+			/*
+			 * Write a dummy entry to fill up the page. Actually readers will
+			 * only check dboid and since it won't match any reader's database
+			 * OID, they will ignore this entry and move on.
+			 */
+			qe.length = QUEUE_PAGESIZE - offset;	/* covers rest of page */
+			qe.dboid = InvalidOid;
+			qe.data[0] = '\0';	/* empty channel */
+			qe.data[1] = '\0';	/* empty payload */
+		}
+
+		/* Now copy qe into the shared buffer page */
+		memcpy(NotifyCtl->shared->page_buffer[slotno] + offset,
+			   &qe,
+			   qe.length);
+
+		/* Advance queue_head appropriately, and detect if page is full */
+		if (asyncQueueAdvance(&(queue_head), qe.length))
+		{
+			/*
+			 * Page is full, so we're done here, but first fill the next page
+			 * with zeroes.  The reason to do this is to ensure that slru.c's
+			 * idea of the head page is always the same as ours, which avoids
+			 * boundary problems in SimpleLruTruncate.  The test in
+			 * asyncQueueIsFull() ensured that there is room to create this
+			 * page without overrunning the queue.
+			 */
+			slotno = SimpleLruZeroPage(NotifyCtl, QUEUE_POS_PAGE(queue_head));
+
+			/*
+			 * If the new page address is a multiple of QUEUE_CLEANUP_DELAY,
+			 * set flag to remember that we should try to advance the tail
+			 * pointer (we don't want to actually do that right here).
+			 */
+			if (QUEUE_POS_PAGE(queue_head) % QUEUE_CLEANUP_DELAY == 0)
+				tryAdvanceTail = true;
+
+			/* And exit the loop */
+			break;
+		}
+	}
+
+	/* Success, so update the global QUEUE_HEAD */
+	QUEUE_HEAD = queue_head;
+
+	LWLockRelease(NotifySLRULock);
+
+	return nextNotify;
+}
+
+/*
+ * SQL function to return the fraction of the notification queue currently
+ * occupied.
+ */
+Datum
+pg_notification_queue_usage(PG_FUNCTION_ARGS)
+{
+	double		usage;
+
+	/* Advance the queue tail so we don't report a too-large result */
+	asyncQueueAdvanceTail();
+
+	LWLockAcquire(NotifyQueueLock, LW_SHARED);
+	usage = asyncQueueUsage();
+	LWLockRelease(NotifyQueueLock);
+
+	PG_RETURN_FLOAT8(usage);
+}
+
+/*
+ * Return the fraction of the queue that is currently occupied.
+ *
+ * The caller must hold NotifyQueueLock in (at least) shared mode.
+ *
+ * Note: we measure the distance to the logical tail page, not the physical
+ * tail page.  In some sense that's wrong, but the relative position of the
+ * physical tail is affected by details such as SLRU segment boundaries,
+ * so that a result based on that is unpleasantly unstable.
+ */
+static double
+asyncQueueUsage(void)
+{
+	int			headPage = QUEUE_POS_PAGE(QUEUE_HEAD);
+	int			tailPage = QUEUE_POS_PAGE(QUEUE_TAIL);
+	int			occupied;
+
+	occupied = headPage - tailPage;
+
+	if (occupied == 0)
+		return (double) 0;		/* fast exit for common case */
+
+	if (occupied < 0)
+	{
+		/* head has wrapped around, tail not yet */
+		occupied += QUEUE_MAX_PAGE + 1;
+	}
+
+	return (double) occupied / (double) ((QUEUE_MAX_PAGE + 1) / 2);
+}
+
+/*
+ * Check whether the queue is at least half full, and emit a warning if so.
+ *
+ * This is unlikely given the size of the queue, but possible.
+ * The warnings show up at most once every QUEUE_FULL_WARN_INTERVAL.
+ *
+ * Caller must hold exclusive NotifyQueueLock.
+ */
+static void
+asyncQueueFillWarning(void)
+{
+	double		fillDegree;
+	TimestampTz t;
+
+	fillDegree = asyncQueueUsage();
+	if (fillDegree < 0.5)
+		return;
+
+	t = GetCurrentTimestamp();
+
+	if (TimestampDifferenceExceeds(asyncQueueControl->lastQueueFillWarn,
+								   t, QUEUE_FULL_WARN_INTERVAL))
+	{
+		QueuePosition min = QUEUE_HEAD;
+		int32		minPid = InvalidPid;
+
+		/* find the listener furthest behind; it is what holds back the tail */
+		for (BackendId i = QUEUE_FIRST_LISTENER; i > 0; i = QUEUE_NEXT_LISTENER(i))
+		{
+			Assert(QUEUE_BACKEND_PID(i) != InvalidPid);
+			min = QUEUE_POS_MIN(min, QUEUE_BACKEND_POS(i));
+			if (QUEUE_POS_EQUAL(min, QUEUE_BACKEND_POS(i)))
+				minPid = QUEUE_BACKEND_PID(i);
+		}
+
+		ereport(WARNING,
+				(errmsg("NOTIFY queue is %.0f%% full", fillDegree * 100),
+				 (minPid != InvalidPid ?
+				  errdetail("The server process with PID %d is among those with the oldest transactions.", minPid)
+				  : 0),
+				 (minPid != InvalidPid ?
+				  errhint("The NOTIFY queue cannot be emptied until that process ends its current transaction.")
+				  : 0)));
+
+		asyncQueueControl->lastQueueFillWarn = t;
+	}
+}
+
+/*
+ * Send signals to listening backends.
+ * + * Normally we signal only backends in our own database, since only those + * backends could be interested in notifies we send. However, if there's + * notify traffic in our database but no traffic in another database that + * does have listener(s), those listeners will fall further and further + * behind. Waken them anyway if they're far enough behind, so that they'll + * advance their queue position pointers, allowing the global tail to advance. + * + * Since we know the BackendId and the Pid the signaling is quite cheap. + * + * This is called during CommitTransaction(), so it's important for it + * to have very low probability of failure. + */ +static void +SignalBackends(void) +{ + int32 *pids; + BackendId *ids; + int count; + + /* + * Identify backends that we need to signal. We don't want to send + * signals while holding the NotifyQueueLock, so this loop just builds a + * list of target PIDs. + * + * XXX in principle these pallocs could fail, which would be bad. Maybe + * preallocate the arrays? They're not that large, though. + */ + pids = (int32 *) palloc(MaxBackends * sizeof(int32)); + ids = (BackendId *) palloc(MaxBackends * sizeof(BackendId)); + count = 0; + + LWLockAcquire(NotifyQueueLock, LW_EXCLUSIVE); + for (BackendId i = QUEUE_FIRST_LISTENER; i > 0; i = QUEUE_NEXT_LISTENER(i)) + { + int32 pid = QUEUE_BACKEND_PID(i); + QueuePosition pos; + + Assert(pid != InvalidPid); + pos = QUEUE_BACKEND_POS(i); + if (QUEUE_BACKEND_DBOID(i) == MyDatabaseId) + { + /* + * Always signal listeners in our own database, unless they're + * already caught up (unlikely, but possible). + */ + if (QUEUE_POS_EQUAL(pos, QUEUE_HEAD)) + continue; + } + else + { + /* + * Listeners in other databases should be signaled only if they + * are far behind. 
+ */ + if (asyncQueuePageDiff(QUEUE_POS_PAGE(QUEUE_HEAD), + QUEUE_POS_PAGE(pos)) < QUEUE_CLEANUP_DELAY) + continue; + } + /* OK, need to signal this one */ + pids[count] = pid; + ids[count] = i; + count++; + } + LWLockRelease(NotifyQueueLock); + + /* Now send signals */ + for (int i = 0; i < count; i++) + { + int32 pid = pids[i]; + + /* + * If we are signaling our own process, no need to involve the kernel; + * just set the flag directly. + */ + if (pid == MyProcPid) + { + notifyInterruptPending = true; + continue; + } + + /* + * Note: assuming things aren't broken, a signal failure here could + * only occur if the target backend exited since we released + * NotifyQueueLock; which is unlikely but certainly possible. So we + * just log a low-level debug message if it happens. + */ + if (SendProcSignal(pid, PROCSIG_NOTIFY_INTERRUPT, ids[i]) < 0) + elog(DEBUG3, "could not signal backend with PID %d: %m", pid); + } + + pfree(pids); + pfree(ids); +} + +/* + * AtAbort_Notify + * + * This is called at transaction abort. + * + * Gets rid of pending actions and outbound notifies that we would have + * executed if the transaction got committed. + */ +void +AtAbort_Notify(void) +{ + /* + * If we LISTEN but then roll back the transaction after PreCommit_Notify, + * we have registered as a listener but have not made any entry in + * listenChannels. In that case, deregister again. + */ + if (amRegisteredListener && listenChannels == NIL) + asyncQueueUnregister(); + + /* And clean up */ + ClearPendingActionsAndNotifies(); +} + +/* + * AtSubCommit_Notify() --- Take care of subtransaction commit. + * + * Reassign all items in the pending lists to the parent transaction. + */ +void +AtSubCommit_Notify(void) +{ + int my_level = GetCurrentTransactionNestLevel(); + + /* If there are actions at our nesting level, we must reparent them. 
*/ + if (pendingActions != NULL && + pendingActions->nestingLevel >= my_level) + { + if (pendingActions->upper == NULL || + pendingActions->upper->nestingLevel < my_level - 1) + { + /* nothing to merge; give the whole thing to the parent */ + --pendingActions->nestingLevel; + } + else + { + ActionList *childPendingActions = pendingActions; + + pendingActions = pendingActions->upper; + + /* + * Mustn't try to eliminate duplicates here --- see queue_listen() + */ + pendingActions->actions = + list_concat(pendingActions->actions, + childPendingActions->actions); + pfree(childPendingActions); + } + } + + /* If there are notifies at our nesting level, we must reparent them. */ + if (pendingNotifies != NULL && + pendingNotifies->nestingLevel >= my_level) + { + Assert(pendingNotifies->nestingLevel == my_level); + + if (pendingNotifies->upper == NULL || + pendingNotifies->upper->nestingLevel < my_level - 1) + { + /* nothing to merge; give the whole thing to the parent */ + --pendingNotifies->nestingLevel; + } + else + { + /* + * Formerly, we didn't bother to eliminate duplicates here, but + * now we must, else we fall foul of "Assert(!found)", either here + * or during a later attempt to build the parent-level hashtable. + */ + NotificationList *childPendingNotifies = pendingNotifies; + ListCell *l; + + pendingNotifies = pendingNotifies->upper; + /* Insert all the subxact's events into parent, except for dups */ + foreach(l, childPendingNotifies->events) + { + Notification *childn = (Notification *) lfirst(l); + + if (!AsyncExistsPendingNotify(childn)) + AddEventToPendingNotifies(childn); + } + pfree(childPendingNotifies); + } + } +} + +/* + * AtSubAbort_Notify() --- Take care of subtransaction abort. 
+ */ +void +AtSubAbort_Notify(void) +{ + int my_level = GetCurrentTransactionNestLevel(); + + /* + * All we have to do is pop the stack --- the actions/notifies made in + * this subxact are no longer interesting, and the space will be freed + * when CurTransactionContext is recycled. We still have to free the + * ActionList and NotificationList objects themselves, though, because + * those are allocated in TopTransactionContext. + * + * Note that there might be no entries at all, or no entries for the + * current subtransaction level, either because none were ever created, or + * because we reentered this routine due to trouble during subxact abort. + */ + while (pendingActions != NULL && + pendingActions->nestingLevel >= my_level) + { + ActionList *childPendingActions = pendingActions; + + pendingActions = pendingActions->upper; + pfree(childPendingActions); + } + + while (pendingNotifies != NULL && + pendingNotifies->nestingLevel >= my_level) + { + NotificationList *childPendingNotifies = pendingNotifies; + + pendingNotifies = pendingNotifies->upper; + pfree(childPendingNotifies); + } +} + +/* + * HandleNotifyInterrupt + * + * Signal handler portion of interrupt handling. Let the backend know + * that there's a pending notify interrupt. If we're currently reading + * from the client, this will interrupt the read and + * ProcessClientReadInterrupt() will call ProcessNotifyInterrupt(). + */ +void +HandleNotifyInterrupt(void) +{ + /* + * Note: this is called by a SIGNAL HANDLER. You must be very wary what + * you do here. + */ + + /* signal that work needs to be done */ + notifyInterruptPending = true; + + /* make sure the event is processed in due course */ + SetLatch(MyLatch); +} + +/* + * ProcessNotifyInterrupt + * + * This is called if we see notifyInterruptPending set, just before + * transmitting ReadyForQuery at the end of a frontend command, and + * also if a notify signal occurs while reading from the frontend. 
+ * HandleNotifyInterrupt() will cause the read to be interrupted + * via the process's latch, and this routine will get called. + * If we are truly idle (ie, *not* inside a transaction block), + * process the incoming notifies. + * + * If "flush" is true, force any frontend messages out immediately. + * This can be false when being called at the end of a frontend command, + * since we'll flush after sending ReadyForQuery. + */ +void +ProcessNotifyInterrupt(bool flush) +{ + if (IsTransactionOrTransactionBlock()) + return; /* not really idle */ + + /* Loop in case another signal arrives while sending messages */ + while (notifyInterruptPending) + ProcessIncomingNotify(flush); +} + + +/* + * Read all pending notifications from the queue, and deliver appropriate + * ones to my frontend. Stop when we reach queue head or an uncommitted + * notification. + */ +static void +asyncQueueReadAllNotifications(void) +{ + volatile QueuePosition pos; + QueuePosition head; + Snapshot snapshot; + + /* page_buffer must be adequately aligned, so use a union */ + union + { + char buf[QUEUE_PAGESIZE]; + AsyncQueueEntry align; + } page_buffer; + + /* Fetch current state */ + LWLockAcquire(NotifyQueueLock, LW_SHARED); + /* Assert checks that we have a valid state entry */ + Assert(MyProcPid == QUEUE_BACKEND_PID(MyBackendId)); + pos = QUEUE_BACKEND_POS(MyBackendId); + head = QUEUE_HEAD; + LWLockRelease(NotifyQueueLock); + + if (QUEUE_POS_EQUAL(pos, head)) + { + /* Nothing to do, we have read all notifications already. */ + return; + } + + /*---------- + * Get snapshot we'll use to decide which xacts are still in progress. + * This is trickier than it might seem, because of race conditions. 
+ * Consider the following example: + * + * Backend 1: Backend 2: + * + * transaction starts + * UPDATE foo SET ...; + * NOTIFY foo; + * commit starts + * queue the notify message + * transaction starts + * LISTEN foo; -- first LISTEN in session + * SELECT * FROM foo WHERE ...; + * commit to clog + * commit starts + * add backend 2 to array of listeners + * advance to queue head (this code) + * commit to clog + * + * Transaction 2's SELECT has not seen the UPDATE's effects, since that + * wasn't committed yet. Ideally we'd ensure that client 2 would + * eventually get transaction 1's notify message, but there's no way + * to do that; until we're in the listener array, there's no guarantee + * that the notify message doesn't get removed from the queue. + * + * Therefore the coding technique transaction 2 is using is unsafe: + * applications must commit a LISTEN before inspecting database state, + * if they want to ensure they will see notifications about subsequent + * changes to that state. + * + * What we do guarantee is that we'll see all notifications from + * transactions committing after the snapshot we take here. + * Exec_ListenPreCommit has already added us to the listener array, + * so no not-yet-committed messages can be removed from the queue + * before we see them. + *---------- + */ + snapshot = RegisterSnapshot(GetLatestSnapshot()); + + /* + * It is possible that we fail while trying to send a message to our + * frontend (for example, because of encoding conversion failure). If + * that happens it is critical that we not try to send the same message + * over and over again. Therefore, we place a PG_TRY block here that will + * forcibly advance our queue position before we lose control to an error. + * (We could alternatively retake NotifyQueueLock and move the position + * before handling each individual message, but that seems like too much + * lock traffic.) 
+ */ + PG_TRY(); + { + bool reachedStop; + + do + { + int curpage = QUEUE_POS_PAGE(pos); + int curoffset = QUEUE_POS_OFFSET(pos); + int slotno; + int copysize; + + /* + * We copy the data from SLRU into a local buffer, so as to avoid + * holding the NotifySLRULock while we are examining the entries + * and possibly transmitting them to our frontend. Copy only the + * part of the page we will actually inspect. + */ + slotno = SimpleLruReadPage_ReadOnly(NotifyCtl, curpage, + InvalidTransactionId); + if (curpage == QUEUE_POS_PAGE(head)) + { + /* we only want to read as far as head */ + copysize = QUEUE_POS_OFFSET(head) - curoffset; + if (copysize < 0) + copysize = 0; /* just for safety */ + } + else + { + /* fetch all the rest of the page */ + copysize = QUEUE_PAGESIZE - curoffset; + } + memcpy(page_buffer.buf + curoffset, + NotifyCtl->shared->page_buffer[slotno] + curoffset, + copysize); + /* Release lock that we got from SimpleLruReadPage_ReadOnly() */ + LWLockRelease(NotifySLRULock); + + /* + * Process messages up to the stop position, end of page, or an + * uncommitted message. + * + * Our stop position is what we found to be the head's position + * when we entered this function. It might have changed already. + * But if it has, we will receive (or have already received and + * queued) another signal and come here again. + * + * We are not holding NotifyQueueLock here! The queue can only + * extend beyond the head pointer (see above) and we leave our + * backend's pointer where it is so nobody will truncate or + * rewrite pages under us. Especially we don't want to hold a lock + * while sending the notifications to the frontend. 
+ */ + reachedStop = asyncQueueProcessPageEntries(&pos, head, + page_buffer.buf, + snapshot); + } while (!reachedStop); + } + PG_FINALLY(); + { + /* Update shared state */ + LWLockAcquire(NotifyQueueLock, LW_SHARED); + QUEUE_BACKEND_POS(MyBackendId) = pos; + LWLockRelease(NotifyQueueLock); + } + PG_END_TRY(); + + /* Done with snapshot */ + UnregisterSnapshot(snapshot); +} + +/* + * Fetch notifications from the shared queue, beginning at position current, + * and deliver relevant ones to my frontend. + * + * The current page must have been fetched into page_buffer from shared + * memory. (We could access the page right in shared memory, but that + * would imply holding the NotifySLRULock throughout this routine.) + * + * We stop if we reach the "stop" position, or reach a notification from an + * uncommitted transaction, or reach the end of the page. + * + * The function returns true once we have reached the stop position or an + * uncommitted notification, and false if we have finished with the page. + * In other words: once it returns true there is no need to look further. + * The QueuePosition *current is advanced past all processed messages. + */ +static bool +asyncQueueProcessPageEntries(volatile QueuePosition *current, + QueuePosition stop, + char *page_buffer, + Snapshot snapshot) +{ + bool reachedStop = false; + bool reachedEndOfPage; + AsyncQueueEntry *qe; + + do + { + QueuePosition thisentry = *current; + + if (QUEUE_POS_EQUAL(thisentry, stop)) + break; + + qe = (AsyncQueueEntry *) (page_buffer + QUEUE_POS_OFFSET(thisentry)); + + /* + * Advance *current over this message, possibly to the next page. As + * noted in the comments for asyncQueueReadAllNotifications, we must + * do this before possibly failing while processing the message. 
+ */ + reachedEndOfPage = asyncQueueAdvance(current, qe->length); + + /* Ignore messages destined for other databases */ + if (qe->dboid == MyDatabaseId) + { + if (XidInMVCCSnapshot(qe->xid, snapshot)) + { + /* + * The source transaction is still in progress, so we can't + * process this message yet. Break out of the loop, but first + * back up *current so we will reprocess the message next + * time. (Note: it is unlikely but not impossible for + * TransactionIdDidCommit to fail, so we can't really avoid + * this advance-then-back-up behavior when dealing with an + * uncommitted message.) + * + * Note that we must test XidInMVCCSnapshot before we test + * TransactionIdDidCommit, else we might return a message from + * a transaction that is not yet visible to snapshots; compare + * the comments at the head of heapam_visibility.c. + * + * Also, while our own xact won't be listed in the snapshot, + * we need not check for TransactionIdIsCurrentTransactionId + * because our transaction cannot (yet) have queued any + * messages. + */ + *current = thisentry; + reachedStop = true; + break; + } + else if (TransactionIdDidCommit(qe->xid)) + { + /* qe->data is the null-terminated channel name */ + char *channel = qe->data; + + if (IsListeningOn(channel)) + { + /* payload follows channel name */ + char *payload = qe->data + strlen(channel) + 1; + + NotifyMyFrontEnd(channel, payload, qe->srcPid); + } + } + else + { + /* + * The source transaction aborted or crashed, so we just + * ignore its notifications. + */ + } + } + + /* Loop back if we're not at end of page */ + } while (!reachedEndOfPage); + + if (QUEUE_POS_EQUAL(*current, stop)) + reachedStop = true; + + return reachedStop; +} + +/* + * Advance the shared queue tail variable to the minimum of all the + * per-backend tail pointers. Truncate pg_notify space if possible. + * + * This is (usually) called during CommitTransaction(), so it's important for + * it to have very low probability of failure. 
+ */ +static void +asyncQueueAdvanceTail(void) +{ + QueuePosition min; + int oldtailpage; + int newtailpage; + int boundary; + + /* Restrict task to one backend per cluster; see SimpleLruTruncate(). */ + LWLockAcquire(NotifyQueueTailLock, LW_EXCLUSIVE); + + /* + * Compute the new tail. Pre-v13, it's essential that QUEUE_TAIL be exact + * (ie, exactly match at least one backend's queue position), so it must + * be updated atomically with the actual computation. Since v13, we could + * get away with not doing it like that, but it seems prudent to keep it + * so. + * + * Also, because incoming backends will scan forward from QUEUE_TAIL, that + * must be advanced before we can truncate any data. Thus, QUEUE_TAIL is + * the logical tail, while QUEUE_STOP_PAGE is the physical tail, or oldest + * un-truncated page. When QUEUE_STOP_PAGE != QUEUE_POS_PAGE(QUEUE_TAIL), + * there are pages we can truncate but haven't yet finished doing so. + * + * For concurrency's sake, we don't want to hold NotifyQueueLock while + * performing SimpleLruTruncate. This is OK because no backend will try + * to access the pages we are in the midst of truncating. + */ + LWLockAcquire(NotifyQueueLock, LW_EXCLUSIVE); + min = QUEUE_HEAD; + for (BackendId i = QUEUE_FIRST_LISTENER; i > 0; i = QUEUE_NEXT_LISTENER(i)) + { + Assert(QUEUE_BACKEND_PID(i) != InvalidPid); + min = QUEUE_POS_MIN(min, QUEUE_BACKEND_POS(i)); + } + QUEUE_TAIL = min; + oldtailpage = QUEUE_STOP_PAGE; + LWLockRelease(NotifyQueueLock); + + /* + * We can truncate something if the global tail advanced across an SLRU + * segment boundary. + * + * XXX it might be better to truncate only once every several segments, to + * reduce the number of directory scans. + */ + newtailpage = QUEUE_POS_PAGE(min); + boundary = newtailpage - (newtailpage % SLRU_PAGES_PER_SEGMENT); + if (asyncQueuePagePrecedes(oldtailpage, boundary)) + { + /* + * SimpleLruTruncate() will ask for NotifySLRULock but will also + * release the lock again. 
+ */ + SimpleLruTruncate(NotifyCtl, newtailpage); + + /* + * Update QUEUE_STOP_PAGE. This changes asyncQueueIsFull()'s verdict + * for the segment immediately prior to the old tail, allowing fresh + * data into that segment. + */ + LWLockAcquire(NotifyQueueLock, LW_EXCLUSIVE); + QUEUE_STOP_PAGE = newtailpage; + LWLockRelease(NotifyQueueLock); + } + + LWLockRelease(NotifyQueueTailLock); +} + +/* + * ProcessIncomingNotify + * + * Scan the queue for arriving notifications and report them to the front + * end. The notifications might be from other sessions, or our own; + * there's no need to distinguish here. + * + * If "flush" is true, force any frontend messages out immediately. + * + * NOTE: since we are outside any transaction, we must create our own. + */ +static void +ProcessIncomingNotify(bool flush) +{ + /* We *must* reset the flag */ + notifyInterruptPending = false; + + /* Do nothing else if we aren't actively listening */ + if (listenChannels == NIL) + return; + + if (Trace_notify) + elog(DEBUG1, "ProcessIncomingNotify"); + + set_ps_display("notify interrupt"); + + /* + * We must run asyncQueueReadAllNotifications inside a transaction, else + * bad things happen if it gets an error. + */ + StartTransactionCommand(); + + asyncQueueReadAllNotifications(); + + CommitTransactionCommand(); + + /* + * If this isn't an end-of-command case, we must flush the notify messages + * to ensure frontend gets them promptly. + */ + if (flush) + pq_flush(); + + set_ps_display("idle"); + + if (Trace_notify) + elog(DEBUG1, "ProcessIncomingNotify: done"); +} + +/* + * Send NOTIFY message to my front end. + */ +void +NotifyMyFrontEnd(const char *channel, const char *payload, int32 srcPid) +{ + if (whereToSendOutput == DestRemote) + { + StringInfoData buf; + + pq_beginmessage(&buf, 'A'); + pq_sendint32(&buf, srcPid); + pq_sendstring(&buf, channel); + pq_sendstring(&buf, payload); + pq_endmessage(&buf); + + /* + * NOTE: we do not do pq_flush() here. 
Some level of caller will + * handle it later, allowing this message to be combined into a packet + * with other ones. + */ + } + else + elog(INFO, "NOTIFY for \"%s\" payload \"%s\"", channel, payload); +} + +/* Does pendingNotifies include a match for the given event? */ +static bool +AsyncExistsPendingNotify(Notification *n) +{ + if (pendingNotifies == NULL) + return false; + + if (pendingNotifies->hashtab != NULL) + { + /* Use the hash table to probe for a match */ + if (hash_search(pendingNotifies->hashtab, + &n, + HASH_FIND, + NULL)) + return true; + } + else + { + /* Must scan the event list */ + ListCell *l; + + foreach(l, pendingNotifies->events) + { + Notification *oldn = (Notification *) lfirst(l); + + if (n->channel_len == oldn->channel_len && + n->payload_len == oldn->payload_len && + memcmp(n->data, oldn->data, + n->channel_len + n->payload_len + 2) == 0) + return true; + } + } + + return false; +} + +/* + * Add a notification event to a pre-existing pendingNotifies list. + * + * Because pendingNotifies->events is already nonempty, this works + * correctly no matter what CurrentMemoryContext is. 
+ */ +static void +AddEventToPendingNotifies(Notification *n) +{ + Assert(pendingNotifies->events != NIL); + + /* Create the hash table if it's time to */ + if (list_length(pendingNotifies->events) >= MIN_HASHABLE_NOTIFIES && + pendingNotifies->hashtab == NULL) + { + HASHCTL hash_ctl; + ListCell *l; + + /* Create the hash table */ + hash_ctl.keysize = sizeof(Notification *); + hash_ctl.entrysize = sizeof(NotificationHash); + hash_ctl.hash = notification_hash; + hash_ctl.match = notification_match; + hash_ctl.hcxt = CurTransactionContext; + pendingNotifies->hashtab = + hash_create("Pending Notifies", + 256L, + &hash_ctl, + HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT); + + /* Insert all the already-existing events */ + foreach(l, pendingNotifies->events) + { + Notification *oldn = (Notification *) lfirst(l); + NotificationHash *hentry; + bool found; + + hentry = (NotificationHash *) hash_search(pendingNotifies->hashtab, + &oldn, + HASH_ENTER, + &found); + Assert(!found); + hentry->event = oldn; + } + } + + /* Add new event to the list, in order */ + pendingNotifies->events = lappend(pendingNotifies->events, n); + + /* Add event to the hash table if needed */ + if (pendingNotifies->hashtab != NULL) + { + NotificationHash *hentry; + bool found; + + hentry = (NotificationHash *) hash_search(pendingNotifies->hashtab, + &n, + HASH_ENTER, + &found); + Assert(!found); + hentry->event = n; + } +} + +/* + * notification_hash: hash function for notification hash table + * + * The hash "keys" are pointers to Notification structs. 
+ */ +static uint32 +notification_hash(const void *key, Size keysize) +{ + const Notification *k = *(const Notification *const *) key; + + Assert(keysize == sizeof(Notification *)); + /* We don't bother to include the payload's trailing null in the hash */ + return DatumGetUInt32(hash_any((const unsigned char *) k->data, + k->channel_len + k->payload_len + 1)); +} + +/* + * notification_match: match function to use with notification_hash + */ +static int +notification_match(const void *key1, const void *key2, Size keysize) +{ + const Notification *k1 = *(const Notification *const *) key1; + const Notification *k2 = *(const Notification *const *) key2; + + Assert(keysize == sizeof(Notification *)); + if (k1->channel_len == k2->channel_len && + k1->payload_len == k2->payload_len && + memcmp(k1->data, k2->data, + k1->channel_len + k1->payload_len + 2) == 0) + return 0; /* equal */ + return 1; /* not equal */ +} + +/* Clear the pendingActions and pendingNotifies lists. */ +static void +ClearPendingActionsAndNotifies(void) +{ + /* + * Everything's allocated in either TopTransactionContext or the context + * for the subtransaction to which it corresponds. So, there's nothing to + * do here except reset the pointers; the space will be reclaimed when the + * contexts are deleted. + */ + pendingActions = NULL; + pendingNotifies = NULL; +} diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c new file mode 100644 index 0000000..e4b7ffd --- /dev/null +++ b/src/backend/commands/cluster.c @@ -0,0 +1,1736 @@ +/*------------------------------------------------------------------------- + * + * cluster.c + * CLUSTER a table on an index. This is now also used for VACUUM FULL. + * + * There is hardly anything left of Paul Brown's original implementation... 
+ * + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994-5, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/cluster.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/amapi.h" +#include "access/heapam.h" +#include "access/multixact.h" +#include "access/relscan.h" +#include "access/tableam.h" +#include "access/toast_internals.h" +#include "access/transam.h" +#include "access/xact.h" +#include "access/xlog.h" +#include "catalog/catalog.h" +#include "catalog/dependency.h" +#include "catalog/heap.h" +#include "catalog/index.h" +#include "catalog/namespace.h" +#include "catalog/objectaccess.h" +#include "catalog/partition.h" +#include "catalog/pg_am.h" +#include "catalog/pg_inherits.h" +#include "catalog/toasting.h" +#include "commands/cluster.h" +#include "commands/defrem.h" +#include "commands/progress.h" +#include "commands/tablecmds.h" +#include "commands/vacuum.h" +#include "miscadmin.h" +#include "optimizer/optimizer.h" +#include "pgstat.h" +#include "storage/bufmgr.h" +#include "storage/lmgr.h" +#include "storage/predicate.h" +#include "utils/acl.h" +#include "utils/fmgroids.h" +#include "utils/inval.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/pg_rusage.h" +#include "utils/relmapper.h" +#include "utils/snapmgr.h" +#include "utils/syscache.h" +#include "utils/tuplesort.h" + +/* + * This struct is used to pass around the information on tables to be + * clustered. We need this so we can make a list of them when invoked without + * a specific table/index pair. 
+ */ +typedef struct +{ + Oid tableOid; + Oid indexOid; +} RelToCluster; + + +static void cluster_multiple_rels(List *rtcs, ClusterParams *params); +static void rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose); +static void copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, + bool verbose, bool *pSwapToastByContent, + TransactionId *pFreezeXid, MultiXactId *pCutoffMulti); +static List *get_tables_to_cluster(MemoryContext cluster_context); +static List *get_tables_to_cluster_partitioned(MemoryContext cluster_context, + Oid indexOid); + + +/*--------------------------------------------------------------------------- + * This cluster code allows for clustering multiple tables at once. Because + * of this, we cannot just run everything on a single transaction, or we + * would be forced to acquire exclusive locks on all the tables being + * clustered, simultaneously --- very likely leading to deadlock. + * + * To solve this we follow a similar strategy to VACUUM code, + * clustering each relation in a separate transaction. For this to work, + * we need to: + * - provide a separate memory context so that we can pass information in + * a way that survives across transactions + * - start a new transaction every time a new relation is clustered + * - check for validity of the information on to-be-clustered relations, + * as someone might have deleted a relation behind our back, or + * clustered one on a different index + * - end the transaction + * + * The single-relation case does not have any such overhead. + * + * We also allow a relation to be specified without index. In that case, + * the indisclustered bit will be looked up, and an ERROR will be thrown + * if there is no index with the bit set. 
+ *---------------------------------------------------------------------------
+ */
+void
+cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel)
+{
+	ListCell   *lc;
+	ClusterParams params = {0};
+	bool		verbose = false;
+	Relation	rel = NULL;
+	Oid			indexOid = InvalidOid;
+	MemoryContext cluster_context;
+	List	   *rtcs;
+
+	/* Parse option list */
+	foreach(lc, stmt->params)
+	{
+		DefElem    *opt = (DefElem *) lfirst(lc);
+
+		if (strcmp(opt->defname, "verbose") == 0)
+			verbose = defGetBoolean(opt);
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("unrecognized CLUSTER option \"%s\"",
+							opt->defname),
+					 parser_errposition(pstate, opt->location)));
+	}
+
+	/* Initial option bits; recheck bits may be OR'd in further down. */
+	params.options = (verbose ? CLUOPT_VERBOSE : 0);
+
+	if (stmt->relation != NULL)
+	{
+		/* This is the single-relation case. */
+		Oid			tableOid;
+
+		/*
+		 * Find, lock, and check permissions on the table.  We obtain
+		 * AccessExclusiveLock right away to avoid lock-upgrade hazard in the
+		 * single-transaction case.
+		 */
+		tableOid = RangeVarGetRelidExtended(stmt->relation,
+											AccessExclusiveLock,
+											0,
+											RangeVarCallbackOwnsTable, NULL);
+		rel = table_open(tableOid, NoLock);
+
+		/*
+		 * Reject clustering a remote temp table ... their local buffer
+		 * manager is not going to cope.
+		 */
+		if (RELATION_IS_OTHER_TEMP(rel))
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("cannot cluster temporary tables of other sessions")));
+
+		if (stmt->indexname == NULL)
+		{
+			ListCell   *index;
+
+			/* We need to find the index that has indisclustered set. */
+			foreach(index, RelationGetIndexList(rel))
+			{
+				indexOid = lfirst_oid(index);
+				if (get_index_isclustered(indexOid))
+					break;
+				indexOid = InvalidOid;
+			}
+
+			if (!OidIsValid(indexOid))
+				ereport(ERROR,
+						(errcode(ERRCODE_UNDEFINED_OBJECT),
+						 errmsg("there is no previously clustered index for table \"%s\"",
+								stmt->relation->relname)));
+		}
+		else
+		{
+			/*
+			 * The index is expected to be in the same namespace as the
+			 * relation.
+			 */
+			indexOid = get_relname_relid(stmt->indexname,
+										 rel->rd_rel->relnamespace);
+			if (!OidIsValid(indexOid))
+				ereport(ERROR,
+						(errcode(ERRCODE_UNDEFINED_OBJECT),
+						 errmsg("index \"%s\" for table \"%s\" does not exist",
+								stmt->indexname, stmt->relation->relname)));
+		}
+
+		if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+		{
+			/* close relation, keep lock till commit */
+			table_close(rel, NoLock);
+
+			/* Do the job. */
+			cluster_rel(tableOid, indexOid, &params);
+
+			return;
+		}
+		/* Otherwise, partitioned table: fall through to multi-table path. */
+	}
+
+	/*
+	 * By here, we know we are in a multi-table situation.  In order to avoid
+	 * holding locks for too long, we want to process each table in its own
+	 * transaction.  This forces us to disallow running inside a user
+	 * transaction block.
+	 */
+	PreventInTransactionBlock(isTopLevel, "CLUSTER");
+
+	/* Also, we need a memory context to hold our list of relations */
+	cluster_context = AllocSetContextCreate(PortalContext,
+											"Cluster",
+											ALLOCSET_DEFAULT_SIZES);
+
+	/*
+	 * Either we're processing a partitioned table, or we were not given any
+	 * table name at all.  In either case, obtain a list of relations to
+	 * process.
+	 *
+	 * In the former case, an index name must have been given, so we don't
+	 * need to recheck its "indisclustered" bit, but we have to check that it
+	 * is an index that we can cluster on.  In the latter case, we set the
+	 * option bit to have indisclustered verified.
+	 *
+	 * Rechecking the relation itself is necessary here in all cases.
+	 */
+	params.options |= CLUOPT_RECHECK;
+	if (rel != NULL)
+	{
+		Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
+		check_index_is_clusterable(rel, indexOid, AccessShareLock);
+		rtcs = get_tables_to_cluster_partitioned(cluster_context, indexOid);
+
+		/* close relation, releasing lock on parent table */
+		table_close(rel, AccessExclusiveLock);
+	}
+	else
+	{
+		rtcs = get_tables_to_cluster(cluster_context);
+		params.options |= CLUOPT_RECHECK_ISCLUSTERED;
+	}
+
+	/* Do the job. */
+	cluster_multiple_rels(rtcs, &params);
+
+	/* Start a new transaction for the cleanup work. */
+	StartTransactionCommand();
+
+	/* Clean up working storage */
+	MemoryContextDelete(cluster_context);
+}
+
+/*
+ * Given a list of relations to cluster, process each of them in a separate
+ * transaction.
+ *
+ * We expect to be in a transaction at start, but there isn't one when we
+ * return.
+ */
+static void
+cluster_multiple_rels(List *rtcs, ClusterParams *params)
+{
+	ListCell   *lc;
+
+	/* Commit to get out of starting transaction */
+	PopActiveSnapshot();
+	CommitTransactionCommand();
+
+	/* Cluster the tables, each in a separate transaction */
+	foreach(lc, rtcs)
+	{
+		RelToCluster *rtc = (RelToCluster *) lfirst(lc);
+
+		/* Start a new transaction for each relation. */
+		StartTransactionCommand();
+
+		/* functions in indexes may want a snapshot set */
+		PushActiveSnapshot(GetTransactionSnapshot());
+
+		/* Do the job. */
+		cluster_rel(rtc->tableOid, rtc->indexOid, params);
+
+		PopActiveSnapshot();
+		CommitTransactionCommand();
+	}
+}
+
+/*
+ * cluster_rel
+ *
+ * This clusters the table by creating a new, clustered table and
+ * swapping the relfilenodes of the new table and the old table, so
+ * the OID of the original table is preserved.  Thus we do not lose
+ * GRANT, inheritance nor references to this table (this was a bug
+ * in releases through 7.3).
+ *
+ * Indexes are rebuilt too, via REINDEX. Since we are effectively bulk-loading
+ * the new table, it's better to create the indexes afterwards than to fill
+ * them incrementally while we load the table.
+ *
+ * If indexOid is InvalidOid, the table will be rewritten in physical order
+ * instead of index order.  This is the new implementation of VACUUM FULL,
+ * and error messages should refer to the operation as VACUUM not CLUSTER.
+ */
+void
+cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params)
+{
+	Relation	OldHeap;
+	Oid			save_userid;
+	int			save_sec_context;
+	int			save_nestlevel;
+	bool		verbose = ((params->options & CLUOPT_VERBOSE) != 0);
+	bool		recheck = ((params->options & CLUOPT_RECHECK) != 0);
+
+	/* Check for user-requested abort. */
+	CHECK_FOR_INTERRUPTS();
+
+	pgstat_progress_start_command(PROGRESS_COMMAND_CLUSTER, tableOid);
+	if (OidIsValid(indexOid))
+		pgstat_progress_update_param(PROGRESS_CLUSTER_COMMAND,
+									 PROGRESS_CLUSTER_COMMAND_CLUSTER);
+	else
+		pgstat_progress_update_param(PROGRESS_CLUSTER_COMMAND,
+									 PROGRESS_CLUSTER_COMMAND_VACUUM_FULL);
+
+	/*
+	 * We grab exclusive access to the target rel and index for the duration
+	 * of the transaction.  (This is redundant for the single-transaction
+	 * case, since cluster() already did it.)  The index lock is taken inside
+	 * check_index_is_clusterable.
+	 */
+	OldHeap = try_relation_open(tableOid, AccessExclusiveLock);
+
+	/* If the table has gone away, we can skip processing it */
+	if (!OldHeap)
+	{
+		pgstat_progress_end_command();
+		return;
+	}
+
+	/*
+	 * Switch to the table owner's userid, so that any index functions are run
+	 * as that user.  Also lock down security-restricted operations and
+	 * arrange to make GUC variable changes local to this command.
+	 */
+	GetUserIdAndSecContext(&save_userid, &save_sec_context);
+	SetUserIdAndSecContext(OldHeap->rd_rel->relowner,
+						   save_sec_context | SECURITY_RESTRICTED_OPERATION);
+	save_nestlevel = NewGUCNestLevel();
+
+	/*
+	 * Since we may open a new transaction for each relation, we have to check
+	 * that the relation still is what we think it is.
+	 *
+	 * If this is a single-transaction CLUSTER, we can skip these tests. We
+	 * *must* skip the one on indisclustered since it would reject an attempt
+	 * to cluster a not-previously-clustered index.
+	 */
+	if (recheck)
+	{
+		/* Check that the user still owns the relation */
+		if (!pg_class_ownercheck(tableOid, save_userid))
+		{
+			relation_close(OldHeap, AccessExclusiveLock);
+			goto out;
+		}
+
+		/*
+		 * Silently skip a temp table for a remote session.  Only doing this
+		 * check in the "recheck" case is appropriate (which currently means
+		 * somebody is executing a database-wide CLUSTER or on a partitioned
+		 * table), because there is another check in cluster() which will stop
+		 * any attempt to cluster remote temp tables by name.  There is
+		 * another check in cluster_rel which is redundant, but we leave it
+		 * for extra safety.
+		 */
+		if (RELATION_IS_OTHER_TEMP(OldHeap))
+		{
+			relation_close(OldHeap, AccessExclusiveLock);
+			goto out;
+		}
+
+		if (OidIsValid(indexOid))
+		{
+			/*
+			 * Check that the index still exists
+			 */
+			if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(indexOid)))
+			{
+				relation_close(OldHeap, AccessExclusiveLock);
+				goto out;
+			}
+
+			/*
+			 * Check that the index is still the one with indisclustered set,
+			 * if needed.
+			 */
+			if ((params->options & CLUOPT_RECHECK_ISCLUSTERED) != 0 &&
+				!get_index_isclustered(indexOid))
+			{
+				relation_close(OldHeap, AccessExclusiveLock);
+				goto out;
+			}
+		}
+	}
+
+	/*
+	 * We allow VACUUM FULL, but not CLUSTER, on shared catalogs.  CLUSTER
+	 * would work in most respects, but the index would only get marked as
+	 * indisclustered in the current database, leading to unexpected behavior
+	 * if CLUSTER were later invoked in another database.
+	 */
+	if (OidIsValid(indexOid) && OldHeap->rd_rel->relisshared)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot cluster a shared catalog")));
+
+	/*
+	 * Don't process temp tables of other backends ... their local buffer
+	 * manager is not going to cope.
+	 */
+	if (RELATION_IS_OTHER_TEMP(OldHeap))
+	{
+		if (OidIsValid(indexOid))
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("cannot cluster temporary tables of other sessions")));
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("cannot vacuum temporary tables of other sessions")));
+	}
+
+	/*
+	 * Also check for active uses of the relation in the current transaction,
+	 * including open scans and pending AFTER trigger events.
+	 */
+	CheckTableNotInUse(OldHeap, OidIsValid(indexOid) ? "CLUSTER" : "VACUUM");
+
+	/* Check heap and index are valid to cluster on */
+	if (OidIsValid(indexOid))
+		check_index_is_clusterable(OldHeap, indexOid, AccessExclusiveLock);
+
+	/*
+	 * Quietly ignore the request if this is a materialized view which has not
+	 * been populated from its query. No harm is done because there is no data
+	 * to deal with, and we don't want to throw an error if this is part of a
+	 * multi-relation request -- for example, CLUSTER was run on the entire
+	 * database.
+	 */
+	if (OldHeap->rd_rel->relkind == RELKIND_MATVIEW &&
+		!RelationIsPopulated(OldHeap))
+	{
+		relation_close(OldHeap, AccessExclusiveLock);
+		goto out;
+	}
+
+	Assert(OldHeap->rd_rel->relkind == RELKIND_RELATION ||
+		   OldHeap->rd_rel->relkind == RELKIND_MATVIEW ||
+		   OldHeap->rd_rel->relkind == RELKIND_TOASTVALUE);
+
+	/*
+	 * All predicate locks on the tuples or pages are about to be made
+	 * invalid, because we move tuples around.  Promote them to relation
+	 * locks.  Predicate locks on indexes will be promoted when they are
+	 * reindexed.
+	 */
+	TransferPredicateLocksToHeapRelation(OldHeap);
+
+	/* rebuild_relation does all the dirty work */
+	rebuild_relation(OldHeap, indexOid, verbose);
+
+	/* NB: rebuild_relation does table_close() on OldHeap */
+
+	/* Common exit: undo userid/GUC changes and end progress reporting. */
+out:
+	/* Roll back any GUC changes executed by index functions */
+	AtEOXact_GUC(false, save_nestlevel);
+
+	/* Restore userid and security context */
+	SetUserIdAndSecContext(save_userid, save_sec_context);
+
+	pgstat_progress_end_command();
+}
+
+/*
+ * Verify that the specified heap and index are valid to cluster on
+ *
+ * Side effect: obtains lock on the index.  The caller may
+ * in some cases already have AccessExclusiveLock on the table, but
+ * not in all cases so we can't rely on the table-level lock for
+ * protection here.
+ */
+void
+check_index_is_clusterable(Relation OldHeap, Oid indexOid, LOCKMODE lockmode)
+{
+	Relation	OldIndex;
+
+	OldIndex = index_open(indexOid, lockmode);
+
+	/*
+	 * Check that index is in fact an index on the given relation
+	 */
+	if (OldIndex->rd_index == NULL ||
+		OldIndex->rd_index->indrelid != RelationGetRelid(OldHeap))
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("\"%s\" is not an index for table \"%s\"",
+						RelationGetRelationName(OldIndex),
+						RelationGetRelationName(OldHeap))));
+
+	/* Index AM must allow clustering */
+	if (!OldIndex->rd_indam->amclusterable)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot cluster on index \"%s\" because access method does not support clustering",
+						RelationGetRelationName(OldIndex))));
+
+	/*
+	 * Disallow clustering on incomplete indexes (those that might not index
+	 * every row of the relation).  We could relax this by making a separate
+	 * seqscan pass over the table to copy the missing rows, but that seems
+	 * expensive and tedious.
+	 */
+	if (!heap_attisnull(OldIndex->rd_indextuple, Anum_pg_index_indpred, NULL))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot cluster on partial index \"%s\"",
+						RelationGetRelationName(OldIndex))));
+
+	/*
+	 * Disallow if index is left over from a failed CREATE INDEX CONCURRENTLY;
+	 * it might well not contain entries for every heap row, or might not even
+	 * be internally consistent.  (But note that we don't check indcheckxmin;
+	 * the worst consequence of following broken HOT chains would be that we
+	 * might put recently-dead tuples out-of-order in the new table, and there
+	 * is little harm in that.)
+	 */
+	if (!OldIndex->rd_index->indisvalid)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot cluster on invalid index \"%s\"",
+						RelationGetRelationName(OldIndex))));
+
+	/* Drop relcache refcnt on OldIndex, but keep lock */
+	index_close(OldIndex, NoLock);
+}
+
+/*
+ * mark_index_clustered: mark the specified index as the one clustered on
+ *
+ * With indexOid == InvalidOid, will mark all indexes of rel not-clustered.
+ */
+void
+mark_index_clustered(Relation rel, Oid indexOid, bool is_internal)
+{
+	HeapTuple	indexTuple;
+	Form_pg_index indexForm;
+	Relation	pg_index;
+	ListCell   *index;
+
+	/* Disallow applying to a partitioned table */
+	if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot mark index clustered in partitioned table")));
+
+	/*
+	 * If the index is already marked clustered, no need to do anything.
+	 */
+	if (OidIsValid(indexOid))
+	{
+		if (get_index_isclustered(indexOid))
+			return;
+	}
+
+	/*
+	 * Check each index of the relation and set/clear the bit as needed.
+	 */
+	pg_index = table_open(IndexRelationId, RowExclusiveLock);
+
+	foreach(index, RelationGetIndexList(rel))
+	{
+		Oid			thisIndexOid = lfirst_oid(index);
+
+		indexTuple = SearchSysCacheCopy1(INDEXRELID,
+										 ObjectIdGetDatum(thisIndexOid));
+		if (!HeapTupleIsValid(indexTuple))
+			elog(ERROR, "cache lookup failed for index %u", thisIndexOid);
+		indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
+
+		/*
+		 * Unset the bit if set.  We know it's wrong because we checked this
+		 * earlier.
+		 */
+		if (indexForm->indisclustered)
+		{
+			indexForm->indisclustered = false;
+			CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
+		}
+		else if (thisIndexOid == indexOid)
+		{
+			/* this was checked earlier, but let's be real sure */
+			if (!indexForm->indisvalid)
+				elog(ERROR, "cannot cluster on invalid index %u", indexOid);
+			indexForm->indisclustered = true;
+			CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
+		}
+
+		InvokeObjectPostAlterHookArg(IndexRelationId, thisIndexOid, 0,
+									 InvalidOid, is_internal);
+
+		heap_freetuple(indexTuple);
+	}
+
+	table_close(pg_index, RowExclusiveLock);
+}
+
+/*
+ * rebuild_relation: rebuild an existing relation in index or physical order
+ *
+ * OldHeap: table to rebuild --- must be opened and exclusive-locked!
+ * indexOid: index to cluster by, or InvalidOid to rewrite in physical order.
+ *
+ * NB: this routine closes OldHeap at the right time; caller should not.
+ */
+static void
+rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose)
+{
+	Oid			tableOid = RelationGetRelid(OldHeap);
+	Oid			accessMethod = OldHeap->rd_rel->relam;
+	Oid			tableSpace = OldHeap->rd_rel->reltablespace;
+	Oid			OIDNewHeap;
+	char		relpersistence;
+	bool		is_system_catalog;
+	bool		swap_toast_by_content;
+	TransactionId frozenXid;
+	MultiXactId cutoffMulti;
+
+	if (OidIsValid(indexOid))
+		/* Mark the correct index as clustered */
+		mark_index_clustered(OldHeap, indexOid, true);
+
+	/* Remember info about rel before closing OldHeap */
+	relpersistence = OldHeap->rd_rel->relpersistence;
+	is_system_catalog = IsSystemRelation(OldHeap);
+
+	/* Close relcache entry, but keep lock until transaction commit */
+	table_close(OldHeap, NoLock);
+
+	/* Create the transient table that will receive the re-ordered data */
+	OIDNewHeap = make_new_heap(tableOid, tableSpace,
+							   accessMethod,
+							   relpersistence,
+							   AccessExclusiveLock);
+
+	/*
+	 * Copy the heap data into the new table in the desired order.
+	 * copy_table_data also reports back the toast-swap mode and the
+	 * freeze/multixact cutoffs that finish_heap_swap installs below.
+	 */
+	copy_table_data(OIDNewHeap, tableOid, indexOid, verbose,
+					&swap_toast_by_content, &frozenXid, &cutoffMulti);
+
+	/*
+	 * Swap the physical files of the target and transient tables, then
+	 * rebuild the target's indexes and throw away the transient table.
+	 */
+	finish_heap_swap(tableOid, OIDNewHeap, is_system_catalog,
+					 swap_toast_by_content, false, true,
+					 frozenXid, cutoffMulti,
+					 relpersistence);
+}
+
+
+/*
+ * Create the transient table that will be filled with new data during
+ * CLUSTER, ALTER TABLE, and similar operations.  The transient table
+ * duplicates the logical structure of the OldHeap; but will have the
+ * specified physical storage properties NewTableSpace, NewAccessMethod, and
+ * relpersistence.
+ *
+ * After this, the caller should load the new heap with transferred/modified
+ * data, then call finish_heap_swap to complete the operation.
+ */
+Oid
+make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, Oid NewAccessMethod,
+			  char relpersistence, LOCKMODE lockmode)
+{
+	TupleDesc	OldHeapDesc;
+	char		NewHeapName[NAMEDATALEN];
+	Oid			OIDNewHeap;
+	Oid			toastid;
+	Relation	OldHeap;
+	HeapTuple	tuple;
+	Datum		reloptions;
+	bool		isNull;
+	Oid			namespaceid;
+
+	OldHeap = table_open(OIDOldHeap, lockmode);
+	OldHeapDesc = RelationGetDescr(OldHeap);
+
+	/*
+	 * Note that the NewHeap will not receive any of the defaults or
+	 * constraints associated with the OldHeap; we don't need 'em, and there's
+	 * no reason to spend cycles inserting them into the catalogs only to
+	 * delete them.
+	 */
+
+	/*
+	 * But we do want to use reloptions of the old heap for new heap.
+	 */
+	tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(OIDOldHeap));
+	if (!HeapTupleIsValid(tuple))
+		elog(ERROR, "cache lookup failed for relation %u", OIDOldHeap);
+	reloptions = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
+								 &isNull);
+	if (isNull)
+		reloptions = (Datum) 0;
+
+	if (relpersistence == RELPERSISTENCE_TEMP)
+		namespaceid = LookupCreationNamespace("pg_temp");
+	else
+		namespaceid = RelationGetNamespace(OldHeap);
+
+	/*
+	 * Create the new heap, using a temporary name in the same namespace as
+	 * the existing table.  NOTE: there is some risk of collision with user
+	 * relnames.  Working around this seems more trouble than it's worth; in
+	 * particular, we can't create the new heap in a different namespace from
+	 * the old, or we will have problems with the TEMP status of temp tables.
+	 *
+	 * Note: the new heap is not a shared relation, even if we are rebuilding
+	 * a shared rel.  However, we do make the new heap mapped if the source is
+	 * mapped.  This simplifies swap_relation_files, and is absolutely
+	 * necessary for rebuilding pg_class, for reasons explained there.
+	 */
+	snprintf(NewHeapName, sizeof(NewHeapName), "pg_temp_%u", OIDOldHeap);
+
+	OIDNewHeap = heap_create_with_catalog(NewHeapName,
+										  namespaceid,
+										  NewTableSpace,
+										  InvalidOid,
+										  InvalidOid,
+										  InvalidOid,
+										  OldHeap->rd_rel->relowner,
+										  NewAccessMethod,
+										  OldHeapDesc,
+										  NIL,
+										  RELKIND_RELATION,
+										  relpersistence,
+										  false,
+										  RelationIsMapped(OldHeap),
+										  ONCOMMIT_NOOP,
+										  reloptions,
+										  false,
+										  true,
+										  true,
+										  OIDOldHeap,
+										  NULL);
+	Assert(OIDNewHeap != InvalidOid);
+
+	ReleaseSysCache(tuple);
+
+	/*
+	 * Advance command counter so that the newly-created relation's catalog
+	 * tuples will be visible to table_open.
+	 */
+	CommandCounterIncrement();
+
+	/*
+	 * If necessary, create a TOAST table for the new relation.
+	 *
+	 * If the relation doesn't have a TOAST table already, we can't need one
+	 * for the new relation.  The other way around is possible though: if some
+	 * wide columns have been dropped, NewHeapCreateToastTable can decide that
+	 * no TOAST table is needed for the new table.
+	 *
+	 * Note that NewHeapCreateToastTable ends with CommandCounterIncrement, so
+	 * that the TOAST table will be visible for insertion.
+	 */
+	toastid = OldHeap->rd_rel->reltoastrelid;
+	if (OidIsValid(toastid))
+	{
+		/* keep the existing toast table's reloptions, if any */
+		tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(toastid));
+		if (!HeapTupleIsValid(tuple))
+			elog(ERROR, "cache lookup failed for relation %u", toastid);
+		reloptions = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
+									 &isNull);
+		if (isNull)
+			reloptions = (Datum) 0;
+
+		NewHeapCreateToastTable(OIDNewHeap, reloptions, lockmode, toastid);
+
+		ReleaseSysCache(tuple);
+	}
+
+	/* Close old heap, but keep the caller-specified lock until commit */
+	table_close(OldHeap, NoLock);
+
+	return OIDNewHeap;
+}
+
+/*
+ * Do the physical copying of table data.
+ *
+ * There are three output parameters:
+ * *pSwapToastByContent is set true if toast tables must be swapped by content.
+ * *pFreezeXid receives the TransactionId used as freeze cutoff point.
+ * *pCutoffMulti receives the MultiXactId used as a cutoff point.
+ */
+static void
+copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
+				bool *pSwapToastByContent, TransactionId *pFreezeXid,
+				MultiXactId *pCutoffMulti)
+{
+	Relation	NewHeap,
+				OldHeap,
+				OldIndex;
+	Relation	relRelation;
+	HeapTuple	reltup;
+	Form_pg_class relform;
+	TupleDesc	oldTupDesc PG_USED_FOR_ASSERTS_ONLY;
+	TupleDesc	newTupDesc PG_USED_FOR_ASSERTS_ONLY;
+	TransactionId OldestXmin,
+				FreezeXid;
+	MultiXactId OldestMxact,
+				MultiXactCutoff;
+	bool		use_sort;
+	double		num_tuples = 0,
+				tups_vacuumed = 0,
+				tups_recently_dead = 0;
+	BlockNumber num_pages;
+	int			elevel = verbose ? INFO : DEBUG2;	/* messages reach client only if VERBOSE */
+	PGRUsage	ru0;
+	char	   *nspname;
+
+	pg_rusage_init(&ru0);
+
+	/*
+	 * Open the relations we need.
+	 */
+	NewHeap = table_open(OIDNewHeap, AccessExclusiveLock);
+	OldHeap = table_open(OIDOldHeap, AccessExclusiveLock);
+	if (OidIsValid(OIDOldIndex))
+		OldIndex = index_open(OIDOldIndex, AccessExclusiveLock);
+	else
+		OldIndex = NULL;
+
+	/* Store a copy of the namespace name for logging purposes */
+	nspname = get_namespace_name(RelationGetNamespace(OldHeap));
+
+	/*
+	 * Their tuple descriptors should be exactly alike, but here we only need
+	 * assume that they have the same number of columns.
+	 */
+	oldTupDesc = RelationGetDescr(OldHeap);
+	newTupDesc = RelationGetDescr(NewHeap);
+	Assert(newTupDesc->natts == oldTupDesc->natts);
+
+	/*
+	 * If the OldHeap has a toast table, get lock on the toast table to keep
+	 * it from being vacuumed.  This is needed because autovacuum processes
+	 * toast tables independently of their main tables, with no lock on the
+	 * latter.  If an autovacuum were to start on the toast table after we
+	 * compute our OldestXmin below, it would use a later OldestXmin, and then
+	 * possibly remove as DEAD toast tuples belonging to main tuples we think
+	 * are only RECENTLY_DEAD.  Then we'd fail while trying to copy those
+	 * tuples.
+	 *
+	 * We don't need to open the toast relation here, just lock it.  The lock
+	 * will be held till end of transaction.
+	 */
+	if (OldHeap->rd_rel->reltoastrelid)
+		LockRelationOid(OldHeap->rd_rel->reltoastrelid, AccessExclusiveLock);
+
+	/*
+	 * If both tables have TOAST tables, perform toast swap by content.  It is
+	 * possible that the old table has a toast table but the new one doesn't,
+	 * if toastable columns have been dropped.  In that case we have to do
+	 * swap by links.  This is okay because swap by content is only essential
+	 * for system catalogs, and we don't support schema changes for them.
+	 */
+	if (OldHeap->rd_rel->reltoastrelid && NewHeap->rd_rel->reltoastrelid)
+	{
+		*pSwapToastByContent = true;
+
+		/*
+		 * When doing swap by content, any toast pointers written into NewHeap
+		 * must use the old toast table's OID, because that's where the toast
+		 * data will eventually be found.  Set this up by setting rd_toastoid.
+		 * This also tells toast_save_datum() to preserve the toast value
+		 * OIDs, which we want so as not to invalidate toast pointers in
+		 * system catalog caches, and to avoid making multiple copies of a
+		 * single toast value.
+		 *
+		 * Note that we must hold NewHeap open until we are done writing data,
+		 * since the relcache will not guarantee to remember this setting once
+		 * the relation is closed.  Also, this technique depends on the fact
+		 * that no one will try to read from the NewHeap until after we've
+		 * finished writing it and swapping the rels --- otherwise they could
+		 * follow the toast pointers to the wrong place.  (It would actually
+		 * work for values copied over from the old toast table, but not for
+		 * any values that we toast which were previously not toasted.)
+		 */
+		NewHeap->rd_toastoid = OldHeap->rd_rel->reltoastrelid;
+	}
+	else
+		*pSwapToastByContent = false;
+
+	/*
+	 * Compute xids used to freeze and weed out dead tuples and multixacts.
+	 * Since we're going to rewrite the whole table anyway, there's no reason
+	 * not to be aggressive about this.
+	 */
+	vacuum_set_xid_limits(OldHeap, 0, 0, 0, 0, &OldestXmin, &OldestMxact,
+						  &FreezeXid, &MultiXactCutoff);
+
+	/*
+	 * FreezeXid will become the table's new relfrozenxid, and that mustn't go
+	 * backwards, so take the max.
+	 */
+	if (TransactionIdIsValid(OldHeap->rd_rel->relfrozenxid) &&
+		TransactionIdPrecedes(FreezeXid, OldHeap->rd_rel->relfrozenxid))
+		FreezeXid = OldHeap->rd_rel->relfrozenxid;
+
+	/*
+	 * MultiXactCutoff, similarly, shouldn't go backwards either.
+	 */
+	if (MultiXactIdIsValid(OldHeap->rd_rel->relminmxid) &&
+		MultiXactIdPrecedes(MultiXactCutoff, OldHeap->rd_rel->relminmxid))
+		MultiXactCutoff = OldHeap->rd_rel->relminmxid;
+
+	/*
+	 * Decide whether to use an indexscan or seqscan-and-optional-sort to scan
+	 * the OldHeap.  We know how to use a sort to duplicate the ordering of a
+	 * btree index, and will use seqscan-and-sort for that case if the planner
+	 * tells us it's cheaper.  Otherwise, always indexscan if an index is
+	 * provided, else plain seqscan.
+	 */
+	if (OldIndex != NULL && OldIndex->rd_rel->relam == BTREE_AM_OID)
+		use_sort = plan_cluster_use_sort(OIDOldHeap, OIDOldIndex);
+	else
+		use_sort = false;
+
+	/* Log what we're doing */
+	if (OldIndex != NULL && !use_sort)
+		ereport(elevel,
+				(errmsg("clustering \"%s.%s\" using index scan on \"%s\"",
+						nspname,
+						RelationGetRelationName(OldHeap),
+						RelationGetRelationName(OldIndex))));
+	else if (use_sort)
+		ereport(elevel,
+				(errmsg("clustering \"%s.%s\" using sequential scan and sort",
+						nspname,
+						RelationGetRelationName(OldHeap))));
+	else
+		ereport(elevel,
+				(errmsg("vacuuming \"%s.%s\"",
+						nspname,
+						RelationGetRelationName(OldHeap))));
+
+	/*
+	 * Hand off the actual copying to AM specific function, the generic code
+	 * cannot know how to deal with visibility across AMs. Note that this
+	 * routine is allowed to set FreezeXid / MultiXactCutoff to different
+	 * values (e.g. because the AM doesn't use freezing).
+	 */
+	table_relation_copy_for_cluster(OldHeap, NewHeap, OldIndex, use_sort,
+									OldestXmin, &FreezeXid, &MultiXactCutoff,
+									&num_tuples, &tups_vacuumed,
+									&tups_recently_dead);
+
+	/* return selected values to caller, get set as relfrozenxid/minmxid */
+	*pFreezeXid = FreezeXid;
+	*pCutoffMulti = MultiXactCutoff;
+
+	/* Reset rd_toastoid just to be tidy --- it shouldn't be looked at again */
+	NewHeap->rd_toastoid = InvalidOid;
+
+	num_pages = RelationGetNumberOfBlocks(NewHeap);
+
+	/* Log what we did */
+	ereport(elevel,
+			(errmsg("\"%s.%s\": found %.0f removable, %.0f nonremovable row versions in %u pages",
+					nspname,
+					RelationGetRelationName(OldHeap),
+					tups_vacuumed, num_tuples,
+					RelationGetNumberOfBlocks(OldHeap)),
+			 errdetail("%.0f dead row versions cannot be removed yet.\n"
+					   "%s.",
+					   tups_recently_dead,
+					   pg_rusage_show(&ru0))));
+
+	if (OldIndex != NULL)
+		index_close(OldIndex, NoLock);
+	table_close(OldHeap, NoLock);
+	table_close(NewHeap, NoLock);
+
+	/* Update pg_class to reflect the correct values of pages and tuples. */
+	relRelation = table_open(RelationRelationId, RowExclusiveLock);
+
+	reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(OIDNewHeap));
+	if (!HeapTupleIsValid(reltup))
+		elog(ERROR, "cache lookup failed for relation %u", OIDNewHeap);
+	relform = (Form_pg_class) GETSTRUCT(reltup);
+
+	relform->relpages = num_pages;
+	relform->reltuples = num_tuples;
+
+	/* Don't update the stats for pg_class.  See swap_relation_files. */
+	if (OIDOldHeap != RelationRelationId)
+		CatalogTupleUpdate(relRelation, &reltup->t_self, reltup);
+	else
+		CacheInvalidateRelcacheByTuple(reltup);
+
+	/* Clean up. */
+	heap_freetuple(reltup);
+	table_close(relRelation, RowExclusiveLock);
+
+	/* Make the update visible */
+	CommandCounterIncrement();
+}
+
+/*
+ * Swap the physical files of two given relations.
 *
 * We swap the physical identity (reltablespace, relfilenode) while keeping the
 * same logical identities of the two relations. relpersistence is also
 * swapped, which is critical since it determines where buffers live for each
 * relation.
 *
 * We can swap associated TOAST data in either of two ways: recursively swap
 * the physical content of the toast tables (and their indexes), or swap the
 * TOAST links in the given relations' pg_class entries. The former is needed
 * to manage rewrites of shared catalogs (where we cannot change the pg_class
 * links) while the latter is the only way to handle cases in which a toast
 * table is added or removed altogether.
 *
 * Additionally, the first relation is marked with relfrozenxid set to
 * frozenXid. It seems a bit ugly to have this here, but the caller would
 * have to do it anyway, so having it here saves a heap_update. Note: in
 * the swap-toast-links case, we assume we don't need to change the toast
 * table's relfrozenxid: the new version of the toast table should already
 * have relfrozenxid set to RecentXmin, which is good enough.
 *
 * Lastly, if r2 and its toast table and toast index (if any) are mapped,
 * their OIDs are emitted into mapped_tables[]. This is hacky but beats
 * having to look the information up again later in finish_heap_swap.
 */
static void
swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
					bool swap_toast_by_content,
					bool is_internal,
					TransactionId frozenXid,
					MultiXactId cutoffMulti,
					Oid *mapped_tables)
{
	Relation	relRelation;
	HeapTuple	reltup1,
				reltup2;
	Form_pg_class relform1,
				relform2;
	Oid			relfilenode1,
				relfilenode2;
	Oid			swaptemp;
	char		swptmpchr;
	Oid			relam1,
				relam2;

	/* We need writable copies of both pg_class tuples. */
	relRelation = table_open(RelationRelationId, RowExclusiveLock);

	reltup1 = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(r1));
	if (!HeapTupleIsValid(reltup1))
		elog(ERROR, "cache lookup failed for relation %u", r1);
	relform1 = (Form_pg_class) GETSTRUCT(reltup1);

	reltup2 = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(r2));
	if (!HeapTupleIsValid(reltup2))
		elog(ERROR, "cache lookup failed for relation %u", r2);
	relform2 = (Form_pg_class) GETSTRUCT(reltup2);

	/* Remember the pre-swap values; relam1/relam2 are needed further below */
	relfilenode1 = relform1->relfilenode;
	relfilenode2 = relform2->relfilenode;
	relam1 = relform1->relam;
	relam2 = relform2->relam;

	if (OidIsValid(relfilenode1) && OidIsValid(relfilenode2))
	{
		/*
		 * Normal non-mapped relations: swap relfilenodes, reltablespaces,
		 * relpersistence
		 */
		Assert(!target_is_pg_class);

		swaptemp = relform1->relfilenode;
		relform1->relfilenode = relform2->relfilenode;
		relform2->relfilenode = swaptemp;

		swaptemp = relform1->reltablespace;
		relform1->reltablespace = relform2->reltablespace;
		relform2->reltablespace = swaptemp;

		swaptemp = relform1->relam;
		relform1->relam = relform2->relam;
		relform2->relam = swaptemp;

		swptmpchr = relform1->relpersistence;
		relform1->relpersistence = relform2->relpersistence;
		relform2->relpersistence = swptmpchr;

		/* Also swap toast links, if we're swapping by links */
		if (!swap_toast_by_content)
		{
			swaptemp = relform1->reltoastrelid;
			relform1->reltoastrelid = relform2->reltoastrelid;
			relform2->reltoastrelid = swaptemp;
		}
	}
	else
	{
		/*
		 * Mapped-relation case. Here we have to swap the relation mappings
		 * instead of modifying the pg_class columns. Both must be mapped.
		 */
		if (OidIsValid(relfilenode1) || OidIsValid(relfilenode2))
			elog(ERROR, "cannot swap mapped relation \"%s\" with non-mapped relation",
				 NameStr(relform1->relname));

		/*
		 * We can't change the tablespace nor persistence of a mapped rel, and
		 * we can't handle toast link swapping for one either, because we must
		 * not apply any critical changes to its pg_class row. These cases
		 * should be prevented by upstream permissions tests, so these checks
		 * are non-user-facing emergency backstop.
		 */
		if (relform1->reltablespace != relform2->reltablespace)
			elog(ERROR, "cannot change tablespace of mapped relation \"%s\"",
				 NameStr(relform1->relname));
		if (relform1->relpersistence != relform2->relpersistence)
			elog(ERROR, "cannot change persistence of mapped relation \"%s\"",
				 NameStr(relform1->relname));
		if (relform1->relam != relform2->relam)
			elog(ERROR, "cannot change access method of mapped relation \"%s\"",
				 NameStr(relform1->relname));
		if (!swap_toast_by_content &&
			(relform1->reltoastrelid || relform2->reltoastrelid))
			elog(ERROR, "cannot swap toast by links for mapped relation \"%s\"",
				 NameStr(relform1->relname));

		/*
		 * Fetch the mappings --- shouldn't fail, but be paranoid
		 */
		relfilenode1 = RelationMapOidToFilenode(r1, relform1->relisshared);
		if (!OidIsValid(relfilenode1))
			elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
				 NameStr(relform1->relname), r1);
		relfilenode2 = RelationMapOidToFilenode(r2, relform2->relisshared);
		if (!OidIsValid(relfilenode2))
			elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
				 NameStr(relform2->relname), r2);

		/*
		 * Send replacement mappings to relmapper. Note these won't actually
		 * take effect until CommandCounterIncrement.
		 */
		RelationMapUpdateMap(r1, relfilenode2, relform1->relisshared, false);
		RelationMapUpdateMap(r2, relfilenode1, relform2->relisshared, false);

		/* Pass OIDs of mapped r2 tables back to caller */
		*mapped_tables++ = r2;
	}

	/*
	 * Recognize that rel1's relfilenode (swapped from rel2) is new in this
	 * subtransaction. The rel2 storage (swapped from rel1) may or may not be
	 * new.
	 */
	{
		Relation	rel1,
					rel2;

		rel1 = relation_open(r1, NoLock);
		rel2 = relation_open(r2, NoLock);
		rel2->rd_createSubid = rel1->rd_createSubid;
		rel2->rd_newRelfilenodeSubid = rel1->rd_newRelfilenodeSubid;
		rel2->rd_firstRelfilenodeSubid = rel1->rd_firstRelfilenodeSubid;
		RelationAssumeNewRelfilenode(rel1);
		relation_close(rel1, NoLock);
		relation_close(rel2, NoLock);
	}

	/*
	 * In the case of a shared catalog, these next few steps will only affect
	 * our own database's pg_class row; but that's okay, because they are all
	 * noncritical updates. That's also an important fact for the case of a
	 * mapped catalog, because it's possible that we'll commit the map change
	 * and then fail to commit the pg_class update.
	 */

	/* set rel1's frozen Xid and minimum MultiXid */
	if (relform1->relkind != RELKIND_INDEX)
	{
		Assert(!TransactionIdIsValid(frozenXid) ||
			   TransactionIdIsNormal(frozenXid));
		relform1->relfrozenxid = frozenXid;
		relform1->relminmxid = cutoffMulti;
	}

	/* swap size statistics too, since new rel has freshly-updated stats */
	{
		int32		swap_pages;
		float4		swap_tuples;
		int32		swap_allvisible;

		swap_pages = relform1->relpages;
		relform1->relpages = relform2->relpages;
		relform2->relpages = swap_pages;

		swap_tuples = relform1->reltuples;
		relform1->reltuples = relform2->reltuples;
		relform2->reltuples = swap_tuples;

		swap_allvisible = relform1->relallvisible;
		relform1->relallvisible = relform2->relallvisible;
		relform2->relallvisible = swap_allvisible;
	}

	/*
	 * Update the tuples in pg_class --- unless the target relation of the
	 * swap is pg_class itself. In that case, there is zero point in making
	 * changes because we'd be updating the old data that we're about to throw
	 * away. Because the real work being done here for a mapped relation is
	 * just to change the relation map settings, it's all right to not update
	 * the pg_class rows in this case. The most important changes will instead
	 * be performed later, in finish_heap_swap() itself.
	 */
	if (!target_is_pg_class)
	{
		CatalogIndexState indstate;

		indstate = CatalogOpenIndexes(relRelation);
		CatalogTupleUpdateWithInfo(relRelation, &reltup1->t_self, reltup1,
								   indstate);
		CatalogTupleUpdateWithInfo(relRelation, &reltup2->t_self, reltup2,
								   indstate);
		CatalogCloseIndexes(indstate);
	}
	else
	{
		/* no update ... but we do still need relcache inval */
		CacheInvalidateRelcacheByTuple(reltup1);
		CacheInvalidateRelcacheByTuple(reltup2);
	}

	/*
	 * Now that pg_class has been updated with its relevant information for
	 * the swap, update the dependency of the relations to point to their new
	 * table AM, if it has changed.
	 */
	if (relam1 != relam2)
	{
		if (changeDependencyFor(RelationRelationId,
								r1,
								AccessMethodRelationId,
								relam1,
								relam2) != 1)
			elog(ERROR, "failed to change access method dependency for relation \"%s.%s\"",
				 get_namespace_name(get_rel_namespace(r1)),
				 get_rel_name(r1));
		if (changeDependencyFor(RelationRelationId,
								r2,
								AccessMethodRelationId,
								relam2,
								relam1) != 1)
			elog(ERROR, "failed to change access method dependency for relation \"%s.%s\"",
				 get_namespace_name(get_rel_namespace(r2)),
				 get_rel_name(r2));
	}

	/*
	 * Post alter hook for modified relations. The change to r2 is always
	 * internal, but r1 depends on the invocation context.
	 */
	InvokeObjectPostAlterHookArg(RelationRelationId, r1, 0,
								 InvalidOid, is_internal);
	InvokeObjectPostAlterHookArg(RelationRelationId, r2, 0,
								 InvalidOid, true);

	/*
	 * If we have toast tables associated with the relations being swapped,
	 * deal with them too.
	 */
	if (relform1->reltoastrelid || relform2->reltoastrelid)
	{
		if (swap_toast_by_content)
		{
			if (relform1->reltoastrelid && relform2->reltoastrelid)
			{
				/* Recursively swap the contents of the toast tables */
				swap_relation_files(relform1->reltoastrelid,
									relform2->reltoastrelid,
									target_is_pg_class,
									swap_toast_by_content,
									is_internal,
									frozenXid,
									cutoffMulti,
									mapped_tables);
			}
			else
			{
				/* caller messed up */
				elog(ERROR, "cannot swap toast files by content when there's only one");
			}
		}
		else
		{
			/*
			 * We swapped the ownership links, so we need to change dependency
			 * data to match.
			 *
			 * NOTE: it is possible that only one table has a toast table.
			 *
			 * NOTE: at present, a TOAST table's only dependency is the one on
			 * its owning table. If more are ever created, we'd need to use
			 * something more selective than deleteDependencyRecordsFor() to
			 * get rid of just the link we want.
			 */
			ObjectAddress baseobject,
						toastobject;
			long		count;

			/*
			 * We disallow this case for system catalogs, to avoid the
			 * possibility that the catalog we're rebuilding is one of the
			 * ones the dependency changes would change. It's too late to be
			 * making any data changes to the target catalog.
			 */
			if (IsSystemClass(r1, relform1))
				elog(ERROR, "cannot swap toast files by links for system catalogs");

			/* Delete old dependencies */
			if (relform1->reltoastrelid)
			{
				count = deleteDependencyRecordsFor(RelationRelationId,
												   relform1->reltoastrelid,
												   false);
				if (count != 1)
					elog(ERROR, "expected one dependency record for TOAST table, found %ld",
						 count);
			}
			if (relform2->reltoastrelid)
			{
				count = deleteDependencyRecordsFor(RelationRelationId,
												   relform2->reltoastrelid,
												   false);
				if (count != 1)
					elog(ERROR, "expected one dependency record for TOAST table, found %ld",
						 count);
			}

			/* Register new dependencies */
			baseobject.classId = RelationRelationId;
			baseobject.objectSubId = 0;
			toastobject.classId = RelationRelationId;
			toastobject.objectSubId = 0;

			if (relform1->reltoastrelid)
			{
				baseobject.objectId = r1;
				toastobject.objectId = relform1->reltoastrelid;
				recordDependencyOn(&toastobject, &baseobject,
								   DEPENDENCY_INTERNAL);
			}

			if (relform2->reltoastrelid)
			{
				baseobject.objectId = r2;
				toastobject.objectId = relform2->reltoastrelid;
				recordDependencyOn(&toastobject, &baseobject,
								   DEPENDENCY_INTERNAL);
			}
		}
	}

	/*
	 * If we're swapping two toast tables by content, do the same for their
	 * valid index. The swap can actually be safely done only if the relations
	 * have indexes.
	 */
	if (swap_toast_by_content &&
		relform1->relkind == RELKIND_TOASTVALUE &&
		relform2->relkind == RELKIND_TOASTVALUE)
	{
		Oid			toastIndex1,
					toastIndex2;

		/* Get valid index for each relation */
		toastIndex1 = toast_get_valid_index(r1,
											AccessExclusiveLock);
		toastIndex2 = toast_get_valid_index(r2,
											AccessExclusiveLock);

		/* no need to re-freeze an index, hence the invalid cutoffs */
		swap_relation_files(toastIndex1,
							toastIndex2,
							target_is_pg_class,
							swap_toast_by_content,
							is_internal,
							InvalidTransactionId,
							InvalidMultiXactId,
							mapped_tables);
	}

	/* Clean up. */
	heap_freetuple(reltup1);
	heap_freetuple(reltup2);

	table_close(relRelation, RowExclusiveLock);

	/*
	 * Close both relcache entries' smgr links. We need this kluge because
	 * both links will be invalidated during upcoming CommandCounterIncrement.
	 * Whichever of the rels is the second to be cleared will have a dangling
	 * reference to the other's smgr entry. Rather than trying to avoid this
	 * by ordering operations just so, it's easiest to close the links first.
	 * (Fortunately, since one of the entries is local in our transaction,
	 * it's sufficient to clear out our own relcache this way; the problem
	 * cannot arise for other backends when they see our update on the
	 * non-transient relation.)
	 *
	 * Caution: the placement of this step interacts with the decision to
	 * handle toast rels by recursion. When we are trying to rebuild pg_class
	 * itself, the smgr close on pg_class must happen after all accesses in
	 * this function.
	 */
	RelationCloseSmgrByOid(r1);
	RelationCloseSmgrByOid(r2);
}

/*
 * Remove the transient table that was built by make_new_heap, and finish
 * cleaning up (including rebuilding all indexes on the old heap).
 */
void
finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap,
				 bool is_system_catalog,
				 bool swap_toast_by_content,
				 bool check_constraints,
				 bool is_internal,
				 TransactionId frozenXid,
				 MultiXactId cutoffMulti,
				 char newrelpersistence)
{
	ObjectAddress object;
	Oid			mapped_tables[4];
	int			reindex_flags;
	ReindexParams reindex_params = {0};
	int			i;

	/* Report that we are now swapping relation files */
	pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
								 PROGRESS_CLUSTER_PHASE_SWAP_REL_FILES);

	/* Zero out possible results from swap_relation_files */
	memset(mapped_tables, 0, sizeof(mapped_tables));

	/*
	 * Swap the contents of the heap relations (including any toast tables).
	 * Also set old heap's relfrozenxid to frozenXid.
	 */
	swap_relation_files(OIDOldHeap, OIDNewHeap,
						(OIDOldHeap == RelationRelationId),
						swap_toast_by_content, is_internal,
						frozenXid, cutoffMulti, mapped_tables);

	/*
	 * If it's a system catalog, queue a sinval message to flush all catcaches
	 * on the catalog when we reach CommandCounterIncrement.
	 */
	if (is_system_catalog)
		CacheInvalidateCatalog(OIDOldHeap);

	/*
	 * Rebuild each index on the relation (but not the toast table, which is
	 * all-new at this point). It is important to do this before the DROP
	 * step because if we are processing a system catalog that will be used
	 * during DROP, we want to have its indexes available. There is no
	 * advantage to the other order anyway because this is all transactional,
	 * so no chance to reclaim disk space before commit. We do not need a
	 * final CommandCounterIncrement() because reindex_relation does it.
	 *
	 * Note: because index_build is called via reindex_relation, it will never
	 * set indcheckxmin true for the indexes. This is OK even though in some
	 * sense we are building new indexes rather than rebuilding existing ones,
	 * because the new heap won't contain any HOT chains at all, let alone
	 * broken ones, so it can't be necessary to set indcheckxmin.
	 */
	reindex_flags = REINDEX_REL_SUPPRESS_INDEX_USE;
	if (check_constraints)
		reindex_flags |= REINDEX_REL_CHECK_CONSTRAINTS;

	/*
	 * Ensure that the indexes have the same persistence as the parent
	 * relation.
	 */
	if (newrelpersistence == RELPERSISTENCE_UNLOGGED)
		reindex_flags |= REINDEX_REL_FORCE_INDEXES_UNLOGGED;
	else if (newrelpersistence == RELPERSISTENCE_PERMANENT)
		reindex_flags |= REINDEX_REL_FORCE_INDEXES_PERMANENT;

	/* Report that we are now reindexing relations */
	pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
								 PROGRESS_CLUSTER_PHASE_REBUILD_INDEX);

	reindex_relation(OIDOldHeap, reindex_flags, &reindex_params);

	/* Report that we are now doing clean up */
	pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
								 PROGRESS_CLUSTER_PHASE_FINAL_CLEANUP);

	/*
	 * If the relation being rebuilt is pg_class, swap_relation_files()
	 * couldn't update pg_class's own pg_class entry (check comments in
	 * swap_relation_files()), thus relfrozenxid was not updated. That's
	 * annoying because a potential reason for doing a VACUUM FULL is an
	 * imminent or actual anti-wraparound shutdown. So, now that we can
	 * access the new relation using its indices, update relfrozenxid.
	 * pg_class doesn't have a toast relation, so we don't need to update the
	 * corresponding toast relation. Note that there's little point moving all
	 * relfrozenxid updates here since swap_relation_files() needs to write to
	 * pg_class for non-mapped relations anyway.
	 */
	if (OIDOldHeap == RelationRelationId)
	{
		Relation	relRelation;
		HeapTuple	reltup;
		Form_pg_class relform;

		relRelation = table_open(RelationRelationId, RowExclusiveLock);

		reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(OIDOldHeap));
		if (!HeapTupleIsValid(reltup))
			elog(ERROR, "cache lookup failed for relation %u", OIDOldHeap);
		relform = (Form_pg_class) GETSTRUCT(reltup);

		relform->relfrozenxid = frozenXid;
		relform->relminmxid = cutoffMulti;

		CatalogTupleUpdate(relRelation, &reltup->t_self, reltup);

		table_close(relRelation, RowExclusiveLock);
	}

	/* Destroy new heap with old filenode */
	object.classId = RelationRelationId;
	object.objectId = OIDNewHeap;
	object.objectSubId = 0;

	/*
	 * The new relation is local to our transaction and we know nothing
	 * depends on it, so DROP_RESTRICT should be OK.
	 */
	performDeletion(&object, DROP_RESTRICT, PERFORM_DELETION_INTERNAL);

	/* performDeletion does CommandCounterIncrement at end */

	/*
	 * Now we must remove any relation mapping entries that we set up for the
	 * transient table, as well as its toast table and toast index if any. If
	 * we fail to do this before commit, the relmapper will complain about new
	 * permanent map entries being added post-bootstrap.
	 */
	for (i = 0; OidIsValid(mapped_tables[i]); i++)
		RelationMapRemoveMapping(mapped_tables[i]);

	/*
	 * At this point, everything is kosher except that, if we did toast swap
	 * by links, the toast table's name corresponds to the transient table.
	 * The name is irrelevant to the backend because it's referenced by OID,
	 * but users looking at the catalogs could be confused. Rename it to
	 * prevent this problem.
	 *
	 * Note no lock required on the relation, because we already hold an
	 * exclusive lock on it.
	 */
	if (!swap_toast_by_content)
	{
		Relation	newrel;

		newrel = table_open(OIDOldHeap, NoLock);
		if (OidIsValid(newrel->rd_rel->reltoastrelid))
		{
			Oid			toastidx;
			char		NewToastName[NAMEDATALEN];

			/* Get the associated valid index to be renamed */
			toastidx = toast_get_valid_index(newrel->rd_rel->reltoastrelid,
											 NoLock);

			/* rename the toast table ... */
			snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u",
					 OIDOldHeap);
			RenameRelationInternal(newrel->rd_rel->reltoastrelid,
								   NewToastName, true, false);

			/* ... and its valid index too. */
			snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u_index",
					 OIDOldHeap);

			RenameRelationInternal(toastidx,
								   NewToastName, true, true);

			/*
			 * Reset the relrewrite for the toast. The command-counter
			 * increment is required here as we are about to update the tuple
			 * that is updated as part of RenameRelationInternal.
			 */
			CommandCounterIncrement();
			ResetRelRewrite(newrel->rd_rel->reltoastrelid);
		}
		relation_close(newrel, NoLock);
	}

	/* if it's not a catalog table, clear any missing attribute settings */
	if (!is_system_catalog)
	{
		Relation	newrel;

		newrel = table_open(OIDOldHeap, NoLock);
		RelationClearMissing(newrel);
		relation_close(newrel, NoLock);
	}
}


/*
 * Get a list of tables that the current user owns and
 * have indisclustered set. Return the list in a List * of RelToCluster
 * (stored in the specified memory context), each one giving the tableOid
 * and the indexOid on which the table is already clustered.
 */
static List *
get_tables_to_cluster(MemoryContext cluster_context)
{
	Relation	indRelation;
	TableScanDesc scan;
	ScanKeyData entry;
	HeapTuple	indexTuple;
	Form_pg_index index;
	MemoryContext old_context;
	List	   *rtcs = NIL;

	/*
	 * Get all indexes that have indisclustered set and are owned by
	 * appropriate user.
	 */
	indRelation = table_open(IndexRelationId, AccessShareLock);
	ScanKeyInit(&entry,
				Anum_pg_index_indisclustered,
				BTEqualStrategyNumber, F_BOOLEQ,
				BoolGetDatum(true));
	scan = table_beginscan_catalog(indRelation, 1, &entry);
	while ((indexTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
	{
		RelToCluster *rtc;

		index = (Form_pg_index) GETSTRUCT(indexTuple);

		/* silently skip tables we don't own */
		if (!pg_class_ownercheck(index->indrelid, GetUserId()))
			continue;

		/* Use a permanent memory context for the result list */
		old_context = MemoryContextSwitchTo(cluster_context);

		rtc = (RelToCluster *) palloc(sizeof(RelToCluster));
		rtc->tableOid = index->indrelid;
		rtc->indexOid = index->indexrelid;
		rtcs = lappend(rtcs, rtc);

		MemoryContextSwitchTo(old_context);
	}
	table_endscan(scan);

	relation_close(indRelation, AccessShareLock);

	return rtcs;
}

/*
 * Given an index on a partitioned table, return a list of RelToCluster for
 * all the children leaves tables/indexes.
 *
 * Like expand_vacuum_rel, but here caller must hold AccessExclusiveLock
 * on the table containing the index.
 */
static List *
get_tables_to_cluster_partitioned(MemoryContext cluster_context, Oid indexOid)
{
	List	   *inhoids;
	ListCell   *lc;
	List	   *rtcs = NIL;
	MemoryContext old_context;

	/* Do not lock the children until they're processed */
	inhoids = find_all_inheritors(indexOid, NoLock, NULL);

	foreach(lc, inhoids)
	{
		Oid			indexrelid = lfirst_oid(lc);
		Oid			relid = IndexGetRelation(indexrelid, false);
		RelToCluster *rtc;

		/* consider only leaf indexes */
		if (get_rel_relkind(indexrelid) != RELKIND_INDEX)
			continue;

		/* Silently skip partitions which the user has no access to. */
		if (!pg_class_ownercheck(relid, GetUserId()) &&
			(!pg_database_ownercheck(MyDatabaseId, GetUserId()) ||
			 IsSharedRelation(relid)))
			continue;

		/* Use a permanent memory context for the result list */
		old_context = MemoryContextSwitchTo(cluster_context);

		rtc = (RelToCluster *) palloc(sizeof(RelToCluster));
		rtc->tableOid = relid;
		rtc->indexOid = indexrelid;
		rtcs = lappend(rtcs, rtc);

		MemoryContextSwitchTo(old_context);
	}

	return rtcs;
}
diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c
new file mode 100644
index 0000000..fcfc02d
--- /dev/null
+++ b/src/backend/commands/collationcmds.c
@@ -0,0 +1,820 @@
/*-------------------------------------------------------------------------
 *
 * collationcmds.c
 *	  collation-related commands support code
 *
 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/commands/collationcmds.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/htup_details.h"
#include "access/table.h"
#include "access/xact.h"
#include "catalog/dependency.h"
#include "catalog/indexing.h"
#include "catalog/namespace.h"
#include "catalog/objectaccess.h"
#include "catalog/pg_collation.h"
#include "commands/alter.h"
#include "commands/collationcmds.h"
#include "commands/comment.h"
#include "commands/dbcommands.h"
#include "commands/defrem.h"
#include "common/string.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/pg_locale.h"
#include "utils/rel.h"
#include "utils/syscache.h"


/* one candidate locale collected while scanning "locale -a" output */
typedef struct
{
	char	   *localename;		/* name of locale, as per "locale -a" */
	char	   *alias;			/* shortened alias for same */
	int			enc;			/* encoding */
} CollAliasData;


/*
 *
CREATE COLLATION + */ +ObjectAddress +DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_exists) +{ + char *collName; + Oid collNamespace; + AclResult aclresult; + ListCell *pl; + DefElem *fromEl = NULL; + DefElem *localeEl = NULL; + DefElem *lccollateEl = NULL; + DefElem *lcctypeEl = NULL; + DefElem *providerEl = NULL; + DefElem *deterministicEl = NULL; + DefElem *versionEl = NULL; + char *collcollate; + char *collctype; + char *colliculocale; + bool collisdeterministic; + int collencoding; + char collprovider; + char *collversion = NULL; + Oid newoid; + ObjectAddress address; + + collNamespace = QualifiedNameGetCreationNamespace(names, &collName); + + aclresult = pg_namespace_aclcheck(collNamespace, GetUserId(), ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_SCHEMA, + get_namespace_name(collNamespace)); + + foreach(pl, parameters) + { + DefElem *defel = lfirst_node(DefElem, pl); + DefElem **defelp; + + if (strcmp(defel->defname, "from") == 0) + defelp = &fromEl; + else if (strcmp(defel->defname, "locale") == 0) + defelp = &localeEl; + else if (strcmp(defel->defname, "lc_collate") == 0) + defelp = &lccollateEl; + else if (strcmp(defel->defname, "lc_ctype") == 0) + defelp = &lcctypeEl; + else if (strcmp(defel->defname, "provider") == 0) + defelp = &providerEl; + else if (strcmp(defel->defname, "deterministic") == 0) + defelp = &deterministicEl; + else if (strcmp(defel->defname, "version") == 0) + defelp = &versionEl; + else + { + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("collation attribute \"%s\" not recognized", + defel->defname), + parser_errposition(pstate, defel->location))); + break; + } + if (*defelp != NULL) + errorConflictingDefElem(defel, pstate); + *defelp = defel; + } + + if (localeEl && (lccollateEl || lcctypeEl)) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"), + errdetail("LOCALE cannot be specified together with LC_COLLATE or 
LC_CTYPE.")); + + if (fromEl && list_length(parameters) != 1) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"), + errdetail("FROM cannot be specified together with any other options.")); + + if (fromEl) + { + Oid collid; + HeapTuple tp; + Datum datum; + bool isnull; + + collid = get_collation_oid(defGetQualifiedName(fromEl), false); + tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for collation %u", collid); + + collprovider = ((Form_pg_collation) GETSTRUCT(tp))->collprovider; + collisdeterministic = ((Form_pg_collation) GETSTRUCT(tp))->collisdeterministic; + collencoding = ((Form_pg_collation) GETSTRUCT(tp))->collencoding; + + datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collcollate, &isnull); + if (!isnull) + collcollate = TextDatumGetCString(datum); + else + collcollate = NULL; + + datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collctype, &isnull); + if (!isnull) + collctype = TextDatumGetCString(datum); + else + collctype = NULL; + + datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_colliculocale, &isnull); + if (!isnull) + colliculocale = TextDatumGetCString(datum); + else + colliculocale = NULL; + + ReleaseSysCache(tp); + + /* + * Copying the "default" collation is not allowed because most code + * checks for DEFAULT_COLLATION_OID instead of COLLPROVIDER_DEFAULT, + * and so having a second collation with COLLPROVIDER_DEFAULT would + * not work and potentially confuse or crash some code. This could be + * fixed with some legwork. 
+ */ + if (collprovider == COLLPROVIDER_DEFAULT) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("collation \"default\" cannot be copied"))); + } + else + { + char *collproviderstr = NULL; + + collcollate = NULL; + collctype = NULL; + colliculocale = NULL; + + if (providerEl) + collproviderstr = defGetString(providerEl); + + if (deterministicEl) + collisdeterministic = defGetBoolean(deterministicEl); + else + collisdeterministic = true; + + if (versionEl) + collversion = defGetString(versionEl); + + if (collproviderstr) + { + if (pg_strcasecmp(collproviderstr, "icu") == 0) + collprovider = COLLPROVIDER_ICU; + else if (pg_strcasecmp(collproviderstr, "libc") == 0) + collprovider = COLLPROVIDER_LIBC; + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("unrecognized collation provider: %s", + collproviderstr))); + } + else + collprovider = COLLPROVIDER_LIBC; + + if (localeEl) + { + if (collprovider == COLLPROVIDER_LIBC) + { + collcollate = defGetString(localeEl); + collctype = defGetString(localeEl); + } + else + colliculocale = defGetString(localeEl); + } + + if (lccollateEl) + collcollate = defGetString(lccollateEl); + + if (lcctypeEl) + collctype = defGetString(lcctypeEl); + + if (collprovider == COLLPROVIDER_LIBC) + { + if (!collcollate) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("parameter \"lc_collate\" must be specified"))); + + if (!collctype) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("parameter \"lc_ctype\" must be specified"))); + } + else if (collprovider == COLLPROVIDER_ICU) + { + if (!colliculocale) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("parameter \"locale\" must be specified"))); + } + + /* + * Nondeterministic collations are currently only supported with ICU + * because that's the only case where it can actually make a + * difference. So we can save writing the code for the other + * providers. 
+ */ + if (!collisdeterministic && collprovider != COLLPROVIDER_ICU) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("nondeterministic collations not supported with this provider"))); + + if (collprovider == COLLPROVIDER_ICU) + { +#ifdef USE_ICU + /* + * We could create ICU collations with collencoding == database + * encoding, but it seems better to use -1 so that it matches the + * way initdb would create ICU collations. However, only allow + * one to be created when the current database's encoding is + * supported. Otherwise the collation is useless, plus we get + * surprising behaviors like not being able to drop the collation. + * + * Skip this test when !USE_ICU, because the error we want to + * throw for that isn't thrown till later. + */ + if (!is_encoding_supported_by_icu(GetDatabaseEncoding())) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("current database's encoding is not supported with this provider"))); +#endif + collencoding = -1; + } + else + { + collencoding = GetDatabaseEncoding(); + check_encoding_locale_matches(collencoding, collcollate, collctype); + } + } + + if (!collversion) + collversion = get_collation_actual_version(collprovider, collprovider == COLLPROVIDER_ICU ? colliculocale : collcollate); + + newoid = CollationCreate(collName, + collNamespace, + GetUserId(), + collprovider, + collisdeterministic, + collencoding, + collcollate, + collctype, + colliculocale, + collversion, + if_not_exists, + false); /* not quiet */ + + if (!OidIsValid(newoid)) + return InvalidObjectAddress; + + /* + * Check that the locales can be loaded. NB: pg_newlocale_from_collation + * is only supposed to be called on non-C-equivalent locales. 
+ */ + CommandCounterIncrement(); + if (!lc_collate_is_c(newoid) || !lc_ctype_is_c(newoid)) + (void) pg_newlocale_from_collation(newoid); + + ObjectAddressSet(address, CollationRelationId, newoid); + + return address; +} + +/* + * Subroutine for ALTER COLLATION SET SCHEMA and RENAME + * + * Is there a collation with the same name of the given collation already in + * the given namespace? If so, raise an appropriate error message. + */ +void +IsThereCollationInNamespace(const char *collname, Oid nspOid) +{ + /* make sure the name doesn't already exist in new schema */ + if (SearchSysCacheExists3(COLLNAMEENCNSP, + CStringGetDatum(collname), + Int32GetDatum(GetDatabaseEncoding()), + ObjectIdGetDatum(nspOid))) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("collation \"%s\" for encoding \"%s\" already exists in schema \"%s\"", + collname, GetDatabaseEncodingName(), + get_namespace_name(nspOid)))); + + /* mustn't match an any-encoding entry, either */ + if (SearchSysCacheExists3(COLLNAMEENCNSP, + CStringGetDatum(collname), + Int32GetDatum(-1), + ObjectIdGetDatum(nspOid))) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("collation \"%s\" already exists in schema \"%s\"", + collname, get_namespace_name(nspOid)))); +} + +/* + * ALTER COLLATION + */ +ObjectAddress +AlterCollation(AlterCollationStmt *stmt) +{ + Relation rel; + Oid collOid; + HeapTuple tup; + Form_pg_collation collForm; + Datum datum; + bool isnull; + char *oldversion; + char *newversion; + ObjectAddress address; + + rel = table_open(CollationRelationId, RowExclusiveLock); + collOid = get_collation_oid(stmt->collname, false); + + if (!pg_collation_ownercheck(collOid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_COLLATION, + NameListToString(stmt->collname)); + + tup = SearchSysCacheCopy1(COLLOID, ObjectIdGetDatum(collOid)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for collation %u", collOid); + + collForm = (Form_pg_collation) 
GETSTRUCT(tup); + datum = SysCacheGetAttr(COLLOID, tup, Anum_pg_collation_collversion, &isnull); + oldversion = isnull ? NULL : TextDatumGetCString(datum); + + datum = SysCacheGetAttr(COLLOID, tup, collForm->collprovider == COLLPROVIDER_ICU ? Anum_pg_collation_colliculocale : Anum_pg_collation_collcollate, &isnull); + if (isnull) + elog(ERROR, "unexpected null in pg_collation"); + newversion = get_collation_actual_version(collForm->collprovider, TextDatumGetCString(datum)); + + /* cannot change from NULL to non-NULL or vice versa */ + if ((!oldversion && newversion) || (oldversion && !newversion)) + elog(ERROR, "invalid collation version change"); + else if (oldversion && newversion && strcmp(newversion, oldversion) != 0) + { + bool nulls[Natts_pg_collation]; + bool replaces[Natts_pg_collation]; + Datum values[Natts_pg_collation]; + + ereport(NOTICE, + (errmsg("changing version from %s to %s", + oldversion, newversion))); + + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + memset(replaces, false, sizeof(replaces)); + + values[Anum_pg_collation_collversion - 1] = CStringGetTextDatum(newversion); + replaces[Anum_pg_collation_collversion - 1] = true; + + tup = heap_modify_tuple(tup, RelationGetDescr(rel), + values, nulls, replaces); + } + else + ereport(NOTICE, + (errmsg("version has not changed"))); + + CatalogTupleUpdate(rel, &tup->t_self, tup); + + InvokeObjectPostAlterHook(CollationRelationId, collOid, 0); + + ObjectAddressSet(address, CollationRelationId, collOid); + + heap_freetuple(tup); + table_close(rel, NoLock); + + return address; +} + + +Datum +pg_collation_actual_version(PG_FUNCTION_ARGS) +{ + Oid collid = PG_GETARG_OID(0); + HeapTuple tp; + char collprovider; + Datum datum; + bool isnull; + char *version; + + tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid)); + if (!HeapTupleIsValid(tp)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("collation with OID %u does not exist", collid))); + + collprovider = 
((Form_pg_collation) GETSTRUCT(tp))->collprovider;

	/*
	 * The default provider reports no version (NULL); for libc and ICU,
	 * read the provider-specific locale column and ask the provider.
	 */
	if (collprovider != COLLPROVIDER_DEFAULT)
	{
		datum = SysCacheGetAttr(COLLOID, tp, collprovider == COLLPROVIDER_ICU ? Anum_pg_collation_colliculocale : Anum_pg_collation_collcollate, &isnull);
		if (isnull)
			elog(ERROR, "unexpected null in pg_collation");
		version = get_collation_actual_version(collprovider, TextDatumGetCString(datum));
	}
	else
		version = NULL;

	ReleaseSysCache(tp);

	if (version)
		PG_RETURN_TEXT_P(cstring_to_text(version));
	else
		PG_RETURN_NULL();
}


/* will we use "locale -a" in pg_import_system_collations? */
#if defined(HAVE_LOCALE_T) && !defined(WIN32)
#define READ_LOCALE_A_OUTPUT
#endif

#ifdef READ_LOCALE_A_OUTPUT
/*
 * "Normalize" a libc locale name, stripping off encoding tags such as
 * ".utf8" (e.g., "en_US.utf8" -> "en_US", but "br_FR.iso885915@euro"
 * -> "br_FR@euro"). Return true if a new, different name was
 * generated.
 *
 * "new" must point to a buffer of at least strlen(old) + 1 bytes; the
 * output only ever drops characters, so it is never longer than the
 * input.  The result is always NUL-terminated.
 */
static bool
normalize_libc_locale_name(char *new, const char *old)
{
	char	   *n = new;
	const char *o = old;
	bool		changed = false;

	while (*o)
	{
		if (*o == '.')
		{
			/* skip over encoding tag such as ".utf8" or ".UTF-8" */
			o++;
			/* consume the alphanumeric/dash run forming the tag */
			while ((*o >= 'A' && *o <= 'Z')
				   || (*o >= 'a' && *o <= 'z')
				   || (*o >= '0' && *o <= '9')
				   || (*o == '-'))
				o++;
			changed = true;
		}
		else
			*n++ = *o++;
	}
	*n = '\0';

	return changed;
}

/*
 * qsort comparator for CollAliasData items
 */
static int
cmpaliases(const void *a, const void *b)
{
	const CollAliasData *ca = (const CollAliasData *) a;
	const CollAliasData *cb = (const CollAliasData *) b;

	/* comparing localename is enough because other fields are derived */
	return strcmp(ca->localename, cb->localename);
}
#endif							/* READ_LOCALE_A_OUTPUT */


#ifdef USE_ICU
/*
 * Get the ICU language tag for a locale name.
 * The result is a palloc'd string.
+ */ +static char * +get_icu_language_tag(const char *localename) +{ + char buf[ULOC_FULLNAME_CAPACITY]; + UErrorCode status; + + status = U_ZERO_ERROR; + uloc_toLanguageTag(localename, buf, sizeof(buf), true, &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("could not convert locale name \"%s\" to language tag: %s", + localename, u_errorName(status)))); + + return pstrdup(buf); +} + +/* + * Get a comment (specifically, the display name) for an ICU locale. + * The result is a palloc'd string, or NULL if we can't get a comment + * or find that it's not all ASCII. (We can *not* accept non-ASCII + * comments, because the contents of template0 must be encoding-agnostic.) + */ +static char * +get_icu_locale_comment(const char *localename) +{ + UErrorCode status; + UChar displayname[128]; + int32 len_uchar; + int32 i; + char *result; + + status = U_ZERO_ERROR; + len_uchar = uloc_getDisplayName(localename, "en", + displayname, lengthof(displayname), + &status); + if (U_FAILURE(status)) + return NULL; /* no good reason to raise an error */ + + /* Check for non-ASCII comment (can't use pg_is_ascii for this) */ + for (i = 0; i < len_uchar; i++) + { + if (displayname[i] > 127) + return NULL; + } + + /* OK, transcribe */ + result = palloc(len_uchar + 1); + for (i = 0; i < len_uchar; i++) + result[i] = displayname[i]; + result[len_uchar] = '\0'; + + return result; +} +#endif /* USE_ICU */ + + +/* + * pg_import_system_collations: add known system collations to pg_collation + */ +Datum +pg_import_system_collations(PG_FUNCTION_ARGS) +{ + Oid nspid = PG_GETARG_OID(0); + int ncreated = 0; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to import system collations"))); + + if (!SearchSysCacheExists1(NAMESPACEOID, ObjectIdGetDatum(nspid))) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_SCHEMA), + errmsg("schema with OID %u does not exist", nspid))); + + /* Load collations known to libc, using "locale -a" to 
enumerate them */ +#ifdef READ_LOCALE_A_OUTPUT + { + FILE *locale_a_handle; + char localebuf[LOCALE_NAME_BUFLEN]; + int nvalid = 0; + Oid collid; + CollAliasData *aliases; + int naliases, + maxaliases, + i; + + /* expansible array of aliases */ + maxaliases = 100; + aliases = (CollAliasData *) palloc(maxaliases * sizeof(CollAliasData)); + naliases = 0; + + locale_a_handle = OpenPipeStream("locale -a", "r"); + if (locale_a_handle == NULL) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not execute command \"%s\": %m", + "locale -a"))); + + while (fgets(localebuf, sizeof(localebuf), locale_a_handle)) + { + size_t len; + int enc; + char alias[LOCALE_NAME_BUFLEN]; + + len = strlen(localebuf); + + if (len == 0 || localebuf[len - 1] != '\n') + { + elog(DEBUG1, "skipping locale with too-long name: \"%s\"", localebuf); + continue; + } + localebuf[len - 1] = '\0'; + + /* + * Some systems have locale names that don't consist entirely of + * ASCII letters (such as "bokmål" or "français"). + * This is pretty silly, since we need the locale itself to + * interpret the non-ASCII characters. We can't do much with + * those, so we filter them out. + */ + if (!pg_is_ascii(localebuf)) + { + elog(DEBUG1, "skipping locale with non-ASCII name: \"%s\"", localebuf); + continue; + } + + enc = pg_get_encoding_from_locale(localebuf, false); + if (enc < 0) + { + elog(DEBUG1, "skipping locale with unrecognized encoding: \"%s\"", + localebuf); + continue; + } + if (!PG_VALID_BE_ENCODING(enc)) + { + elog(DEBUG1, "skipping locale with client-only encoding: \"%s\"", localebuf); + continue; + } + if (enc == PG_SQL_ASCII) + continue; /* C/POSIX are already in the catalog */ + + /* count valid locales found in operating system */ + nvalid++; + + /* + * Create a collation named the same as the locale, but quietly + * doing nothing if it already exists. 
This is the behavior we + * need even at initdb time, because some versions of "locale -a" + * can report the same locale name more than once. And it's + * convenient for later import runs, too, since you just about + * always want to add on new locales without a lot of chatter + * about existing ones. + */ + collid = CollationCreate(localebuf, nspid, GetUserId(), + COLLPROVIDER_LIBC, true, enc, + localebuf, localebuf, NULL, + get_collation_actual_version(COLLPROVIDER_LIBC, localebuf), + true, true); + if (OidIsValid(collid)) + { + ncreated++; + + /* Must do CCI between inserts to handle duplicates correctly */ + CommandCounterIncrement(); + } + + /* + * Generate aliases such as "en_US" in addition to "en_US.utf8" + * for ease of use. Note that collation names are unique per + * encoding only, so this doesn't clash with "en_US" for LATIN1, + * say. + * + * However, it might conflict with a name we'll see later in the + * "locale -a" output. So save up the aliases and try to add them + * after we've read all the output. + */ + if (normalize_libc_locale_name(alias, localebuf)) + { + if (naliases >= maxaliases) + { + maxaliases *= 2; + aliases = (CollAliasData *) + repalloc(aliases, maxaliases * sizeof(CollAliasData)); + } + aliases[naliases].localename = pstrdup(localebuf); + aliases[naliases].alias = pstrdup(alias); + aliases[naliases].enc = enc; + naliases++; + } + } + + ClosePipeStream(locale_a_handle); + + /* + * Before processing the aliases, sort them by locale name. The point + * here is that if "locale -a" gives us multiple locale names with the + * same encoding and base name, say "en_US.utf8" and "en_US.utf-8", we + * want to pick a deterministic one of them. First in ASCII sort + * order is a good enough rule. (Before PG 10, the code corresponding + * to this logic in initdb.c had an additional ordering rule, to + * prefer the locale name exactly matching the alias, if any. 
We + * don't need to consider that here, because we would have already + * created such a pg_collation entry above, and that one will win.) + */ + if (naliases > 1) + qsort((void *) aliases, naliases, sizeof(CollAliasData), cmpaliases); + + /* Now add aliases, ignoring any that match pre-existing entries */ + for (i = 0; i < naliases; i++) + { + char *locale = aliases[i].localename; + char *alias = aliases[i].alias; + int enc = aliases[i].enc; + + collid = CollationCreate(alias, nspid, GetUserId(), + COLLPROVIDER_LIBC, true, enc, + locale, locale, NULL, + get_collation_actual_version(COLLPROVIDER_LIBC, locale), + true, true); + if (OidIsValid(collid)) + { + ncreated++; + + CommandCounterIncrement(); + } + } + + /* Give a warning if "locale -a" seems to be malfunctioning */ + if (nvalid == 0) + ereport(WARNING, + (errmsg("no usable system locales were found"))); + } +#endif /* READ_LOCALE_A_OUTPUT */ + + /* + * Load collations known to ICU + * + * We use uloc_countAvailable()/uloc_getAvailable() rather than + * ucol_countAvailable()/ucol_getAvailable(). The former returns a full + * set of language+region combinations, whereas the latter only returns + * language+region combinations if they are distinct from the language's + * base collation. So there might not be a de-DE or en-GB, which would be + * confusing. + */ +#ifdef USE_ICU + { + int i; + + /* + * Start the loop at -1 to sneak in the root locale without too much + * code duplication. + */ + for (i = -1; i < uloc_countAvailable(); i++) + { + const char *name; + char *langtag; + char *icucomment; + const char *iculocstr; + Oid collid; + + if (i == -1) + name = ""; /* ICU root locale */ + else + name = uloc_getAvailable(i); + + langtag = get_icu_language_tag(name); + iculocstr = U_ICU_VERSION_MAJOR_NUM >= 54 ? 
langtag : name; + + /* + * Be paranoid about not allowing any non-ASCII strings into + * pg_collation + */ + if (!pg_is_ascii(langtag) || !pg_is_ascii(iculocstr)) + continue; + + collid = CollationCreate(psprintf("%s-x-icu", langtag), + nspid, GetUserId(), + COLLPROVIDER_ICU, true, -1, + NULL, NULL, iculocstr, + get_collation_actual_version(COLLPROVIDER_ICU, iculocstr), + true, true); + if (OidIsValid(collid)) + { + ncreated++; + + CommandCounterIncrement(); + + icucomment = get_icu_locale_comment(name); + if (icucomment) + CreateComments(collid, CollationRelationId, 0, + icucomment); + } + } + } +#endif /* USE_ICU */ + + PG_RETURN_INT32(ncreated); +} diff --git a/src/backend/commands/comment.c b/src/backend/commands/comment.c new file mode 100644 index 0000000..86985a9 --- /dev/null +++ b/src/backend/commands/comment.c @@ -0,0 +1,459 @@ +/*------------------------------------------------------------------------- + * + * comment.c + * + * PostgreSQL object comments utility code. + * + * Copyright (c) 1996-2022, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/commands/comment.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/genam.h" +#include "access/htup_details.h" +#include "access/relation.h" +#include "access/table.h" +#include "catalog/indexing.h" +#include "catalog/objectaddress.h" +#include "catalog/pg_description.h" +#include "catalog/pg_shdescription.h" +#include "commands/comment.h" +#include "commands/dbcommands.h" +#include "miscadmin.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" +#include "utils/rel.h" + + +/* + * CommentObject -- + * + * This routine is used to add the associated comment into + * pg_description for the object specified by the given SQL command. 
+ */ +ObjectAddress +CommentObject(CommentStmt *stmt) +{ + Relation relation; + ObjectAddress address = InvalidObjectAddress; + + /* + * When loading a dump, we may see a COMMENT ON DATABASE for the old name + * of the database. Erroring out would prevent pg_restore from completing + * (which is really pg_restore's fault, but for now we will work around + * the problem here). Consensus is that the best fix is to treat wrong + * database name as a WARNING not an ERROR; hence, the following special + * case. + */ + if (stmt->objtype == OBJECT_DATABASE) + { + char *database = strVal(stmt->object); + + if (!OidIsValid(get_database_oid(database, true))) + { + ereport(WARNING, + (errcode(ERRCODE_UNDEFINED_DATABASE), + errmsg("database \"%s\" does not exist", database))); + return address; + } + } + + /* + * Translate the parser representation that identifies this object into an + * ObjectAddress. get_object_address() will throw an error if the object + * does not exist, and will also acquire a lock on the target to guard + * against concurrent DROP operations. + */ + address = get_object_address(stmt->objtype, stmt->object, + &relation, ShareUpdateExclusiveLock, false); + + /* Require ownership of the target object. */ + check_object_ownership(GetUserId(), stmt->objtype, address, + stmt->object, relation); + + /* Perform other integrity checks as needed. */ + switch (stmt->objtype) + { + case OBJECT_COLUMN: + + /* + * Allow comments only on columns of tables, views, materialized + * views, composite types, and foreign tables (which are the only + * relkinds for which pg_dump will dump per-column comments). In + * particular we wish to disallow comments on index columns, + * because the naming of an index's columns may change across PG + * versions, so dumping per-column comments could create reload + * failures. 
+ */ + if (relation->rd_rel->relkind != RELKIND_RELATION && + relation->rd_rel->relkind != RELKIND_VIEW && + relation->rd_rel->relkind != RELKIND_MATVIEW && + relation->rd_rel->relkind != RELKIND_COMPOSITE_TYPE && + relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE && + relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot set comment on relation \"%s\"", + RelationGetRelationName(relation)), + errdetail_relkind_not_supported(relation->rd_rel->relkind))); + break; + default: + break; + } + + /* + * Databases, tablespaces, and roles are cluster-wide objects, so any + * comments on those objects are recorded in the shared pg_shdescription + * catalog. Comments on all other objects are recorded in pg_description. + */ + if (stmt->objtype == OBJECT_DATABASE || stmt->objtype == OBJECT_TABLESPACE + || stmt->objtype == OBJECT_ROLE) + CreateSharedComments(address.objectId, address.classId, stmt->comment); + else + CreateComments(address.objectId, address.classId, address.objectSubId, + stmt->comment); + + /* + * If get_object_address() opened the relation for us, we close it to keep + * the reference count correct - but we retain any locks acquired by + * get_object_address() until commit time, to guard against concurrent + * activity. + */ + if (relation != NULL) + relation_close(relation, NoLock); + + return address; +} + +/* + * CreateComments -- + * + * Create a comment for the specified object descriptor. Inserts a new + * pg_description tuple, or replaces an existing one with the same key. + * + * If the comment given is null or an empty string, instead delete any + * existing comment for the specified key. 
 */
void
CreateComments(Oid oid, Oid classoid, int32 subid, const char *comment)
{
	Relation	description;
	ScanKeyData skey[3];
	SysScanDesc sd;
	HeapTuple	oldtuple;
	HeapTuple	newtuple = NULL;	/* non-NULL once we form a replacement */
	Datum		values[Natts_pg_description];
	bool		nulls[Natts_pg_description];
	bool		replaces[Natts_pg_description];
	int			i;

	/* Reduce empty-string to NULL case */
	if (comment != NULL && strlen(comment) == 0)
		comment = NULL;

	/* Prepare to form or update a tuple, if necessary */
	if (comment != NULL)
	{
		/*
		 * replaces[] is all-true: only the description column actually
		 * changes on update, but overwriting the key columns with
		 * identical values is harmless.
		 */
		for (i = 0; i < Natts_pg_description; i++)
		{
			nulls[i] = false;
			replaces[i] = true;
		}
		values[Anum_pg_description_objoid - 1] = ObjectIdGetDatum(oid);
		values[Anum_pg_description_classoid - 1] = ObjectIdGetDatum(classoid);
		values[Anum_pg_description_objsubid - 1] = Int32GetDatum(subid);
		values[Anum_pg_description_description - 1] = CStringGetTextDatum(comment);
	}

	/* Use the index to search for a matching old tuple */

	ScanKeyInit(&skey[0],
				Anum_pg_description_objoid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(oid));
	ScanKeyInit(&skey[1],
				Anum_pg_description_classoid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(classoid));
	ScanKeyInit(&skey[2],
				Anum_pg_description_objsubid,
				BTEqualStrategyNumber, F_INT4EQ,
				Int32GetDatum(subid));

	description = table_open(DescriptionRelationId, RowExclusiveLock);

	sd = systable_beginscan(description, DescriptionObjIndexId, true,
							NULL, 3, skey);

	while ((oldtuple = systable_getnext(sd)) != NULL)
	{
		/* Found the old tuple, so delete or update it */

		if (comment == NULL)
			CatalogTupleDelete(description, &oldtuple->t_self);
		else
		{
			newtuple = heap_modify_tuple(oldtuple, RelationGetDescr(description), values,
										 nulls, replaces);
			CatalogTupleUpdate(description, &oldtuple->t_self, newtuple);
		}

		break;					/* Assume there can be only one match */
	}

	systable_endscan(sd);

	/* If we didn't find an old tuple, insert a new one */

	if (newtuple
== NULL && comment != NULL) + { + newtuple = heap_form_tuple(RelationGetDescr(description), + values, nulls); + CatalogTupleInsert(description, newtuple); + } + + if (newtuple != NULL) + heap_freetuple(newtuple); + + /* Done */ + + table_close(description, NoLock); +} + +/* + * CreateSharedComments -- + * + * Create a comment for the specified shared object descriptor. Inserts a + * new pg_shdescription tuple, or replaces an existing one with the same key. + * + * If the comment given is null or an empty string, instead delete any + * existing comment for the specified key. + */ +void +CreateSharedComments(Oid oid, Oid classoid, const char *comment) +{ + Relation shdescription; + ScanKeyData skey[2]; + SysScanDesc sd; + HeapTuple oldtuple; + HeapTuple newtuple = NULL; + Datum values[Natts_pg_shdescription]; + bool nulls[Natts_pg_shdescription]; + bool replaces[Natts_pg_shdescription]; + int i; + + /* Reduce empty-string to NULL case */ + if (comment != NULL && strlen(comment) == 0) + comment = NULL; + + /* Prepare to form or update a tuple, if necessary */ + if (comment != NULL) + { + for (i = 0; i < Natts_pg_shdescription; i++) + { + nulls[i] = false; + replaces[i] = true; + } + values[Anum_pg_shdescription_objoid - 1] = ObjectIdGetDatum(oid); + values[Anum_pg_shdescription_classoid - 1] = ObjectIdGetDatum(classoid); + values[Anum_pg_shdescription_description - 1] = CStringGetTextDatum(comment); + } + + /* Use the index to search for a matching old tuple */ + + ScanKeyInit(&skey[0], + Anum_pg_shdescription_objoid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(oid)); + ScanKeyInit(&skey[1], + Anum_pg_shdescription_classoid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(classoid)); + + shdescription = table_open(SharedDescriptionRelationId, RowExclusiveLock); + + sd = systable_beginscan(shdescription, SharedDescriptionObjIndexId, true, + NULL, 2, skey); + + while ((oldtuple = systable_getnext(sd)) != NULL) + { + /* Found the old tuple, so delete or 
update it */ + + if (comment == NULL) + CatalogTupleDelete(shdescription, &oldtuple->t_self); + else + { + newtuple = heap_modify_tuple(oldtuple, RelationGetDescr(shdescription), + values, nulls, replaces); + CatalogTupleUpdate(shdescription, &oldtuple->t_self, newtuple); + } + + break; /* Assume there can be only one match */ + } + + systable_endscan(sd); + + /* If we didn't find an old tuple, insert a new one */ + + if (newtuple == NULL && comment != NULL) + { + newtuple = heap_form_tuple(RelationGetDescr(shdescription), + values, nulls); + CatalogTupleInsert(shdescription, newtuple); + } + + if (newtuple != NULL) + heap_freetuple(newtuple); + + /* Done */ + + table_close(shdescription, NoLock); +} + +/* + * DeleteComments -- remove comments for an object + * + * If subid is nonzero then only comments matching it will be removed. + * If subid is zero, all comments matching the oid/classoid will be removed + * (this corresponds to deleting a whole object). + */ +void +DeleteComments(Oid oid, Oid classoid, int32 subid) +{ + Relation description; + ScanKeyData skey[3]; + int nkeys; + SysScanDesc sd; + HeapTuple oldtuple; + + /* Use the index to search for all matching old tuples */ + + ScanKeyInit(&skey[0], + Anum_pg_description_objoid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(oid)); + ScanKeyInit(&skey[1], + Anum_pg_description_classoid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(classoid)); + + if (subid != 0) + { + ScanKeyInit(&skey[2], + Anum_pg_description_objsubid, + BTEqualStrategyNumber, F_INT4EQ, + Int32GetDatum(subid)); + nkeys = 3; + } + else + nkeys = 2; + + description = table_open(DescriptionRelationId, RowExclusiveLock); + + sd = systable_beginscan(description, DescriptionObjIndexId, true, + NULL, nkeys, skey); + + while ((oldtuple = systable_getnext(sd)) != NULL) + CatalogTupleDelete(description, &oldtuple->t_self); + + /* Done */ + + systable_endscan(sd); + table_close(description, RowExclusiveLock); +} + +/* + * 
DeleteSharedComments -- remove comments for a shared object
 *
 * Removes all pg_shdescription rows matching the given oid/classoid
 * (shared objects have no sub-object IDs, so no subid filter exists).
 */
void
DeleteSharedComments(Oid oid, Oid classoid)
{
	Relation	shdescription;
	ScanKeyData skey[2];
	SysScanDesc sd;
	HeapTuple	oldtuple;

	/* Use the index to search for all matching old tuples */

	ScanKeyInit(&skey[0],
				Anum_pg_shdescription_objoid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(oid));
	ScanKeyInit(&skey[1],
				Anum_pg_shdescription_classoid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(classoid));

	shdescription = table_open(SharedDescriptionRelationId, RowExclusiveLock);

	sd = systable_beginscan(shdescription, SharedDescriptionObjIndexId, true,
							NULL, 2, skey);

	while ((oldtuple = systable_getnext(sd)) != NULL)
		CatalogTupleDelete(shdescription, &oldtuple->t_self);

	/* Done */

	systable_endscan(sd);
	table_close(shdescription, RowExclusiveLock);
}

/*
 * GetComment -- get the comment for an object, or null if not found.
 *
 * The result is a palloc'd copy of the description text, or NULL if no
 * matching pg_description row exists; the caller may pfree it.
 */
char *
GetComment(Oid oid, Oid classoid, int32 subid)
{
	Relation	description;
	ScanKeyData skey[3];
	SysScanDesc sd;
	TupleDesc	tupdesc;
	HeapTuple	tuple;
	char	   *comment;

	/* Use the index to search for a matching old tuple */

	ScanKeyInit(&skey[0],
				Anum_pg_description_objoid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(oid));
	ScanKeyInit(&skey[1],
				Anum_pg_description_classoid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(classoid));
	ScanKeyInit(&skey[2],
				Anum_pg_description_objsubid,
				BTEqualStrategyNumber, F_INT4EQ,
				Int32GetDatum(subid));

	/* AccessShareLock suffices: this is a read-only lookup */
	description = table_open(DescriptionRelationId, AccessShareLock);
	tupdesc = RelationGetDescr(description);

	sd = systable_beginscan(description, DescriptionObjIndexId, true,
							NULL, 3, skey);

	comment = NULL;
	while ((tuple = systable_getnext(sd)) != NULL)
	{
		Datum		value;
		bool		isnull;

		/* Found the tuple, get description field */
		value = heap_getattr(tuple, Anum_pg_description_description, tupdesc, &isnull);
if (!isnull) + comment = TextDatumGetCString(value); + break; /* Assume there can be only one match */ + } + + systable_endscan(sd); + + /* Done */ + table_close(description, AccessShareLock); + + return comment; +} diff --git a/src/backend/commands/constraint.c b/src/backend/commands/constraint.c new file mode 100644 index 0000000..721de17 --- /dev/null +++ b/src/backend/commands/constraint.c @@ -0,0 +1,205 @@ +/*------------------------------------------------------------------------- + * + * constraint.c + * PostgreSQL CONSTRAINT support code. + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/commands/constraint.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/genam.h" +#include "access/heapam.h" +#include "access/tableam.h" +#include "catalog/index.h" +#include "commands/trigger.h" +#include "executor/executor.h" +#include "utils/builtins.h" +#include "utils/rel.h" +#include "utils/snapmgr.h" + + +/* + * unique_key_recheck - trigger function to do a deferred uniqueness check. + * + * This now also does deferred exclusion-constraint checks, so the name is + * somewhat historical. + * + * This is invoked as an AFTER ROW trigger for both INSERT and UPDATE, + * for any rows recorded as potentially violating a deferrable unique + * or exclusion constraint. + * + * This may be an end-of-statement check, a commit-time check, or a + * check triggered by a SET CONSTRAINTS command. 
+ */ +Datum +unique_key_recheck(PG_FUNCTION_ARGS) +{ + TriggerData *trigdata = (TriggerData *) fcinfo->context; + const char *funcname = "unique_key_recheck"; + ItemPointerData checktid; + ItemPointerData tmptid; + Relation indexRel; + IndexInfo *indexInfo; + EState *estate; + ExprContext *econtext; + TupleTableSlot *slot; + Datum values[INDEX_MAX_KEYS]; + bool isnull[INDEX_MAX_KEYS]; + + /* + * Make sure this is being called as an AFTER ROW trigger. Note: + * translatable error strings are shared with ri_triggers.c, so resist the + * temptation to fold the function name into them. + */ + if (!CALLED_AS_TRIGGER(fcinfo)) + ereport(ERROR, + (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), + errmsg("function \"%s\" was not called by trigger manager", + funcname))); + + if (!TRIGGER_FIRED_AFTER(trigdata->tg_event) || + !TRIGGER_FIRED_FOR_ROW(trigdata->tg_event)) + ereport(ERROR, + (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), + errmsg("function \"%s\" must be fired AFTER ROW", + funcname))); + + /* + * Get the new data that was inserted/updated. + */ + if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event)) + checktid = trigdata->tg_trigslot->tts_tid; + else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event)) + checktid = trigdata->tg_newslot->tts_tid; + else + { + ereport(ERROR, + (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), + errmsg("function \"%s\" must be fired for INSERT or UPDATE", + funcname))); + ItemPointerSetInvalid(&checktid); /* keep compiler quiet */ + } + + slot = table_slot_create(trigdata->tg_relation, NULL); + + /* + * If the row pointed at by checktid is now dead (ie, inserted and then + * deleted within our transaction), we can skip the check. However, we + * have to be careful, because this trigger gets queued only in response + * to index insertions; which means it does not get queued e.g. for HOT + * updates. 
The row we are called for might now be dead, but have a live + * HOT child, in which case we still need to make the check --- + * effectively, we're applying the check against the live child row, + * although we can use the values from this row since by definition all + * columns of interest to us are the same. + * + * This might look like just an optimization, because the index AM will + * make this identical test before throwing an error. But it's actually + * needed for correctness, because the index AM will also throw an error + * if it doesn't find the index entry for the row. If the row's dead then + * it's possible the index entry has also been marked dead, and even + * removed. + */ + tmptid = checktid; + { + IndexFetchTableData *scan = table_index_fetch_begin(trigdata->tg_relation); + bool call_again = false; + + if (!table_index_fetch_tuple(scan, &tmptid, SnapshotSelf, slot, + &call_again, NULL)) + { + /* + * All rows referenced by the index entry are dead, so skip the + * check. + */ + ExecDropSingleTupleTableSlot(slot); + table_index_fetch_end(scan); + return PointerGetDatum(NULL); + } + table_index_fetch_end(scan); + } + + /* + * Open the index, acquiring a RowExclusiveLock, just as if we were going + * to update it. (This protects against possible changes of the index + * schema, not against concurrent updates.) + */ + indexRel = index_open(trigdata->tg_trigger->tgconstrindid, + RowExclusiveLock); + indexInfo = BuildIndexInfo(indexRel); + + /* + * Typically the index won't have expressions, but if it does we need an + * EState to evaluate them. We need it for exclusion constraints too, + * even if they are just on simple columns. + */ + if (indexInfo->ii_Expressions != NIL || + indexInfo->ii_ExclusionOps != NULL) + { + estate = CreateExecutorState(); + econtext = GetPerTupleExprContext(estate); + econtext->ecxt_scantuple = slot; + } + else + estate = NULL; + + /* + * Form the index values and isnull flags for the index entry that we need + * to check. 
+ * + * Note: if the index uses functions that are not as immutable as they are + * supposed to be, this could produce an index tuple different from the + * original. The index AM can catch such errors by verifying that it + * finds a matching index entry with the tuple's TID. For exclusion + * constraints we check this in check_exclusion_constraint(). + */ + FormIndexDatum(indexInfo, slot, estate, values, isnull); + + /* + * Now do the appropriate check. + */ + if (indexInfo->ii_ExclusionOps == NULL) + { + /* + * Note: this is not a real insert; it is a check that the index entry + * that has already been inserted is unique. Passing the tuple's tid + * (i.e. unmodified by table_index_fetch_tuple()) is correct even if + * the row is now dead, because that is the TID the index will know + * about. + */ + index_insert(indexRel, values, isnull, &checktid, + trigdata->tg_relation, UNIQUE_CHECK_EXISTING, + false, indexInfo); + } + else + { + /* + * For exclusion constraints we just do the normal check, but now it's + * okay to throw error. In the HOT-update case, we must use the live + * HOT child's TID here, else check_exclusion_constraint will think + * the child is a conflict. + */ + check_exclusion_constraint(trigdata->tg_relation, indexRel, indexInfo, + &tmptid, values, isnull, + estate, false); + } + + /* + * If that worked, then this index entry is unique or non-excluded, and we + * are done. 
+ */ + if (estate != NULL) + FreeExecutorState(estate); + + ExecDropSingleTupleTableSlot(slot); + + index_close(indexRel, RowExclusiveLock); + + return PointerGetDatum(NULL); +} diff --git a/src/backend/commands/conversioncmds.c b/src/backend/commands/conversioncmds.c new file mode 100644 index 0000000..67feda3 --- /dev/null +++ b/src/backend/commands/conversioncmds.c @@ -0,0 +1,139 @@ +/*------------------------------------------------------------------------- + * + * conversioncmds.c + * conversion creation command support code + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/conversioncmds.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/htup_details.h" +#include "catalog/dependency.h" +#include "catalog/indexing.h" +#include "catalog/pg_conversion.h" +#include "catalog/pg_type.h" +#include "commands/alter.h" +#include "commands/conversioncmds.h" +#include "mb/pg_wchar.h" +#include "miscadmin.h" +#include "parser/parse_func.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" +#include "utils/syscache.h" + +/* + * CREATE CONVERSION + */ +ObjectAddress +CreateConversionCommand(CreateConversionStmt *stmt) +{ + Oid namespaceId; + char *conversion_name; + AclResult aclresult; + int from_encoding; + int to_encoding; + Oid funcoid; + const char *from_encoding_name = stmt->for_encoding_name; + const char *to_encoding_name = stmt->to_encoding_name; + List *func_name = stmt->func_name; + static const Oid funcargs[] = {INT4OID, INT4OID, CSTRINGOID, INTERNALOID, INT4OID, BOOLOID}; + char result[1]; + Datum funcresult; + + /* Convert list of names to a name and namespace */ + namespaceId = QualifiedNameGetCreationNamespace(stmt->conversion_name, + &conversion_name); + + /* Check we have 
creation rights in target namespace */ + aclresult = pg_namespace_aclcheck(namespaceId, GetUserId(), ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_SCHEMA, + get_namespace_name(namespaceId)); + + /* Check the encoding names */ + from_encoding = pg_char_to_encoding(from_encoding_name); + if (from_encoding < 0) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("source encoding \"%s\" does not exist", + from_encoding_name))); + + to_encoding = pg_char_to_encoding(to_encoding_name); + if (to_encoding < 0) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("destination encoding \"%s\" does not exist", + to_encoding_name))); + + /* + * We consider conversions to or from SQL_ASCII to be meaningless. (If + * you wish to change this, note that pg_do_encoding_conversion() and its + * sister functions have hard-wired fast paths for any conversion in which + * the source or target encoding is SQL_ASCII, so that an encoding + * conversion function declared for such a case will never be used.) + */ + if (from_encoding == PG_SQL_ASCII || to_encoding == PG_SQL_ASCII) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("encoding conversion to or from \"SQL_ASCII\" is not supported"))); + + /* + * Check the existence of the conversion function. Function name could be + * a qualified name. 
+ */ + funcoid = LookupFuncName(func_name, sizeof(funcargs) / sizeof(Oid), + funcargs, false); + + /* Check it returns int4, else it's probably the wrong function */ + if (get_func_rettype(funcoid) != INT4OID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("encoding conversion function %s must return type %s", + NameListToString(func_name), "integer"))); + + /* Check we have EXECUTE rights for the function */ + aclresult = pg_proc_aclcheck(funcoid, GetUserId(), ACL_EXECUTE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_FUNCTION, + NameListToString(func_name)); + + /* + * Check that the conversion function is suitable for the requested source + * and target encodings. We do that by calling the function with an empty + * string; the conversion function should throw an error if it can't + * perform the requested conversion. + */ + funcresult = OidFunctionCall6(funcoid, + Int32GetDatum(from_encoding), + Int32GetDatum(to_encoding), + CStringGetDatum(""), + CStringGetDatum(result), + Int32GetDatum(0), + BoolGetDatum(false)); + + /* + * The function should return 0 for empty input. Might as well check that, + * too. 
+ */ + if (DatumGetInt32(funcresult) != 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("encoding conversion function %s returned incorrect result for empty input", + NameListToString(func_name)))); + + /* + * All seem ok, go ahead (possible failure would be a duplicate conversion + * name) + */ + return ConversionCreate(conversion_name, namespaceId, GetUserId(), + from_encoding, to_encoding, funcoid, stmt->def); +} diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c new file mode 100644 index 0000000..cc1909e --- /dev/null +++ b/src/backend/commands/copy.c @@ -0,0 +1,798 @@ +/*------------------------------------------------------------------------- + * + * copy.c + * Implements the COPY utility command + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/copy.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include +#include +#include + +#include "access/sysattr.h" +#include "access/table.h" +#include "access/xact.h" +#include "catalog/pg_authid.h" +#include "commands/copy.h" +#include "commands/defrem.h" +#include "executor/executor.h" +#include "mb/pg_wchar.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "optimizer/optimizer.h" +#include "parser/parse_coerce.h" +#include "parser/parse_collate.h" +#include "parser/parse_expr.h" +#include "parser/parse_relation.h" +#include "rewrite/rewriteHandler.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/rel.h" +#include "utils/rls.h" + +/* + * DoCopy executes the SQL COPY statement + * + * Either unload or reload contents of table , depending on . + * ( = true means we are inserting into the table.) 
In the "TO" case + * we also support copying the output of an arbitrary SELECT, INSERT, UPDATE + * or DELETE query. + * + * If is false, transfer is between the table and the file named + * . Otherwise, transfer is between the table and our regular + * input/output stream. The latter could be either stdin/stdout or a + * socket, depending on whether we're running under Postmaster control. + * + * Do not allow a Postgres user without the 'pg_read_server_files' or + * 'pg_write_server_files' role to read from or write to a file. + * + * Do not allow the copy if user doesn't have proper permission to access + * the table or the specifically requested columns. + */ +void +DoCopy(ParseState *pstate, const CopyStmt *stmt, + int stmt_location, int stmt_len, + uint64 *processed) +{ + bool is_from = stmt->is_from; + bool pipe = (stmt->filename == NULL); + Relation rel; + Oid relid; + RawStmt *query = NULL; + Node *whereClause = NULL; + + /* + * Disallow COPY to/from file or program except to users with the + * appropriate role. + */ + if (!pipe) + { + if (stmt->is_program) + { + if (!has_privs_of_role(GetUserId(), ROLE_PG_EXECUTE_SERVER_PROGRAM)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser or have privileges of the pg_execute_server_program role to COPY to or from an external program"), + errhint("Anyone can COPY to stdout or from stdin. " + "psql's \\copy command also works for anyone."))); + } + else + { + if (is_from && !has_privs_of_role(GetUserId(), ROLE_PG_READ_SERVER_FILES)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser or have privileges of the pg_read_server_files role to COPY from a file"), + errhint("Anyone can COPY to stdout or from stdin. 
" + "psql's \\copy command also works for anyone."))); + + if (!is_from && !has_privs_of_role(GetUserId(), ROLE_PG_WRITE_SERVER_FILES)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser or have privileges of the pg_write_server_files role to COPY to a file"), + errhint("Anyone can COPY to stdout or from stdin. " + "psql's \\copy command also works for anyone."))); + } + } + + if (stmt->relation) + { + LOCKMODE lockmode = is_from ? RowExclusiveLock : AccessShareLock; + ParseNamespaceItem *nsitem; + RangeTblEntry *rte; + TupleDesc tupDesc; + List *attnums; + ListCell *cur; + + Assert(!stmt->query); + + /* Open and lock the relation, using the appropriate lock type. */ + rel = table_openrv(stmt->relation, lockmode); + + relid = RelationGetRelid(rel); + + nsitem = addRangeTableEntryForRelation(pstate, rel, lockmode, + NULL, false, false); + rte = nsitem->p_rte; + rte->requiredPerms = (is_from ? ACL_INSERT : ACL_SELECT); + + if (stmt->whereClause) + { + /* add nsitem to query namespace */ + addNSItemToQuery(pstate, nsitem, false, true, true); + + /* Transform the raw expression tree */ + whereClause = transformExpr(pstate, stmt->whereClause, EXPR_KIND_COPY_WHERE); + + /* Make sure it yields a boolean result. 
*/ + whereClause = coerce_to_boolean(pstate, whereClause, "WHERE"); + + /* we have to fix its collations too */ + assign_expr_collations(pstate, whereClause); + + whereClause = eval_const_expressions(NULL, whereClause); + + whereClause = (Node *) canonicalize_qual((Expr *) whereClause, false); + whereClause = (Node *) make_ands_implicit((Expr *) whereClause); + } + + tupDesc = RelationGetDescr(rel); + attnums = CopyGetAttnums(tupDesc, rel, stmt->attlist); + foreach(cur, attnums) + { + int attno = lfirst_int(cur) - + FirstLowInvalidHeapAttributeNumber; + + if (is_from) + rte->insertedCols = bms_add_member(rte->insertedCols, attno); + else + rte->selectedCols = bms_add_member(rte->selectedCols, attno); + } + ExecCheckRTPerms(pstate->p_rtable, true); + + /* + * Permission check for row security policies. + * + * check_enable_rls will ereport(ERROR) if the user has requested + * something invalid and will otherwise indicate if we should enable + * RLS (returns RLS_ENABLED) or not for this COPY statement. + * + * If the relation has a row security policy and we are to apply it + * then perform a "query" copy and allow the normal query processing + * to handle the policies. + * + * If RLS is not enabled for this, then just fall through to the + * normal non-filtering relation handling. + */ + if (check_enable_rls(rte->relid, InvalidOid, false) == RLS_ENABLED) + { + SelectStmt *select; + ColumnRef *cr; + ResTarget *target; + RangeVar *from; + List *targetList = NIL; + + if (is_from) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY FROM not supported with row-level security"), + errhint("Use INSERT statements instead."))); + + /* + * Build target list + * + * If no columns are specified in the attribute list of the COPY + * command, then the target list is 'all' columns. Therefore, '*' + * should be used as the target list for the resulting SELECT + * statement. 
+ * + * In the case that columns are specified in the attribute list, + * create a ColumnRef and ResTarget for each column and add them + * to the target list for the resulting SELECT statement. + */ + if (!stmt->attlist) + { + cr = makeNode(ColumnRef); + cr->fields = list_make1(makeNode(A_Star)); + cr->location = -1; + + target = makeNode(ResTarget); + target->name = NULL; + target->indirection = NIL; + target->val = (Node *) cr; + target->location = -1; + + targetList = list_make1(target); + } + else + { + ListCell *lc; + + foreach(lc, stmt->attlist) + { + /* + * Build the ColumnRef for each column. The ColumnRef + * 'fields' property is a String node that corresponds to + * the column name respectively. + */ + cr = makeNode(ColumnRef); + cr->fields = list_make1(lfirst(lc)); + cr->location = -1; + + /* Build the ResTarget and add the ColumnRef to it. */ + target = makeNode(ResTarget); + target->name = NULL; + target->indirection = NIL; + target->val = (Node *) cr; + target->location = -1; + + /* Add each column to the SELECT statement's target list */ + targetList = lappend(targetList, target); + } + } + + /* + * Build RangeVar for from clause, fully qualified based on the + * relation which we have opened and locked. Use "ONLY" so that + * COPY retrieves rows from only the target table not any + * inheritance children, the same as when RLS doesn't apply. + */ + from = makeRangeVar(get_namespace_name(RelationGetNamespace(rel)), + pstrdup(RelationGetRelationName(rel)), + -1); + from->inh = false; /* apply ONLY */ + + /* Build query */ + select = makeNode(SelectStmt); + select->targetList = targetList; + select->fromClause = list_make1(from); + + query = makeNode(RawStmt); + query->stmt = (Node *) select; + query->stmt_location = stmt_location; + query->stmt_len = stmt_len; + + /* + * Close the relation for now, but keep the lock on it to prevent + * changes between now and when we start the query-based COPY. 
+ * + * We'll reopen it later as part of the query-based COPY. + */ + table_close(rel, NoLock); + rel = NULL; + } + } + else + { + Assert(stmt->query); + + /* MERGE is allowed by parser, but unimplemented. Reject for now */ + if (IsA(stmt->query, MergeStmt)) + ereport(ERROR, + errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("MERGE not supported in COPY")); + + query = makeNode(RawStmt); + query->stmt = stmt->query; + query->stmt_location = stmt_location; + query->stmt_len = stmt_len; + + relid = InvalidOid; + rel = NULL; + } + + if (is_from) + { + CopyFromState cstate; + + Assert(rel); + + /* check read-only transaction and parallel mode */ + if (XactReadOnly && !rel->rd_islocaltemp) + PreventCommandIfReadOnly("COPY FROM"); + + cstate = BeginCopyFrom(pstate, rel, whereClause, + stmt->filename, stmt->is_program, + NULL, stmt->attlist, stmt->options); + *processed = CopyFrom(cstate); /* copy from file to database */ + EndCopyFrom(cstate); + } + else + { + CopyToState cstate; + + cstate = BeginCopyTo(pstate, rel, query, relid, + stmt->filename, stmt->is_program, + stmt->attlist, stmt->options); + *processed = DoCopyTo(cstate); /* copy from database to file */ + EndCopyTo(cstate); + } + + if (rel != NULL) + table_close(rel, NoLock); +} + +/* + * Extract a CopyHeaderChoice value from a DefElem. This is like + * defGetBoolean() but also accepts the special value "match". + */ +static CopyHeaderChoice +defGetCopyHeaderChoice(DefElem *def, bool is_from) +{ + /* + * If no parameter given, assume "true" is meant. + */ + if (def->arg == NULL) + return COPY_HEADER_TRUE; + + /* + * Allow 0, 1, "true", "false", "on", "off", or "match". 
+ */ + switch (nodeTag(def->arg)) + { + case T_Integer: + switch (intVal(def->arg)) + { + case 0: + return COPY_HEADER_FALSE; + case 1: + return COPY_HEADER_TRUE; + default: + /* otherwise, error out below */ + break; + } + break; + default: + { + char *sval = defGetString(def); + + /* + * The set of strings accepted here should match up with the + * grammar's opt_boolean_or_string production. + */ + if (pg_strcasecmp(sval, "true") == 0) + return COPY_HEADER_TRUE; + if (pg_strcasecmp(sval, "false") == 0) + return COPY_HEADER_FALSE; + if (pg_strcasecmp(sval, "on") == 0) + return COPY_HEADER_TRUE; + if (pg_strcasecmp(sval, "off") == 0) + return COPY_HEADER_FALSE; + if (pg_strcasecmp(sval, "match") == 0) + { + if (!is_from) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot use \"%s\" with HEADER in COPY TO", + sval))); + return COPY_HEADER_MATCH; + } + } + break; + } + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("%s requires a Boolean value or \"match\"", + def->defname))); + return COPY_HEADER_FALSE; /* keep compiler quiet */ +} + +/* + * Process the statement option list for COPY. + * + * Scan the options list (a list of DefElem) and transpose the information + * into *opts_out, applying appropriate error checking. + * + * If 'opts_out' is not NULL, it is assumed to be filled with zeroes initially. + * + * This is exported so that external users of the COPY API can sanity-check + * a list of options. In that usage, 'opts_out' can be passed as NULL and + * the collected data is just leaked until CurrentMemoryContext is reset. + * + * Note that additional checking, such as whether column names listed in FORCE + * QUOTE actually exist, has to be applied later. This just checks for + * self-consistency of the options list. 
+ */ +void +ProcessCopyOptions(ParseState *pstate, + CopyFormatOptions *opts_out, + bool is_from, + List *options) +{ + bool format_specified = false; + bool freeze_specified = false; + bool header_specified = false; + ListCell *option; + + /* Support external use for option sanity checking */ + if (opts_out == NULL) + opts_out = (CopyFormatOptions *) palloc0(sizeof(CopyFormatOptions)); + + opts_out->file_encoding = -1; + + /* Extract options from the statement node tree */ + foreach(option, options) + { + DefElem *defel = lfirst_node(DefElem, option); + + if (strcmp(defel->defname, "format") == 0) + { + char *fmt = defGetString(defel); + + if (format_specified) + errorConflictingDefElem(defel, pstate); + format_specified = true; + if (strcmp(fmt, "text") == 0) + /* default format */ ; + else if (strcmp(fmt, "csv") == 0) + opts_out->csv_mode = true; + else if (strcmp(fmt, "binary") == 0) + opts_out->binary = true; + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("COPY format \"%s\" not recognized", fmt), + parser_errposition(pstate, defel->location))); + } + else if (strcmp(defel->defname, "freeze") == 0) + { + if (freeze_specified) + errorConflictingDefElem(defel, pstate); + freeze_specified = true; + opts_out->freeze = defGetBoolean(defel); + } + else if (strcmp(defel->defname, "delimiter") == 0) + { + if (opts_out->delim) + errorConflictingDefElem(defel, pstate); + opts_out->delim = defGetString(defel); + } + else if (strcmp(defel->defname, "null") == 0) + { + if (opts_out->null_print) + errorConflictingDefElem(defel, pstate); + opts_out->null_print = defGetString(defel); + } + else if (strcmp(defel->defname, "header") == 0) + { + if (header_specified) + errorConflictingDefElem(defel, pstate); + header_specified = true; + opts_out->header_line = defGetCopyHeaderChoice(defel, is_from); + } + else if (strcmp(defel->defname, "quote") == 0) + { + if (opts_out->quote) + errorConflictingDefElem(defel, pstate); + opts_out->quote = 
defGetString(defel); + } + else if (strcmp(defel->defname, "escape") == 0) + { + if (opts_out->escape) + errorConflictingDefElem(defel, pstate); + opts_out->escape = defGetString(defel); + } + else if (strcmp(defel->defname, "force_quote") == 0) + { + if (opts_out->force_quote || opts_out->force_quote_all) + errorConflictingDefElem(defel, pstate); + if (defel->arg && IsA(defel->arg, A_Star)) + opts_out->force_quote_all = true; + else if (defel->arg && IsA(defel->arg, List)) + opts_out->force_quote = castNode(List, defel->arg); + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("argument to option \"%s\" must be a list of column names", + defel->defname), + parser_errposition(pstate, defel->location))); + } + else if (strcmp(defel->defname, "force_not_null") == 0) + { + if (opts_out->force_notnull) + errorConflictingDefElem(defel, pstate); + if (defel->arg && IsA(defel->arg, List)) + opts_out->force_notnull = castNode(List, defel->arg); + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("argument to option \"%s\" must be a list of column names", + defel->defname), + parser_errposition(pstate, defel->location))); + } + else if (strcmp(defel->defname, "force_null") == 0) + { + if (opts_out->force_null) + errorConflictingDefElem(defel, pstate); + if (defel->arg && IsA(defel->arg, List)) + opts_out->force_null = castNode(List, defel->arg); + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("argument to option \"%s\" must be a list of column names", + defel->defname), + parser_errposition(pstate, defel->location))); + } + else if (strcmp(defel->defname, "convert_selectively") == 0) + { + /* + * Undocumented, not-accessible-from-SQL option: convert only the + * named columns to binary form, storing the rest as NULLs. It's + * allowed for the column list to be NIL. 
+ */ + if (opts_out->convert_selectively) + errorConflictingDefElem(defel, pstate); + opts_out->convert_selectively = true; + if (defel->arg == NULL || IsA(defel->arg, List)) + opts_out->convert_select = castNode(List, defel->arg); + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("argument to option \"%s\" must be a list of column names", + defel->defname), + parser_errposition(pstate, defel->location))); + } + else if (strcmp(defel->defname, "encoding") == 0) + { + if (opts_out->file_encoding >= 0) + errorConflictingDefElem(defel, pstate); + opts_out->file_encoding = pg_char_to_encoding(defGetString(defel)); + if (opts_out->file_encoding < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("argument to option \"%s\" must be a valid encoding name", + defel->defname), + parser_errposition(pstate, defel->location))); + } + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("option \"%s\" not recognized", + defel->defname), + parser_errposition(pstate, defel->location))); + } + + /* + * Check for incompatible options (must do these two before inserting + * defaults) + */ + if (opts_out->binary && opts_out->delim) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot specify DELIMITER in BINARY mode"))); + + if (opts_out->binary && opts_out->null_print) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot specify NULL in BINARY mode"))); + + /* Set defaults for omitted options */ + if (!opts_out->delim) + opts_out->delim = opts_out->csv_mode ? "," : "\t"; + + if (!opts_out->null_print) + opts_out->null_print = opts_out->csv_mode ? "" : "\\N"; + opts_out->null_print_len = strlen(opts_out->null_print); + + if (opts_out->csv_mode) + { + if (!opts_out->quote) + opts_out->quote = "\""; + if (!opts_out->escape) + opts_out->escape = opts_out->quote; + } + + /* Only single-byte delimiter strings are supported. 
*/ + if (strlen(opts_out->delim) != 1) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY delimiter must be a single one-byte character"))); + + /* Disallow end-of-line characters */ + if (strchr(opts_out->delim, '\r') != NULL || + strchr(opts_out->delim, '\n') != NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("COPY delimiter cannot be newline or carriage return"))); + + if (strchr(opts_out->null_print, '\r') != NULL || + strchr(opts_out->null_print, '\n') != NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("COPY null representation cannot use newline or carriage return"))); + + /* + * Disallow unsafe delimiter characters in non-CSV mode. We can't allow + * backslash because it would be ambiguous. We can't allow the other + * cases because data characters matching the delimiter must be + * backslashed, and certain backslash combinations are interpreted + * non-literally by COPY IN. Disallowing all lower case ASCII letters is + * more than strictly necessary, but seems best for consistency and + * future-proofing. Likewise we disallow all digits though only octal + * digits are actually dangerous. 
+ */ + if (!opts_out->csv_mode && + strchr("\\.abcdefghijklmnopqrstuvwxyz0123456789", + opts_out->delim[0]) != NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("COPY delimiter cannot be \"%s\"", opts_out->delim))); + + /* Check header */ + if (opts_out->binary && opts_out->header_line) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot specify HEADER in BINARY mode"))); + + /* Check quote */ + if (!opts_out->csv_mode && opts_out->quote != NULL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY quote available only in CSV mode"))); + + if (opts_out->csv_mode && strlen(opts_out->quote) != 1) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY quote must be a single one-byte character"))); + + if (opts_out->csv_mode && opts_out->delim[0] == opts_out->quote[0]) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("COPY delimiter and quote must be different"))); + + /* Check escape */ + if (!opts_out->csv_mode && opts_out->escape != NULL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY escape available only in CSV mode"))); + + if (opts_out->csv_mode && strlen(opts_out->escape) != 1) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY escape must be a single one-byte character"))); + + /* Check force_quote */ + if (!opts_out->csv_mode && (opts_out->force_quote || opts_out->force_quote_all)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY force quote available only in CSV mode"))); + if ((opts_out->force_quote || opts_out->force_quote_all) && is_from) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY force quote only available using COPY TO"))); + + /* Check force_notnull */ + if (!opts_out->csv_mode && opts_out->force_notnull != NIL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY force not null available only in CSV 
mode"))); + if (opts_out->force_notnull != NIL && !is_from) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY force not null only available using COPY FROM"))); + + /* Check force_null */ + if (!opts_out->csv_mode && opts_out->force_null != NIL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY force null available only in CSV mode"))); + + if (opts_out->force_null != NIL && !is_from) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY force null only available using COPY FROM"))); + + /* Don't allow the delimiter to appear in the null string. */ + if (strchr(opts_out->null_print, opts_out->delim[0]) != NULL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY delimiter must not appear in the NULL specification"))); + + /* Don't allow the CSV quote char to appear in the null string. */ + if (opts_out->csv_mode && + strchr(opts_out->null_print, opts_out->quote[0]) != NULL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("CSV quote character must not appear in the NULL specification"))); +} + +/* + * CopyGetAttnums - build an integer list of attnums to be copied + * + * The input attnamelist is either the user-specified column list, + * or NIL if there was none (in which case we want all the non-dropped + * columns). + * + * We don't include generated columns in the generated full list and we don't + * allow them to be specified explicitly. They don't make sense for COPY + * FROM, but we could possibly allow them for COPY TO. But this way it's at + * least ensured that whatever we copy out can be copied back in. + * + * rel can be NULL ... it's only used for error reports. 
+ */ +List * +CopyGetAttnums(TupleDesc tupDesc, Relation rel, List *attnamelist) +{ + List *attnums = NIL; + + if (attnamelist == NIL) + { + /* Generate default column list */ + int attr_count = tupDesc->natts; + int i; + + for (i = 0; i < attr_count; i++) + { + if (TupleDescAttr(tupDesc, i)->attisdropped) + continue; + if (TupleDescAttr(tupDesc, i)->attgenerated) + continue; + attnums = lappend_int(attnums, i + 1); + } + } + else + { + /* Validate the user-supplied list and extract attnums */ + ListCell *l; + + foreach(l, attnamelist) + { + char *name = strVal(lfirst(l)); + int attnum; + int i; + + /* Lookup column name */ + attnum = InvalidAttrNumber; + for (i = 0; i < tupDesc->natts; i++) + { + Form_pg_attribute att = TupleDescAttr(tupDesc, i); + + if (att->attisdropped) + continue; + if (namestrcmp(&(att->attname), name) == 0) + { + if (att->attgenerated) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("column \"%s\" is a generated column", + name), + errdetail("Generated columns cannot be used in COPY."))); + attnum = att->attnum; + break; + } + } + if (attnum == InvalidAttrNumber) + { + if (rel != NULL) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" of relation \"%s\" does not exist", + name, RelationGetRelationName(rel)))); + else + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" does not exist", + name))); + } + /* Check for duplicates */ + if (list_member_int(attnums, attnum)) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_COLUMN), + errmsg("column \"%s\" specified more than once", + name))); + attnums = lappend_int(attnums, attnum); + } + } + + return attnums; +} diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c new file mode 100644 index 0000000..c6dbd97 --- /dev/null +++ b/src/backend/commands/copyfrom.c @@ -0,0 +1,1624 @@ +/*------------------------------------------------------------------------- + * + * copyfrom.c + * COPY FROM 
file/program/client + * + * This file contains routines needed to efficiently load tuples into a + * table. That includes looking up the correct partition, firing triggers, + * calling the table AM function to insert the data, and updating indexes. + * Reading data from the input file or client and parsing it into Datums + * is handled in copyfromparse.c. + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/copyfrom.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include +#include +#include + +#include "access/heapam.h" +#include "access/htup_details.h" +#include "access/tableam.h" +#include "access/xact.h" +#include "access/xlog.h" +#include "catalog/namespace.h" +#include "commands/copy.h" +#include "commands/copyfrom_internal.h" +#include "commands/progress.h" +#include "commands/trigger.h" +#include "executor/execPartition.h" +#include "executor/executor.h" +#include "executor/nodeModifyTable.h" +#include "executor/tuptable.h" +#include "foreign/fdwapi.h" +#include "libpq/libpq.h" +#include "libpq/pqformat.h" +#include "miscadmin.h" +#include "optimizer/optimizer.h" +#include "pgstat.h" +#include "rewrite/rewriteHandler.h" +#include "storage/fd.h" +#include "tcop/tcopprot.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/portal.h" +#include "utils/rel.h" +#include "utils/snapmgr.h" + +/* + * No more than this many tuples per CopyMultiInsertBuffer + * + * Caution: Don't make this too big, as we could end up with this many + * CopyMultiInsertBuffer items stored in CopyMultiInsertInfo's + * multiInsertBuffers list. Increasing this can cause quadratic growth in + * memory requirements during copies into partitioned tables with a large + * number of partitions. 
+ */ +#define MAX_BUFFERED_TUPLES 1000 + +/* + * Flush buffers if there are >= this many bytes, as counted by the input + * size, of tuples stored. + */ +#define MAX_BUFFERED_BYTES 65535 + +/* Trim the list of buffers back down to this number after flushing */ +#define MAX_PARTITION_BUFFERS 32 + +/* Stores multi-insert data related to a single relation in CopyFrom. */ +typedef struct CopyMultiInsertBuffer +{ + TupleTableSlot *slots[MAX_BUFFERED_TUPLES]; /* Array to store tuples */ + ResultRelInfo *resultRelInfo; /* ResultRelInfo for 'relid' */ + BulkInsertState bistate; /* BulkInsertState for this rel */ + int nused; /* number of 'slots' containing tuples */ + uint64 linenos[MAX_BUFFERED_TUPLES]; /* Line # of tuple in copy + * stream */ +} CopyMultiInsertBuffer; + +/* + * Stores one or many CopyMultiInsertBuffers and details about the size and + * number of tuples which are stored in them. This allows multiple buffers to + * exist at once when COPYing into a partitioned table. + */ +typedef struct CopyMultiInsertInfo +{ + List *multiInsertBuffers; /* List of tracked CopyMultiInsertBuffers */ + int bufferedTuples; /* number of tuples buffered over all buffers */ + int bufferedBytes; /* number of bytes from all buffered tuples */ + CopyFromState cstate; /* Copy state for this CopyMultiInsertInfo */ + EState *estate; /* Executor state used for COPY */ + CommandId mycid; /* Command Id used for COPY */ + int ti_options; /* table insert options */ +} CopyMultiInsertInfo; + + +/* non-export function prototypes */ +static char *limit_printout_length(const char *str); + +static void ClosePipeFromProgram(CopyFromState cstate); + +/* + * error context callback for COPY FROM + * + * The argument for the error context must be CopyFromState. 
+ */ +void +CopyFromErrorCallback(void *arg) +{ + CopyFromState cstate = (CopyFromState) arg; + + if (cstate->opts.binary) + { + /* can't usefully display the data */ + if (cstate->cur_attname) + errcontext("COPY %s, line %llu, column %s", + cstate->cur_relname, + (unsigned long long) cstate->cur_lineno, + cstate->cur_attname); + else + errcontext("COPY %s, line %llu", + cstate->cur_relname, + (unsigned long long) cstate->cur_lineno); + } + else + { + if (cstate->cur_attname && cstate->cur_attval) + { + /* error is relevant to a particular column */ + char *attval; + + attval = limit_printout_length(cstate->cur_attval); + errcontext("COPY %s, line %llu, column %s: \"%s\"", + cstate->cur_relname, + (unsigned long long) cstate->cur_lineno, + cstate->cur_attname, + attval); + pfree(attval); + } + else if (cstate->cur_attname) + { + /* error is relevant to a particular column, value is NULL */ + errcontext("COPY %s, line %llu, column %s: null input", + cstate->cur_relname, + (unsigned long long) cstate->cur_lineno, + cstate->cur_attname); + } + else + { + /* + * Error is relevant to a particular line. + * + * If line_buf still contains the correct line, print it. + */ + if (cstate->line_buf_valid) + { + char *lineval; + + lineval = limit_printout_length(cstate->line_buf.data); + errcontext("COPY %s, line %llu: \"%s\"", + cstate->cur_relname, + (unsigned long long) cstate->cur_lineno, lineval); + pfree(lineval); + } + else + { + errcontext("COPY %s, line %llu", + cstate->cur_relname, + (unsigned long long) cstate->cur_lineno); + } + } + } +} + +/* + * Make sure we don't print an unreasonable amount of COPY data in a message. + * + * Returns a pstrdup'd copy of the input. 
+ */ +static char * +limit_printout_length(const char *str) +{ +#define MAX_COPY_DATA_DISPLAY 100 + + int slen = strlen(str); + int len; + char *res; + + /* Fast path if definitely okay */ + if (slen <= MAX_COPY_DATA_DISPLAY) + return pstrdup(str); + + /* Apply encoding-dependent truncation */ + len = pg_mbcliplen(str, slen, MAX_COPY_DATA_DISPLAY); + + /* + * Truncate, and add "..." to show we truncated the input. + */ + res = (char *) palloc(len + 4); + memcpy(res, str, len); + strcpy(res + len, "..."); + + return res; +} + +/* + * Allocate memory and initialize a new CopyMultiInsertBuffer for this + * ResultRelInfo. + */ +static CopyMultiInsertBuffer * +CopyMultiInsertBufferInit(ResultRelInfo *rri) +{ + CopyMultiInsertBuffer *buffer; + + buffer = (CopyMultiInsertBuffer *) palloc(sizeof(CopyMultiInsertBuffer)); + memset(buffer->slots, 0, sizeof(TupleTableSlot *) * MAX_BUFFERED_TUPLES); + buffer->resultRelInfo = rri; + buffer->bistate = GetBulkInsertState(); + buffer->nused = 0; + + return buffer; +} + +/* + * Make a new buffer for this ResultRelInfo. + */ +static inline void +CopyMultiInsertInfoSetupBuffer(CopyMultiInsertInfo *miinfo, + ResultRelInfo *rri) +{ + CopyMultiInsertBuffer *buffer; + + buffer = CopyMultiInsertBufferInit(rri); + + /* Setup back-link so we can easily find this buffer again */ + rri->ri_CopyMultiInsertBuffer = buffer; + /* Record that we're tracking this buffer */ + miinfo->multiInsertBuffers = lappend(miinfo->multiInsertBuffers, buffer); +} + +/* + * Initialize an already allocated CopyMultiInsertInfo. + * + * If rri is a non-partitioned table then a CopyMultiInsertBuffer is set up + * for that table. 
+ */ +static void +CopyMultiInsertInfoInit(CopyMultiInsertInfo *miinfo, ResultRelInfo *rri, + CopyFromState cstate, EState *estate, CommandId mycid, + int ti_options) +{ + miinfo->multiInsertBuffers = NIL; + miinfo->bufferedTuples = 0; + miinfo->bufferedBytes = 0; + miinfo->cstate = cstate; + miinfo->estate = estate; + miinfo->mycid = mycid; + miinfo->ti_options = ti_options; + + /* + * Only setup the buffer when not dealing with a partitioned table. + * Buffers for partitioned tables will just be setup when we need to send + * tuples their way for the first time. + */ + if (rri->ri_RelationDesc->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) + CopyMultiInsertInfoSetupBuffer(miinfo, rri); +} + +/* + * Returns true if the buffers are full + */ +static inline bool +CopyMultiInsertInfoIsFull(CopyMultiInsertInfo *miinfo) +{ + if (miinfo->bufferedTuples >= MAX_BUFFERED_TUPLES || + miinfo->bufferedBytes >= MAX_BUFFERED_BYTES) + return true; + return false; +} + +/* + * Returns true if we have no buffered tuples + */ +static inline bool +CopyMultiInsertInfoIsEmpty(CopyMultiInsertInfo *miinfo) +{ + return miinfo->bufferedTuples == 0; +} + +/* + * Write the tuples stored in 'buffer' out to the table. + */ +static inline void +CopyMultiInsertBufferFlush(CopyMultiInsertInfo *miinfo, + CopyMultiInsertBuffer *buffer) +{ + MemoryContext oldcontext; + int i; + uint64 save_cur_lineno; + CopyFromState cstate = miinfo->cstate; + EState *estate = miinfo->estate; + CommandId mycid = miinfo->mycid; + int ti_options = miinfo->ti_options; + bool line_buf_valid = cstate->line_buf_valid; + int nused = buffer->nused; + ResultRelInfo *resultRelInfo = buffer->resultRelInfo; + TupleTableSlot **slots = buffer->slots; + + /* + * Print error context information correctly, if one of the operations + * below fails. 
+ */ + cstate->line_buf_valid = false; + save_cur_lineno = cstate->cur_lineno; + + /* + * table_multi_insert may leak memory, so switch to short-lived memory + * context before calling it. + */ + oldcontext = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate)); + table_multi_insert(resultRelInfo->ri_RelationDesc, + slots, + nused, + mycid, + ti_options, + buffer->bistate); + MemoryContextSwitchTo(oldcontext); + + for (i = 0; i < nused; i++) + { + /* + * If there are any indexes, update them for all the inserted tuples, + * and run AFTER ROW INSERT triggers. + */ + if (resultRelInfo->ri_NumIndices > 0) + { + List *recheckIndexes; + + cstate->cur_lineno = buffer->linenos[i]; + recheckIndexes = + ExecInsertIndexTuples(resultRelInfo, + buffer->slots[i], estate, false, false, + NULL, NIL); + ExecARInsertTriggers(estate, resultRelInfo, + slots[i], recheckIndexes, + cstate->transition_capture); + list_free(recheckIndexes); + } + + /* + * There's no indexes, but see if we need to run AFTER ROW INSERT + * triggers anyway. + */ + else if (resultRelInfo->ri_TrigDesc != NULL && + (resultRelInfo->ri_TrigDesc->trig_insert_after_row || + resultRelInfo->ri_TrigDesc->trig_insert_new_table)) + { + cstate->cur_lineno = buffer->linenos[i]; + ExecARInsertTriggers(estate, resultRelInfo, + slots[i], NIL, cstate->transition_capture); + } + + ExecClearTuple(slots[i]); + } + + /* Mark that all slots are free */ + buffer->nused = 0; + + /* reset cur_lineno and line_buf_valid to what they were */ + cstate->line_buf_valid = line_buf_valid; + cstate->cur_lineno = save_cur_lineno; +} + +/* + * Drop used slots and free member for this buffer. + * + * The buffer must be flushed before cleanup. 
+ */ +static inline void +CopyMultiInsertBufferCleanup(CopyMultiInsertInfo *miinfo, + CopyMultiInsertBuffer *buffer) +{ + int i; + + /* Ensure buffer was flushed */ + Assert(buffer->nused == 0); + + /* Remove back-link to ourself */ + buffer->resultRelInfo->ri_CopyMultiInsertBuffer = NULL; + + FreeBulkInsertState(buffer->bistate); + + /* Since we only create slots on demand, just drop the non-null ones. */ + for (i = 0; i < MAX_BUFFERED_TUPLES && buffer->slots[i] != NULL; i++) + ExecDropSingleTupleTableSlot(buffer->slots[i]); + + table_finish_bulk_insert(buffer->resultRelInfo->ri_RelationDesc, + miinfo->ti_options); + + pfree(buffer); +} + +/* + * Write out all stored tuples in all buffers out to the tables. + * + * Once flushed we also trim the tracked buffers list down to size by removing + * the buffers created earliest first. + * + * Callers should pass 'curr_rri' as the ResultRelInfo that's currently being + * used. When cleaning up old buffers we'll never remove the one for + * 'curr_rri'. + */ +static inline void +CopyMultiInsertInfoFlush(CopyMultiInsertInfo *miinfo, ResultRelInfo *curr_rri) +{ + ListCell *lc; + + foreach(lc, miinfo->multiInsertBuffers) + { + CopyMultiInsertBuffer *buffer = (CopyMultiInsertBuffer *) lfirst(lc); + + CopyMultiInsertBufferFlush(miinfo, buffer); + } + + miinfo->bufferedTuples = 0; + miinfo->bufferedBytes = 0; + + /* + * Trim the list of tracked buffers down if it exceeds the limit. Here we + * remove buffers starting with the ones we created first. It seems less + * likely that these older ones will be needed than the ones that were + * just created. + */ + while (list_length(miinfo->multiInsertBuffers) > MAX_PARTITION_BUFFERS) + { + CopyMultiInsertBuffer *buffer; + + buffer = (CopyMultiInsertBuffer *) linitial(miinfo->multiInsertBuffers); + + /* + * We never want to remove the buffer that's currently being used, so + * if we happen to find that then move it to the end of the list. 
+ */ + if (buffer->resultRelInfo == curr_rri) + { + miinfo->multiInsertBuffers = list_delete_first(miinfo->multiInsertBuffers); + miinfo->multiInsertBuffers = lappend(miinfo->multiInsertBuffers, buffer); + buffer = (CopyMultiInsertBuffer *) linitial(miinfo->multiInsertBuffers); + } + + CopyMultiInsertBufferCleanup(miinfo, buffer); + miinfo->multiInsertBuffers = list_delete_first(miinfo->multiInsertBuffers); + } +} + +/* + * Cleanup allocated buffers and free memory + */ +static inline void +CopyMultiInsertInfoCleanup(CopyMultiInsertInfo *miinfo) +{ + ListCell *lc; + + foreach(lc, miinfo->multiInsertBuffers) + CopyMultiInsertBufferCleanup(miinfo, lfirst(lc)); + + list_free(miinfo->multiInsertBuffers); +} + +/* + * Get the next TupleTableSlot that the next tuple should be stored in. + * + * Callers must ensure that the buffer is not full. + * + * Note: 'miinfo' is unused but has been included for consistency with the + * other functions in this area. + */ +static inline TupleTableSlot * +CopyMultiInsertInfoNextFreeSlot(CopyMultiInsertInfo *miinfo, + ResultRelInfo *rri) +{ + CopyMultiInsertBuffer *buffer = rri->ri_CopyMultiInsertBuffer; + int nused = buffer->nused; + + Assert(buffer != NULL); + Assert(nused < MAX_BUFFERED_TUPLES); + + if (buffer->slots[nused] == NULL) + buffer->slots[nused] = table_slot_create(rri->ri_RelationDesc, NULL); + return buffer->slots[nused]; +} + +/* + * Record the previously reserved TupleTableSlot that was reserved by + * CopyMultiInsertInfoNextFreeSlot as being consumed. 
 */
static inline void
CopyMultiInsertInfoStore(CopyMultiInsertInfo *miinfo, ResultRelInfo *rri,
						 TupleTableSlot *slot, int tuplen, uint64 lineno)
{
	CopyMultiInsertBuffer *buffer = rri->ri_CopyMultiInsertBuffer;

	Assert(buffer != NULL);
	Assert(slot == buffer->slots[buffer->nused]);

	/* Store the line number so we can properly report any errors later */
	buffer->linenos[buffer->nused] = lineno;

	/* Record this slot as being used */
	buffer->nused++;

	/* Update how many tuples are stored and their size */
	miinfo->bufferedTuples++;
	miinfo->bufferedBytes += tuplen;
}

/*
 * Copy FROM file to relation.
 */
uint64
CopyFrom(CopyFromState cstate)
{
	ResultRelInfo *resultRelInfo;
	ResultRelInfo *target_resultRelInfo;
	ResultRelInfo *prevResultRelInfo = NULL;
	EState	   *estate = CreateExecutorState();	/* for ExecConstraints() */
	ModifyTableState *mtstate;
	ExprContext *econtext;
	TupleTableSlot *singleslot = NULL;
	MemoryContext oldcontext = CurrentMemoryContext;

	PartitionTupleRouting *proute = NULL;
	ErrorContextCallback errcallback;
	CommandId	mycid = GetCurrentCommandId(true);
	int			ti_options = 0; /* start with default options for insert */
	BulkInsertState bistate = NULL;
	CopyInsertMethod insertMethod;
	CopyMultiInsertInfo multiInsertInfo = {0};	/* pacify compiler */
	int64		processed = 0;	/* # tuples actually inserted */
	int64		excluded = 0;	/* # tuples filtered out by WHERE */
	bool		has_before_insert_row_trig;
	bool		has_instead_insert_row_trig;
	bool		leafpart_use_multi_insert = false;

	Assert(cstate->rel);
	Assert(list_length(cstate->range_table) == 1);

	/*
	 * The target must be a plain, foreign, or partitioned relation, or have
	 * an INSTEAD OF INSERT row trigger.  (Currently, such triggers are only
	 * allowed on views, so we only hint about them in the view case.
	 */
	if (cstate->rel->rd_rel->relkind != RELKIND_RELATION &&
		cstate->rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE &&
		cstate->rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE &&
		!(cstate->rel->trigdesc &&
		  cstate->rel->trigdesc->trig_insert_instead_row))
	{
		if (cstate->rel->rd_rel->relkind == RELKIND_VIEW)
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot copy to view \"%s\"",
							RelationGetRelationName(cstate->rel)),
					 errhint("To enable copying to a view, provide an INSTEAD OF INSERT trigger.")));
		else if (cstate->rel->rd_rel->relkind == RELKIND_MATVIEW)
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot copy to materialized view \"%s\"",
							RelationGetRelationName(cstate->rel))));
		else if (cstate->rel->rd_rel->relkind == RELKIND_SEQUENCE)
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot copy to sequence \"%s\"",
							RelationGetRelationName(cstate->rel))));
		else
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot copy to non-table relation \"%s\"",
							RelationGetRelationName(cstate->rel))));
	}

	/*
	 * If the target file is new-in-transaction, we assume that checking FSM
	 * for free space is a waste of time.  This could possibly be wrong, but
	 * it's unlikely.
	 */
	if (RELKIND_HAS_STORAGE(cstate->rel->rd_rel->relkind) &&
		(cstate->rel->rd_createSubid != InvalidSubTransactionId ||
		 cstate->rel->rd_firstRelfilenodeSubid != InvalidSubTransactionId))
		ti_options |= TABLE_INSERT_SKIP_FSM;

	/*
	 * Optimize if new relfilenode was created in this subxact or one of its
	 * committed children and we won't see those rows later as part of an
	 * earlier scan or command. The subxact test ensures that if this subxact
	 * aborts then the frozen rows won't be visible after xact cleanup.  Note
	 * that the stronger test of exactly which subtransaction created it is
	 * crucial for correctness of this optimization. The test for an earlier
	 * scan or command tolerates false negatives. FREEZE causes other sessions
	 * to see rows they would not see under MVCC, and a false negative merely
	 * spreads that anomaly to the current session.
	 */
	if (cstate->opts.freeze)
	{
		/*
		 * We currently disallow COPY FREEZE on partitioned tables.  The
		 * reason for this is that we've simply not yet opened the partitions
		 * to determine if the optimization can be applied to them.  We could
		 * go and open them all here, but doing so may be quite a costly
		 * overhead for small copies.  In any case, we may just end up routing
		 * tuples to a small number of partitions.  It seems better just to
		 * raise an ERROR for partitioned tables.
		 */
		if (cstate->rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
		{
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("cannot perform COPY FREEZE on a partitioned table")));
		}

		/*
		 * Tolerate one registration for the benefit of FirstXactSnapshot.
		 * Scan-bearing queries generally create at least two registrations,
		 * though relying on that is fragile, as is ignoring ActiveSnapshot.
		 * Clear CatalogSnapshot to avoid counting its registration.  We'll
		 * still detect ongoing catalog scans, each of which separately
		 * registers the snapshot it uses.
		 */
		InvalidateCatalogSnapshot();
		if (!ThereAreNoPriorRegisteredSnapshots() || !ThereAreNoReadyPortals())
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_TRANSACTION_STATE),
					 errmsg("cannot perform COPY FREEZE because of prior transaction activity")));

		if (cstate->rel->rd_createSubid != GetCurrentSubTransactionId() &&
			cstate->rel->rd_newRelfilenodeSubid != GetCurrentSubTransactionId())
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("cannot perform COPY FREEZE because the table was not created or truncated in the current subtransaction")));

		ti_options |= TABLE_INSERT_FROZEN;
	}

	/*
	 * We need a ResultRelInfo so we can use the regular executor's
	 * index-entry-making machinery.  (There used to be a huge amount of code
	 * here that basically duplicated execUtils.c ...)
	 */
	ExecInitRangeTable(estate, cstate->range_table);
	resultRelInfo = target_resultRelInfo = makeNode(ResultRelInfo);
	ExecInitResultRelation(estate, resultRelInfo, 1);

	/* Verify the named relation is a valid target for INSERT */
	CheckValidResultRel(resultRelInfo, CMD_INSERT);

	ExecOpenIndices(resultRelInfo, false);

	/*
	 * Set up a ModifyTableState so we can let FDW(s) init themselves for
	 * foreign-table result relation(s).
	 */
	mtstate = makeNode(ModifyTableState);
	mtstate->ps.plan = NULL;
	mtstate->ps.state = estate;
	mtstate->operation = CMD_INSERT;
	mtstate->mt_nrels = 1;
	mtstate->resultRelInfo = resultRelInfo;
	mtstate->rootResultRelInfo = resultRelInfo;

	if (resultRelInfo->ri_FdwRoutine != NULL &&
		resultRelInfo->ri_FdwRoutine->BeginForeignInsert != NULL)
		resultRelInfo->ri_FdwRoutine->BeginForeignInsert(mtstate,
														 resultRelInfo);

	/* Prepare to catch AFTER triggers. */
	AfterTriggerBeginQuery();

	/*
	 * If there are any triggers with transition tables on the named relation,
	 * we need to be prepared to capture transition tuples.
	 *
	 * Because partition tuple routing would like to know about whether
	 * transition capture is active, we also set it in mtstate, which is
	 * passed to ExecFindPartition() below.
	 */
	cstate->transition_capture = mtstate->mt_transition_capture =
		MakeTransitionCaptureState(cstate->rel->trigdesc,
								   RelationGetRelid(cstate->rel),
								   CMD_INSERT);

	/*
	 * If the named relation is a partitioned table, initialize state for
	 * CopyFrom tuple routing.
	 */
	if (cstate->rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
		proute = ExecSetupPartitionTupleRouting(estate, cstate->rel);

	if (cstate->whereClause)
		cstate->qualexpr = ExecInitQual(castNode(List, cstate->whereClause),
										&mtstate->ps);

	/*
	 * It's generally more efficient to prepare a bunch of tuples for
	 * insertion, and insert them in one table_multi_insert() call, than call
	 * table_tuple_insert() separately for every tuple. However, there are a
	 * number of reasons why we might not be able to do this.  These are
	 * explained below.
	 */
	if (resultRelInfo->ri_TrigDesc != NULL &&
		(resultRelInfo->ri_TrigDesc->trig_insert_before_row ||
		 resultRelInfo->ri_TrigDesc->trig_insert_instead_row))
	{
		/*
		 * Can't support multi-inserts when there are any BEFORE/INSTEAD OF
		 * triggers on the table.  Such triggers might query the table we're
		 * inserting into and act differently if the tuples that have already
		 * been processed and prepared for insertion are not there.
		 */
		insertMethod = CIM_SINGLE;
	}
	else if (proute != NULL && resultRelInfo->ri_TrigDesc != NULL &&
			 resultRelInfo->ri_TrigDesc->trig_insert_new_table)
	{
		/*
		 * For partitioned tables we can't support multi-inserts when there
		 * are any statement level insert triggers. It might be possible to
		 * allow partitioned tables with such triggers in the future, but for
		 * now, CopyMultiInsertInfoFlush expects that any after row insert and
		 * statement level insert triggers are on the same relation.
		 */
		insertMethod = CIM_SINGLE;
	}
	else if (resultRelInfo->ri_FdwRoutine != NULL ||
			 cstate->volatile_defexprs)
	{
		/*
		 * Can't support multi-inserts to foreign tables or if there are any
		 * volatile default expressions in the table.  Similarly to the
		 * trigger case above, such expressions may query the table we're
		 * inserting into.
		 *
		 * Note: It does not matter if any partitions have any volatile
		 * default expressions as we use the defaults from the target of the
		 * COPY command.
		 */
		insertMethod = CIM_SINGLE;
	}
	else if (contain_volatile_functions(cstate->whereClause))
	{
		/*
		 * Can't support multi-inserts if there are any volatile function
		 * expressions in WHERE clause.  Similarly to the trigger case above,
		 * such expressions may query the table we're inserting into.
		 */
		insertMethod = CIM_SINGLE;
	}
	else
	{
		/*
		 * For partitioned tables, we may still be able to perform bulk
		 * inserts.  However, the possibility of this depends on which types
		 * of triggers exist on the partition.  We must disable bulk inserts
		 * if the partition is a foreign table or it has any before row insert
		 * or insert instead triggers (same as we checked above for the parent
		 * table).  Since the partition's resultRelInfos are initialized only
		 * when we actually need to insert the first tuple into them, we must
		 * have the intermediate insert method of CIM_MULTI_CONDITIONAL to
		 * flag that we must later determine if we can use bulk-inserts for
		 * the partition being inserted into.
		 */
		if (proute)
			insertMethod = CIM_MULTI_CONDITIONAL;
		else
			insertMethod = CIM_MULTI;

		CopyMultiInsertInfoInit(&multiInsertInfo, resultRelInfo, cstate,
								estate, mycid, ti_options);
	}

	/*
	 * If not using batch mode (which allocates slots as needed) set up a
	 * tuple slot too.  When inserting into a partitioned table, we also need
	 * one, even if we might batch insert, to read the tuple in the root
	 * partition's form.
	 */
	if (insertMethod == CIM_SINGLE || insertMethod == CIM_MULTI_CONDITIONAL)
	{
		singleslot = table_slot_create(resultRelInfo->ri_RelationDesc,
									   &estate->es_tupleTable);
		bistate = GetBulkInsertState();
	}

	has_before_insert_row_trig = (resultRelInfo->ri_TrigDesc &&
								  resultRelInfo->ri_TrigDesc->trig_insert_before_row);

	has_instead_insert_row_trig = (resultRelInfo->ri_TrigDesc &&
								   resultRelInfo->ri_TrigDesc->trig_insert_instead_row);

	/*
	 * Check BEFORE STATEMENT insertion triggers. It's debatable whether we
	 * should do this for COPY, since it's not really an "INSERT" statement as
	 * such. However, executing these triggers maintains consistency with the
	 * EACH ROW triggers that we already fire on COPY.
	 */
	ExecBSInsertTriggers(estate, resultRelInfo);

	econtext = GetPerTupleExprContext(estate);

	/* Set up callback to identify error line number */
	errcallback.callback = CopyFromErrorCallback;
	errcallback.arg = (void *) cstate;
	errcallback.previous = error_context_stack;
	error_context_stack = &errcallback;

	/* Main loop: one iteration per input line/tuple, exits on EOF */
	for (;;)
	{
		TupleTableSlot *myslot;
		bool		skip_tuple;

		CHECK_FOR_INTERRUPTS();

		/*
		 * Reset the per-tuple exprcontext. We do this after every tuple, to
		 * clean-up after expression evaluations etc.
		 */
		ResetPerTupleExprContext(estate);

		/* select slot to (initially) load row into */
		if (insertMethod == CIM_SINGLE || proute)
		{
			myslot = singleslot;
			Assert(myslot != NULL);
		}
		else
		{
			Assert(resultRelInfo == target_resultRelInfo);
			Assert(insertMethod == CIM_MULTI);

			myslot = CopyMultiInsertInfoNextFreeSlot(&multiInsertInfo,
													 resultRelInfo);
		}

		/*
		 * Switch to per-tuple context before calling NextCopyFrom, which does
		 * evaluate default expressions etc. and requires per-tuple context.
		 */
		MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));

		ExecClearTuple(myslot);

		/* Directly store the values/nulls array in the slot */
		if (!NextCopyFrom(cstate, econtext, myslot->tts_values, myslot->tts_isnull))
			break;				/* EOF reached */

		ExecStoreVirtualTuple(myslot);

		/*
		 * Constraints and where clause might reference the tableoid column,
		 * so (re-)initialize tts_tableOid before evaluating them.
		 */
		myslot->tts_tableOid = RelationGetRelid(target_resultRelInfo->ri_RelationDesc);

		/* Triggers and stuff need to be invoked in query context. */
		MemoryContextSwitchTo(oldcontext);

		if (cstate->whereClause)
		{
			econtext->ecxt_scantuple = myslot;
			/* Skip items that don't match COPY's WHERE clause */
			if (!ExecQual(cstate->qualexpr, econtext))
			{
				/*
				 * Report that this tuple was filtered out by the WHERE
				 * clause.
				 */
				pgstat_progress_update_param(PROGRESS_COPY_TUPLES_EXCLUDED,
											 ++excluded);
				continue;
			}
		}

		/* Determine the partition to insert the tuple into */
		if (proute)
		{
			TupleConversionMap *map;

			/*
			 * Attempt to find a partition suitable for this tuple.
			 * ExecFindPartition() will raise an error if none can be found or
			 * if the found partition is not suitable for INSERTs.
			 */
			resultRelInfo = ExecFindPartition(mtstate, target_resultRelInfo,
											  proute, myslot, estate);

			if (prevResultRelInfo != resultRelInfo)
			{
				/* Determine which triggers exist on this partition */
				has_before_insert_row_trig = (resultRelInfo->ri_TrigDesc &&
											  resultRelInfo->ri_TrigDesc->trig_insert_before_row);

				has_instead_insert_row_trig = (resultRelInfo->ri_TrigDesc &&
											   resultRelInfo->ri_TrigDesc->trig_insert_instead_row);

				/*
				 * Disable multi-inserts when the partition has BEFORE/INSTEAD
				 * OF triggers, or if the partition is a foreign partition.
				 */
				leafpart_use_multi_insert = insertMethod == CIM_MULTI_CONDITIONAL &&
					!has_before_insert_row_trig &&
					!has_instead_insert_row_trig &&
					resultRelInfo->ri_FdwRoutine == NULL;

				/* Set the multi-insert buffer to use for this partition. */
				if (leafpart_use_multi_insert)
				{
					if (resultRelInfo->ri_CopyMultiInsertBuffer == NULL)
						CopyMultiInsertInfoSetupBuffer(&multiInsertInfo,
													   resultRelInfo);
				}
				else if (insertMethod == CIM_MULTI_CONDITIONAL &&
						 !CopyMultiInsertInfoIsEmpty(&multiInsertInfo))
				{
					/*
					 * Flush pending inserts if this partition can't use
					 * batching, so rows are visible to triggers etc.
					 */
					CopyMultiInsertInfoFlush(&multiInsertInfo, resultRelInfo);
				}

				/* partition changed, so release the pin on the old block */
				if (bistate != NULL)
					ReleaseBulkInsertStatePin(bistate);
				prevResultRelInfo = resultRelInfo;
			}

			/*
			 * If we're capturing transition tuples, we might need to convert
			 * from the partition rowtype to root rowtype. But if there are no
			 * BEFORE triggers on the partition that could change the tuple,
			 * we can just remember the original unconverted tuple to avoid a
			 * needless round trip conversion.
			 */
			if (cstate->transition_capture != NULL)
				cstate->transition_capture->tcs_original_insert_tuple =
					!has_before_insert_row_trig ? myslot : NULL;

			/*
			 * We might need to convert from the root rowtype to the partition
			 * rowtype.
			 */
			map = resultRelInfo->ri_RootToPartitionMap;
			if (insertMethod == CIM_SINGLE || !leafpart_use_multi_insert)
			{
				/* non batch insert */
				if (map != NULL)
				{
					TupleTableSlot *new_slot;

					new_slot = resultRelInfo->ri_PartitionTupleSlot;
					myslot = execute_attr_map_slot(map->attrMap, myslot, new_slot);
				}
			}
			else
			{
				/*
				 * Prepare to queue up tuple for later batch insert into
				 * current partition.
				 */
				TupleTableSlot *batchslot;

				/* no other path available for partitioned table */
				Assert(insertMethod == CIM_MULTI_CONDITIONAL);

				batchslot = CopyMultiInsertInfoNextFreeSlot(&multiInsertInfo,
															resultRelInfo);

				if (map != NULL)
					myslot = execute_attr_map_slot(map->attrMap, myslot,
												   batchslot);
				else
				{
					/*
					 * This looks more expensive than it is (Believe me, I
					 * optimized it away. Twice.). The input is in virtual
					 * form, and we'll materialize the slot below - for most
					 * slot types the copy performs the work materialization
					 * would later require anyway.
					 */
					ExecCopySlot(batchslot, myslot);
					myslot = batchslot;
				}
			}

			/* ensure that triggers etc see the right relation */
			myslot->tts_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
		}

		skip_tuple = false;

		/* BEFORE ROW INSERT Triggers */
		if (has_before_insert_row_trig)
		{
			if (!ExecBRInsertTriggers(estate, resultRelInfo, myslot))
				skip_tuple = true;	/* "do nothing" */
		}

		if (!skip_tuple)
		{
			/*
			 * If there is an INSTEAD OF INSERT ROW trigger, let it handle the
			 * tuple.  Otherwise, proceed with inserting the tuple into the
			 * table or foreign table.
			 */
			if (has_instead_insert_row_trig)
			{
				ExecIRInsertTriggers(estate, resultRelInfo, myslot);
			}
			else
			{
				/* Compute stored generated columns */
				if (resultRelInfo->ri_RelationDesc->rd_att->constr &&
					resultRelInfo->ri_RelationDesc->rd_att->constr->has_generated_stored)
					ExecComputeStoredGenerated(resultRelInfo, estate, myslot,
											   CMD_INSERT);

				/*
				 * If the target is a plain table, check the constraints of
				 * the tuple.
				 */
				if (resultRelInfo->ri_FdwRoutine == NULL &&
					resultRelInfo->ri_RelationDesc->rd_att->constr)
					ExecConstraints(resultRelInfo, myslot, estate);

				/*
				 * Also check the tuple against the partition constraint, if
				 * there is one; except that if we got here via tuple-routing,
				 * we don't need to if there's no BR trigger defined on the
				 * partition.
				 */
				if (resultRelInfo->ri_RelationDesc->rd_rel->relispartition &&
					(proute == NULL || has_before_insert_row_trig))
					ExecPartitionCheck(resultRelInfo, myslot, estate, true);

				/* Store the slot in the multi-insert buffer, when enabled. */
				if (insertMethod == CIM_MULTI || leafpart_use_multi_insert)
				{
					/*
					 * The slot previously might point into the per-tuple
					 * context. For batching it needs to be longer lived.
					 */
					ExecMaterializeSlot(myslot);

					/* Add this tuple to the tuple buffer */
					CopyMultiInsertInfoStore(&multiInsertInfo,
											 resultRelInfo, myslot,
											 cstate->line_buf.len,
											 cstate->cur_lineno);

					/*
					 * If enough inserts have queued up, then flush all
					 * buffers out to their tables.
					 */
					if (CopyMultiInsertInfoIsFull(&multiInsertInfo))
						CopyMultiInsertInfoFlush(&multiInsertInfo, resultRelInfo);
				}
				else
				{
					List	   *recheckIndexes = NIL;

					/* OK, store the tuple */
					if (resultRelInfo->ri_FdwRoutine != NULL)
					{
						myslot = resultRelInfo->ri_FdwRoutine->ExecForeignInsert(estate,
																				 resultRelInfo,
																				 myslot,
																				 NULL);

						if (myslot == NULL) /* "do nothing" */
							continue;	/* next tuple please */

						/*
						 * AFTER ROW Triggers might reference the tableoid
						 * column, so (re-)initialize tts_tableOid before
						 * evaluating them.
						 */
						myslot->tts_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
					}
					else
					{
						/* OK, store the tuple and create index entries for it */
						table_tuple_insert(resultRelInfo->ri_RelationDesc,
										   myslot, mycid, ti_options, bistate);

						if (resultRelInfo->ri_NumIndices > 0)
							recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
																   myslot,
																   estate,
																   false,
																   false,
																   NULL,
																   NIL);
					}

					/* AFTER ROW INSERT Triggers */
					ExecARInsertTriggers(estate, resultRelInfo, myslot,
										 recheckIndexes, cstate->transition_capture);

					list_free(recheckIndexes);
				}
			}

			/*
			 * We count only tuples not suppressed by a BEFORE INSERT trigger
			 * or FDW; this is the same definition used by nodeModifyTable.c
			 * for counting tuples inserted by an INSERT command.  Update
			 * progress of the COPY command as well.
			 */
			pgstat_progress_update_param(PROGRESS_COPY_TUPLES_PROCESSED,
										 ++processed);
		}
	}

	/* Flush any remaining buffered tuples */
	if (insertMethod != CIM_SINGLE)
	{
		if (!CopyMultiInsertInfoIsEmpty(&multiInsertInfo))
			CopyMultiInsertInfoFlush(&multiInsertInfo, NULL);
	}

	/* Done, clean up */
	error_context_stack = errcallback.previous;

	if (bistate != NULL)
		FreeBulkInsertState(bistate);

	MemoryContextSwitchTo(oldcontext);

	/* Execute AFTER STATEMENT insertion triggers */
	ExecASInsertTriggers(estate, target_resultRelInfo, cstate->transition_capture);

	/* Handle queued AFTER triggers */
	AfterTriggerEndQuery(estate);

	ExecResetTupleTable(estate->es_tupleTable, false);

	/* Allow the FDW to shut down */
	if (target_resultRelInfo->ri_FdwRoutine != NULL &&
		target_resultRelInfo->ri_FdwRoutine->EndForeignInsert != NULL)
		target_resultRelInfo->ri_FdwRoutine->EndForeignInsert(estate,
															  target_resultRelInfo);

	/* Tear down the multi-insert buffer data */
	if (insertMethod != CIM_SINGLE)
		CopyMultiInsertInfoCleanup(&multiInsertInfo);

	/* Close all the partitioned tables, leaf partitions, and their indices */
	if (proute)
		ExecCleanupTupleRouting(mtstate, proute);

	/* Close the result relations, including any trigger target relations */
	ExecCloseResultRelations(estate);
	ExecCloseRangeTableRelations(estate);

	FreeExecutorState(estate);

	return processed;
}

/*
 * Setup to read tuples from a file for COPY FROM.
 *
 * 'rel': Used as a template for the tuples
 * 'whereClause': WHERE clause from the COPY FROM command
 * 'filename': Name of server-local file to read, NULL for STDIN
 * 'is_program': true if 'filename' is program to execute
 * 'data_source_cb': callback that provides the input data
 * 'attnamelist': List of char *, columns to include. NIL selects all cols.
 * 'options': List of DefElem. See copy_opt_item in gram.y for selections.
 *
 * Returns a CopyFromState, to be passed to NextCopyFrom and related functions.
 */
CopyFromState
BeginCopyFrom(ParseState *pstate,
			  Relation rel,
			  Node *whereClause,
			  const char *filename,
			  bool is_program,
			  copy_data_source_cb data_source_cb,
			  List *attnamelist,
			  List *options)
{
	CopyFromState cstate;
	bool		pipe = (filename == NULL);
	TupleDesc	tupDesc;
	AttrNumber	num_phys_attrs,
				num_defaults;
	FmgrInfo   *in_functions;
	Oid		   *typioparams;
	int			attnum;
	Oid			in_func_oid;
	int		   *defmap;
	ExprState **defexprs;
	MemoryContext oldcontext;
	bool		volatile_defexprs;
	const int	progress_cols[] = {
		PROGRESS_COPY_COMMAND,
		PROGRESS_COPY_TYPE,
		PROGRESS_COPY_BYTES_TOTAL
	};
	int64		progress_vals[] = {
		PROGRESS_COPY_COMMAND_FROM,
		0,
		0
	};

	/* Allocate workspace and zero all fields */
	cstate = (CopyFromStateData *) palloc0(sizeof(CopyFromStateData));

	/*
	 * We allocate everything used by a cstate in a new memory context. This
	 * avoids memory leaks during repeated use of COPY in a query.
+ */ + cstate->copycontext = AllocSetContextCreate(CurrentMemoryContext, + "COPY", + ALLOCSET_DEFAULT_SIZES); + + oldcontext = MemoryContextSwitchTo(cstate->copycontext); + + /* Extract options from the statement node tree */ + ProcessCopyOptions(pstate, &cstate->opts, true /* is_from */ , options); + + /* Process the target relation */ + cstate->rel = rel; + + tupDesc = RelationGetDescr(cstate->rel); + + /* process common options or initialization */ + + /* Generate or convert list of attributes to process */ + cstate->attnumlist = CopyGetAttnums(tupDesc, cstate->rel, attnamelist); + + num_phys_attrs = tupDesc->natts; + + /* Convert FORCE_NOT_NULL name list to per-column flags, check validity */ + cstate->opts.force_notnull_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool)); + if (cstate->opts.force_notnull) + { + List *attnums; + ListCell *cur; + + attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->opts.force_notnull); + + foreach(cur, attnums) + { + int attnum = lfirst_int(cur); + Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1); + + if (!list_member_int(cstate->attnumlist, attnum)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("FORCE_NOT_NULL column \"%s\" not referenced by COPY", + NameStr(attr->attname)))); + cstate->opts.force_notnull_flags[attnum - 1] = true; + } + } + + /* Convert FORCE_NULL name list to per-column flags, check validity */ + cstate->opts.force_null_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool)); + if (cstate->opts.force_null) + { + List *attnums; + ListCell *cur; + + attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->opts.force_null); + + foreach(cur, attnums) + { + int attnum = lfirst_int(cur); + Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1); + + if (!list_member_int(cstate->attnumlist, attnum)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("FORCE_NULL column \"%s\" not referenced by COPY", + NameStr(attr->attname)))); + 
cstate->opts.force_null_flags[attnum - 1] = true; + } + } + + /* Convert convert_selectively name list to per-column flags */ + if (cstate->opts.convert_selectively) + { + List *attnums; + ListCell *cur; + + cstate->convert_select_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool)); + + attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->opts.convert_select); + + foreach(cur, attnums) + { + int attnum = lfirst_int(cur); + Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1); + + if (!list_member_int(cstate->attnumlist, attnum)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg_internal("selected column \"%s\" not referenced by COPY", + NameStr(attr->attname)))); + cstate->convert_select_flags[attnum - 1] = true; + } + } + + /* Use client encoding when ENCODING option is not specified. */ + if (cstate->opts.file_encoding < 0) + cstate->file_encoding = pg_get_client_encoding(); + else + cstate->file_encoding = cstate->opts.file_encoding; + + /* + * Look up encoding conversion function. 
+ */ + if (cstate->file_encoding == GetDatabaseEncoding() || + cstate->file_encoding == PG_SQL_ASCII || + GetDatabaseEncoding() == PG_SQL_ASCII) + { + cstate->need_transcoding = false; + } + else + { + cstate->need_transcoding = true; + cstate->conversion_proc = FindDefaultConversionProc(cstate->file_encoding, + GetDatabaseEncoding()); + if (!OidIsValid(cstate->conversion_proc)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("default conversion function for encoding \"%s\" to \"%s\" does not exist", + pg_encoding_to_char(cstate->file_encoding), + pg_encoding_to_char(GetDatabaseEncoding())))); + } + + cstate->copy_src = COPY_FILE; /* default */ + + cstate->whereClause = whereClause; + + /* Initialize state variables */ + cstate->eol_type = EOL_UNKNOWN; + cstate->cur_relname = RelationGetRelationName(cstate->rel); + cstate->cur_lineno = 0; + cstate->cur_attname = NULL; + cstate->cur_attval = NULL; + + /* + * Allocate buffers for the input pipeline. + * + * attribute_buf and raw_buf are used in both text and binary modes, but + * input_buf and line_buf only in text mode. + */ + cstate->raw_buf = palloc(RAW_BUF_SIZE + 1); + cstate->raw_buf_index = cstate->raw_buf_len = 0; + cstate->raw_reached_eof = false; + + if (!cstate->opts.binary) + { + /* + * If encoding conversion is needed, we need another buffer to hold + * the converted input data. Otherwise, we can just point input_buf + * to the same buffer as raw_buf. + */ + if (cstate->need_transcoding) + { + cstate->input_buf = (char *) palloc(INPUT_BUF_SIZE + 1); + cstate->input_buf_index = cstate->input_buf_len = 0; + } + else + cstate->input_buf = cstate->raw_buf; + cstate->input_reached_eof = false; + + initStringInfo(&cstate->line_buf); + } + + initStringInfo(&cstate->attribute_buf); + + /* Assign range table, we'll need it in CopyFrom. 
*/ + if (pstate) + cstate->range_table = pstate->p_rtable; + + tupDesc = RelationGetDescr(cstate->rel); + num_phys_attrs = tupDesc->natts; + num_defaults = 0; + volatile_defexprs = false; + + /* + * Pick up the required catalog information for each attribute in the + * relation, including the input function, the element type (to pass to + * the input function), and info about defaults and constraints. (Which + * input function we use depends on text/binary format choice.) + */ + in_functions = (FmgrInfo *) palloc(num_phys_attrs * sizeof(FmgrInfo)); + typioparams = (Oid *) palloc(num_phys_attrs * sizeof(Oid)); + defmap = (int *) palloc(num_phys_attrs * sizeof(int)); + defexprs = (ExprState **) palloc(num_phys_attrs * sizeof(ExprState *)); + + for (attnum = 1; attnum <= num_phys_attrs; attnum++) + { + Form_pg_attribute att = TupleDescAttr(tupDesc, attnum - 1); + + /* We don't need info for dropped attributes */ + if (att->attisdropped) + continue; + + /* Fetch the input function and typioparam info */ + if (cstate->opts.binary) + getTypeBinaryInputInfo(att->atttypid, + &in_func_oid, &typioparams[attnum - 1]); + else + getTypeInputInfo(att->atttypid, + &in_func_oid, &typioparams[attnum - 1]); + fmgr_info(in_func_oid, &in_functions[attnum - 1]); + + /* Get default info if needed */ + if (!list_member_int(cstate->attnumlist, attnum) && !att->attgenerated) + { + /* attribute is NOT to be copied from input */ + /* use default value if one exists */ + Expr *defexpr = (Expr *) build_column_default(cstate->rel, + attnum); + + if (defexpr != NULL) + { + /* Run the expression through planner */ + defexpr = expression_planner(defexpr); + + /* Initialize executable expression in copycontext */ + defexprs[num_defaults] = ExecInitExpr(defexpr, NULL); + defmap[num_defaults] = attnum - 1; + num_defaults++; + + /* + * If a default expression looks at the table being loaded, + * then it could give the wrong answer when using + * multi-insert. 
Since database access can be dynamic this is + * hard to test for exactly, so we use the much wider test of + * whether the default expression is volatile. We allow for + * the special case of when the default expression is the + * nextval() of a sequence which in this specific case is + * known to be safe for use with the multi-insert + * optimization. Hence we use this special case function + * checker rather than the standard check for + * contain_volatile_functions(). + */ + if (!volatile_defexprs) + volatile_defexprs = contain_volatile_functions_not_nextval((Node *) defexpr); + } + } + } + + + /* initialize progress */ + pgstat_progress_start_command(PROGRESS_COMMAND_COPY, + cstate->rel ? RelationGetRelid(cstate->rel) : InvalidOid); + cstate->bytes_processed = 0; + + /* We keep those variables in cstate. */ + cstate->in_functions = in_functions; + cstate->typioparams = typioparams; + cstate->defmap = defmap; + cstate->defexprs = defexprs; + cstate->volatile_defexprs = volatile_defexprs; + cstate->num_defaults = num_defaults; + cstate->is_program = is_program; + + if (data_source_cb) + { + progress_vals[1] = PROGRESS_COPY_TYPE_CALLBACK; + cstate->copy_src = COPY_CALLBACK; + cstate->data_source_cb = data_source_cb; + } + else if (pipe) + { + progress_vals[1] = PROGRESS_COPY_TYPE_PIPE; + Assert(!is_program); /* the grammar does not allow this */ + if (whereToSendOutput == DestRemote) + ReceiveCopyBegin(cstate); + else + cstate->copy_file = stdin; + } + else + { + cstate->filename = pstrdup(filename); + + if (cstate->is_program) + { + progress_vals[1] = PROGRESS_COPY_TYPE_PROGRAM; + cstate->copy_file = OpenPipeStream(cstate->filename, PG_BINARY_R); + if (cstate->copy_file == NULL) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not execute command \"%s\": %m", + cstate->filename))); + } + else + { + struct stat st; + + progress_vals[1] = PROGRESS_COPY_TYPE_FILE; + cstate->copy_file = AllocateFile(cstate->filename, PG_BINARY_R); + if 
(cstate->copy_file == NULL)
			{
				/* copy errno because ereport subfunctions might change it */
				int			save_errno = errno;

				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not open file \"%s\" for reading: %m",
								cstate->filename),
						 (save_errno == ENOENT || save_errno == EACCES) ?
						 errhint("COPY FROM instructs the PostgreSQL server process to read a file. "
								 "You may want a client-side facility such as psql's \\copy.") : 0));
			}

			if (fstat(fileno(cstate->copy_file), &st))
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not stat file \"%s\": %m",
								cstate->filename)));

			if (S_ISDIR(st.st_mode))
				ereport(ERROR,
						(errcode(ERRCODE_WRONG_OBJECT_TYPE),
						 errmsg("\"%s\" is a directory", cstate->filename)));

			/* total file size, for the progress view's BYTES_TOTAL column */
			progress_vals[2] = st.st_size;
		}
	}

	pgstat_progress_update_multi_param(3, progress_cols, progress_vals);

	if (cstate->opts.binary)
	{
		/* Read and verify binary header */
		ReceiveCopyBinaryHeader(cstate);
	}

	/* create workspace for CopyReadAttributes results */
	if (!cstate->opts.binary)
	{
		AttrNumber	attr_count = list_length(cstate->attnumlist);

		cstate->max_fields = attr_count;
		cstate->raw_fields = (char **) palloc(attr_count * sizeof(char *));
	}

	MemoryContextSwitchTo(oldcontext);

	return cstate;
}

/*
 * Clean up storage and release resources for COPY FROM.
 *
 * Closes the data source (program pipe or file), ends command progress
 * reporting, and frees everything allocated in the COPY memory context.
 */
void
EndCopyFrom(CopyFromState cstate)
{
	/* No COPY FROM related resources except memory. */
	if (cstate->is_program)
	{
		ClosePipeFromProgram(cstate);
	}
	else
	{
		/* filename == NULL means STDIN/callback; nothing to close then */
		if (cstate->filename != NULL && FreeFile(cstate->copy_file))
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("could not close file \"%s\": %m",
							cstate->filename)));
	}

	pgstat_progress_end_command();

	/* all per-COPY allocations live in copycontext; one delete frees them */
	MemoryContextDelete(cstate->copycontext);
	pfree(cstate);
}

/*
 * Closes the pipe from an external program, checking the pclose() return code.
 */
static void
ClosePipeFromProgram(CopyFromState cstate)
{
	int			pclose_rc;

	Assert(cstate->is_program);

	pclose_rc = ClosePipeStream(cstate->copy_file);
	if (pclose_rc == -1)
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not close pipe to external command: %m")));
	else if (pclose_rc != 0)
	{
		/*
		 * If we ended a COPY FROM PROGRAM before reaching EOF, then it's
		 * expectable for the called program to fail with SIGPIPE, and we
		 * should not report that as an error.  Otherwise, SIGPIPE indicates a
		 * problem.
		 */
		if (!cstate->raw_reached_eof &&
			wait_result_is_signal(pclose_rc, SIGPIPE))
			return;

		ereport(ERROR,
				(errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
				 errmsg("program \"%s\" failed",
						cstate->filename),
				 errdetail_internal("%s", wait_result_to_str(pclose_rc))));
	}
}
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
new file mode 100644
index 0000000..097414e
--- /dev/null
+++ b/src/backend/commands/copyfromparse.c
@@ -0,0 +1,1921 @@
/*-------------------------------------------------------------------------
 *
 * copyfromparse.c
 *		Parse CSV/text/binary format for COPY FROM.
 *
 * This file contains routines to parse the text, CSV and binary input
 * formats.  The main entry point is NextCopyFrom(), which parses the
 * next input line and returns it as Datums.
 *
 * In text/CSV mode, the parsing happens in multiple stages:
 *
 * [data source] --> raw_buf --> input_buf --> line_buf --> attribute_buf
 *				1.			2.			  3.		   4.
 *
 * 1. CopyLoadRawBuf() reads raw data from the input file or client, and
 * places it into 'raw_buf'.
 *
 * 2. CopyConvertBuf() calls the encoding conversion function to convert
 * the data in 'raw_buf' from client to server encoding, placing the
 * converted result in 'input_buf'.
 *
 * 3. CopyReadLine() parses the data in 'input_buf', one line at a time.
+ * It is responsible for finding the next newline marker, taking quote and + * escape characters into account according to the COPY options. The line + * is copied into 'line_buf', with quotes and escape characters still + * intact. + * + * 4. CopyReadAttributesText/CSV() function takes the input line from + * 'line_buf', and splits it into fields, unescaping the data as required. + * The fields are stored in 'attribute_buf', and 'raw_fields' array holds + * pointers to each field. + * + * If encoding conversion is not required, a shortcut is taken in step 2 to + * avoid copying the data unnecessarily. The 'input_buf' pointer is set to + * point directly to 'raw_buf', so that CopyLoadRawBuf() loads the raw data + * directly into 'input_buf'. CopyConvertBuf() then merely validates that + * the data is valid in the current encoding. + * + * In binary mode, the pipeline is much simpler. Input is loaded into + * 'raw_buf', and encoding conversion is done in the datatype-specific + * receive functions, if required. 'input_buf' and 'line_buf' are not used, + * but 'attribute_buf' is used as a temporary buffer to hold one attribute's + * data when it's passed the receive function. + * + * 'raw_buf' is always 64 kB in size (RAW_BUF_SIZE). 'input_buf' is also + * 64 kB (INPUT_BUF_SIZE), if encoding conversion is required. 'line_buf' + * and 'attribute_buf' are expanded on demand, to hold the longest line + * encountered so far. 
+ * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/copyfromparse.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include +#include +#include + +#include "commands/copy.h" +#include "commands/copyfrom_internal.h" +#include "commands/progress.h" +#include "executor/executor.h" +#include "libpq/libpq.h" +#include "libpq/pqformat.h" +#include "mb/pg_wchar.h" +#include "miscadmin.h" +#include "pgstat.h" +#include "port/pg_bswap.h" +#include "utils/builtins.h" +#include "utils/memutils.h" +#include "utils/rel.h" + +#define ISOCTAL(c) (((c) >= '0') && ((c) <= '7')) +#define OCTVALUE(c) ((c) - '0') + +/* + * These macros centralize code used to process line_buf and input_buf buffers. + * They are macros because they often do continue/break control and to avoid + * function call overhead in tight COPY loops. + * + * We must use "if (1)" because the usual "do {...} while(0)" wrapper would + * prevent the continue/break processing from working. We end the "if (1)" + * with "else ((void) 0)" to ensure the "if" does not unintentionally match + * any "else" in the calling code, and to avoid any compiler warnings about + * empty statements. See http://www.cit.gu.edu.au/~anthony/info/C/C.macros. + */ + +/* + * This keeps the character read at the top of the loop in the buffer + * even if there is more than one read-ahead. 
+ */ +#define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen) \ +if (1) \ +{ \ + if (input_buf_ptr + (extralen) >= copy_buf_len && !hit_eof) \ + { \ + input_buf_ptr = prev_raw_ptr; /* undo fetch */ \ + need_data = true; \ + continue; \ + } \ +} else ((void) 0) + +/* This consumes the remainder of the buffer and breaks */ +#define IF_NEED_REFILL_AND_EOF_BREAK(extralen) \ +if (1) \ +{ \ + if (input_buf_ptr + (extralen) >= copy_buf_len && hit_eof) \ + { \ + if (extralen) \ + input_buf_ptr = copy_buf_len; /* consume the partial character */ \ + /* backslash just before EOF, treat as data char */ \ + result = true; \ + break; \ + } \ +} else ((void) 0) + +/* + * Transfer any approved data to line_buf; must do this to be sure + * there is some room in input_buf. + */ +#define REFILL_LINEBUF \ +if (1) \ +{ \ + if (input_buf_ptr > cstate->input_buf_index) \ + { \ + appendBinaryStringInfo(&cstate->line_buf, \ + cstate->input_buf + cstate->input_buf_index, \ + input_buf_ptr - cstate->input_buf_index); \ + cstate->input_buf_index = input_buf_ptr; \ + } \ +} else ((void) 0) + +/* Undo any read-ahead and jump out of the block. 
 */
#define NO_END_OF_COPY_GOTO \
if (1) \
{ \
	input_buf_ptr = prev_raw_ptr + 1; \
	goto not_end_of_copy; \
} else ((void) 0)

/* NOTE: there's a copy of this in copyto.c */
static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0";


/* non-export function prototypes */
static bool CopyReadLine(CopyFromState cstate);
static bool CopyReadLineText(CopyFromState cstate);
static int	CopyReadAttributesText(CopyFromState cstate);
static int	CopyReadAttributesCSV(CopyFromState cstate);
static Datum CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo,
									 Oid typioparam, int32 typmod,
									 bool *isnull);


/* Low-level communications functions */
static int	CopyGetData(CopyFromState cstate, void *databuf,
						int minread, int maxread);
static inline bool CopyGetInt32(CopyFromState cstate, int32 *val);
static inline bool CopyGetInt16(CopyFromState cstate, int16 *val);
static void CopyLoadInputBuf(CopyFromState cstate);
static int	CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes);

/*
 * Send a CopyInResponse ('G') message to the frontend and switch the
 * input source to COPY_FRONTEND.  The message carries the overall format
 * code (0 = text, 1 = binary) plus one per-column format code for each
 * column being copied.
 */
void
ReceiveCopyBegin(CopyFromState cstate)
{
	StringInfoData buf;
	int			natts = list_length(cstate->attnumlist);
	int16		format = (cstate->opts.binary ? 1 : 0);
	int			i;

	pq_beginmessage(&buf, 'G');
	pq_sendbyte(&buf, format);	/* overall format */
	pq_sendint16(&buf, natts);
	for (i = 0; i < natts; i++)
		pq_sendint16(&buf, format); /* per-column formats */
	pq_endmessage(&buf);
	cstate->copy_src = COPY_FRONTEND;
	cstate->fe_msgbuf = makeStringInfo();
	/* We *must* flush here to ensure FE knows it can send. */
	pq_flush();
}

/*
 * Read and validate the fixed header of a binary COPY stream: the 11-byte
 * signature, a 32-bit flags field, and a 32-bit header extension length
 * (whose payload we skip).  Errors out on any mismatch.
 */
void
ReceiveCopyBinaryHeader(CopyFromState cstate)
{
	char		readSig[11];
	int32		tmp;

	/* Signature */
	if (CopyReadBinaryData(cstate, readSig, 11) != 11 ||
		memcmp(readSig, BinarySignature, 11) != 0)
		ereport(ERROR,
				(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
				 errmsg("COPY file signature not recognized")));
	/* Flags field */
	if (!CopyGetInt32(cstate, &tmp))
		ereport(ERROR,
				(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
				 errmsg("invalid COPY file header (missing flags)")));
	/* bit 16 is the old WITH OIDS flag; that format is no longer accepted */
	if ((tmp & (1 << 16)) != 0)
		ereport(ERROR,
				(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
				 errmsg("invalid COPY file header (WITH OIDS)")));
	tmp &= ~(1 << 16);
	/* the remaining upper 16 bits are reserved critical flags; must be 0 */
	if ((tmp >> 16) != 0)
		ereport(ERROR,
				(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
				 errmsg("unrecognized critical flags in COPY file header")));
	/* Header extension length */
	if (!CopyGetInt32(cstate, &tmp) ||
		tmp < 0)
		ereport(ERROR,
				(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
				 errmsg("invalid COPY file header (missing length)")));
	/* Skip extension header, if present */
	while (tmp-- > 0)
	{
		if (CopyReadBinaryData(cstate, readSig, 1) != 1)
			ereport(ERROR,
					(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
					 errmsg("invalid COPY file header (wrong length)")));
	}
}

/*
 * CopyGetData reads data from the source (file or frontend)
 *
 * We attempt to read at least minread, and at most maxread, bytes from
 * the source.  The actual number of bytes read is returned; if this is
 * less than minread, EOF was detected.
 *
 * Note: when copying from the frontend, we expect a proper EOF mark per
 * protocol; if the frontend simply drops the connection, we raise error.
 * It seems unwise to allow the COPY IN to complete normally in that case.
 *
 * NB: no data conversion is applied here.
 */
static int
CopyGetData(CopyFromState cstate, void *databuf, int minread, int maxread)
{
	int			bytesread = 0;

	switch (cstate->copy_src)
	{
		case COPY_FILE:
			bytesread = fread(databuf, 1, maxread, cstate->copy_file);
			if (ferror(cstate->copy_file))
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not read from COPY file: %m")));
			if (bytesread == 0)
				cstate->raw_reached_eof = true;
			break;
		case COPY_FRONTEND:

			/*
			 * Keep pulling protocol messages until we have at least
			 * 'minread' bytes, the buffer is full, or the frontend ends
			 * the COPY.
			 */
			while (maxread > 0 && bytesread < minread && !cstate->raw_reached_eof)
			{
				int			avail;

				/* Refill fe_msgbuf once the previous message is consumed */
				while (cstate->fe_msgbuf->cursor >= cstate->fe_msgbuf->len)
				{
					/* Try to receive another message */
					int			mtype;
					int			maxmsglen;

			readmessage:
					HOLD_CANCEL_INTERRUPTS();
					pq_startmsgread();
					mtype = pq_getbyte();
					if (mtype == EOF)
						ereport(ERROR,
								(errcode(ERRCODE_CONNECTION_FAILURE),
								 errmsg("unexpected EOF on client connection with an open transaction")));
					/* Validate message type and set packet size limit */
					switch (mtype)
					{
						case 'd':	/* CopyData */
							maxmsglen = PQ_LARGE_MESSAGE_LIMIT;
							break;
						case 'c':	/* CopyDone */
						case 'f':	/* CopyFail */
						case 'H':	/* Flush */
						case 'S':	/* Sync */
							maxmsglen = PQ_SMALL_MESSAGE_LIMIT;
							break;
						default:
							ereport(ERROR,
									(errcode(ERRCODE_PROTOCOL_VIOLATION),
									 errmsg("unexpected message type 0x%02X during COPY from stdin",
											mtype)));
							maxmsglen = 0;	/* keep compiler quiet */
							break;
					}
					/* Now collect the message body */
					if (pq_getmessage(cstate->fe_msgbuf, maxmsglen))
						ereport(ERROR,
								(errcode(ERRCODE_CONNECTION_FAILURE),
								 errmsg("unexpected EOF on client connection with an open transaction")));
					RESUME_CANCEL_INTERRUPTS();
					/* ... and process it */
					switch (mtype)
					{
						case 'd':	/* CopyData */
							break;
						case 'c':	/* CopyDone */
							/* COPY IN correctly terminated by frontend */
							cstate->raw_reached_eof = true;
							return bytesread;
						case 'f':	/* CopyFail */
							ereport(ERROR,
									(errcode(ERRCODE_QUERY_CANCELED),
									 errmsg("COPY from stdin failed: %s",
											pq_getmsgstring(cstate->fe_msgbuf))));
							break;
						case 'H':	/* Flush */
						case 'S':	/* Sync */

							/*
							 * Ignore Flush/Sync for the convenience of client
							 * libraries (such as libpq) that may send those
							 * without noticing that the command they just
							 * sent was COPY.
							 */
							goto readmessage;
						default:
							Assert(false);	/* NOT REACHED */
					}
				}
				/* Copy as much of the current message as the caller wants */
				avail = cstate->fe_msgbuf->len - cstate->fe_msgbuf->cursor;
				if (avail > maxread)
					avail = maxread;
				pq_copymsgbytes(cstate->fe_msgbuf, databuf, avail);
				databuf = (void *) ((char *) databuf + avail);
				maxread -= avail;
				bytesread += avail;
			}
			break;
		case COPY_CALLBACK:
			bytesread = cstate->data_source_cb(databuf, minread, maxread);
			break;
	}

	return bytesread;
}


/*
 * These functions do apply some data conversion
 */

/*
 * CopyGetInt32 reads an int32 that appears in network byte order
 *
 * Returns true if OK, false if EOF
 */
static inline bool
CopyGetInt32(CopyFromState cstate, int32 *val)
{
	uint32		buf;

	if (CopyReadBinaryData(cstate, (char *) &buf, sizeof(buf)) != sizeof(buf))
	{
		*val = 0;				/* suppress compiler warning */
		return false;
	}
	*val = (int32) pg_ntoh32(buf);
	return true;
}

/*
 * CopyGetInt16 reads an int16 that appears in network byte order
 *
 * Returns true if OK, false if EOF
 */
static inline bool
CopyGetInt16(CopyFromState cstate, int16 *val)
{
	uint16		buf;

	if (CopyReadBinaryData(cstate, (char *) &buf, sizeof(buf)) != sizeof(buf))
	{
		*val = 0;				/* suppress compiler warning */
		return false;
	}
	*val = (int16) pg_ntoh16(buf);
	return true;
}


/*
 * Perform encoding conversion on data in 'raw_buf', writing the converted
 * data into 'input_buf'.
 *
 * On entry, there must be some data to convert in 'raw_buf'.
 */
static void
CopyConvertBuf(CopyFromState cstate)
{
	/*
	 * If the file and server encoding are the same, no encoding conversion is
	 * required.  However, we still need to verify that the input is valid for
	 * the encoding.
	 */
	if (!cstate->need_transcoding)
	{
		/*
		 * When conversion is not required, input_buf and raw_buf are the
		 * same.  raw_buf_len is the total number of bytes in the buffer, and
		 * input_buf_len tracks how many of those bytes have already been
		 * verified.
		 */
		int			preverifiedlen = cstate->input_buf_len;
		int			unverifiedlen = cstate->raw_buf_len - cstate->input_buf_len;
		int			nverified;

		if (unverifiedlen == 0)
		{
			/*
			 * If no more raw data is coming, report the EOF to the caller.
			 */
			if (cstate->raw_reached_eof)
				cstate->input_reached_eof = true;
			return;
		}

		/*
		 * Verify the new data, including any residual unverified bytes from
		 * previous round.
		 */
		nverified = pg_encoding_verifymbstr(cstate->file_encoding,
											cstate->raw_buf + preverifiedlen,
											unverifiedlen);
		if (nverified == 0)
		{
			/*
			 * Could not verify anything.
			 *
			 * If there is no more raw input data coming, it means that there
			 * was an incomplete multi-byte sequence at the end.  Also, if
			 * there's "enough" input left, we should be able to verify at
			 * least one character, and a failure to do so means that we've
			 * hit an invalid byte sequence.
			 */
			if (cstate->raw_reached_eof || unverifiedlen >= pg_encoding_max_length(cstate->file_encoding))
				cstate->input_reached_error = true;
			return;
		}
		cstate->input_buf_len += nverified;
	}
	else
	{
		/*
		 * Encoding conversion is needed.
		 */
		int			nbytes;
		unsigned char *src;
		int			srclen;
		unsigned char *dst;
		int			dstlen;
		int			convertedlen;

		if (RAW_BUF_BYTES(cstate) == 0)
		{
			/*
			 * If no more raw data is coming, report the EOF to the caller.
			 */
			if (cstate->raw_reached_eof)
				cstate->input_reached_eof = true;
			return;
		}

		/*
		 * First, copy down any unprocessed data.
		 */
		nbytes = INPUT_BUF_BYTES(cstate);
		if (nbytes > 0 && cstate->input_buf_index > 0)
			memmove(cstate->input_buf, cstate->input_buf + cstate->input_buf_index,
					nbytes);
		cstate->input_buf_index = 0;
		cstate->input_buf_len = nbytes;
		cstate->input_buf[nbytes] = '\0';

		src = (unsigned char *) cstate->raw_buf + cstate->raw_buf_index;
		srclen = cstate->raw_buf_len - cstate->raw_buf_index;
		dst = (unsigned char *) cstate->input_buf + cstate->input_buf_len;
		dstlen = INPUT_BUF_SIZE - cstate->input_buf_len + 1;

		/*
		 * Do the conversion.  This might stop short, if there is an invalid
		 * byte sequence in the input.  We'll convert as much as we can in
		 * that case.
		 *
		 * Note: Even if we hit an invalid byte sequence, we don't report the
		 * error until all the valid bytes have been consumed.  The input
		 * might contain an end-of-input marker (\.), and we don't want to
		 * report an error if the invalid byte sequence is after the
		 * end-of-input marker.  We might unnecessarily convert some data
		 * after the end-of-input marker as long as it's valid for the
		 * encoding, but that's harmless.
		 */
		convertedlen = pg_do_encoding_conversion_buf(cstate->conversion_proc,
													 cstate->file_encoding,
													 GetDatabaseEncoding(),
													 src, srclen,
													 dst, dstlen,
													 true);
		if (convertedlen == 0)
		{
			/*
			 * Could not convert anything.  If there is no more raw input data
			 * coming, it means that there was an incomplete multi-byte
			 * sequence at the end.  Also, if there is plenty of input left,
			 * we should be able to convert at least one character, so a
			 * failure to do so must mean that we've hit a byte sequence
			 * that's invalid.
			 */
			if (cstate->raw_reached_eof || srclen >= MAX_CONVERSION_INPUT_LENGTH)
				cstate->input_reached_error = true;
			return;
		}
		cstate->raw_buf_index += convertedlen;
		/* conversion output is NUL-terminated, so strlen() is its length */
		cstate->input_buf_len += strlen((char *) dst);
	}
}

/*
 * Report an encoding or conversion error.
 */
static void
CopyConversionError(CopyFromState cstate)
{
	Assert(cstate->raw_buf_len > 0);
	Assert(cstate->input_reached_error);

	if (!cstate->need_transcoding)
	{
		/*
		 * Everything up to input_buf_len was successfully verified, and
		 * input_buf_len points to the invalid or incomplete character.
		 */
		report_invalid_encoding(cstate->file_encoding,
								cstate->raw_buf + cstate->input_buf_len,
								cstate->raw_buf_len - cstate->input_buf_len);
	}
	else
	{
		/*
		 * raw_buf_index points to the invalid or untranslatable character. We
		 * let the conversion routine report the error, because it can provide
		 * a more specific error message than we could here.  An earlier call
		 * to the conversion routine in CopyConvertBuf() detected that there
		 * is an error, now we call the conversion routine again with
		 * noError=false, to have it throw the error.
		 */
		unsigned char *src;
		int			srclen;
		unsigned char *dst;
		int			dstlen;

		src = (unsigned char *) cstate->raw_buf + cstate->raw_buf_index;
		srclen = cstate->raw_buf_len - cstate->raw_buf_index;
		dst = (unsigned char *) cstate->input_buf + cstate->input_buf_len;
		dstlen = INPUT_BUF_SIZE - cstate->input_buf_len + 1;

		(void) pg_do_encoding_conversion_buf(cstate->conversion_proc,
											 cstate->file_encoding,
											 GetDatabaseEncoding(),
											 src, srclen,
											 dst, dstlen,
											 false);

		/*
		 * The conversion routine should have reported an error, so this
		 * should not be reached.
		 */
		elog(ERROR, "encoding conversion failed without error");
	}
}

/*
 * Load more data from data source to raw_buf.
 *
 * If RAW_BUF_BYTES(cstate) > 0, the unprocessed bytes are moved to the
 * beginning of the buffer, and we load new data after that.
 */
static void
CopyLoadRawBuf(CopyFromState cstate)
{
	int			nbytes;
	int			inbytes;

	/*
	 * In text mode, if encoding conversion is not required, raw_buf and
	 * input_buf point to the same buffer.  Their len/index better agree, too.
	 */
	if (cstate->raw_buf == cstate->input_buf)
	{
		Assert(!cstate->need_transcoding);
		Assert(cstate->raw_buf_index == cstate->input_buf_index);
		Assert(cstate->input_buf_len <= cstate->raw_buf_len);
	}

	/*
	 * Copy down the unprocessed data if any.
	 */
	nbytes = RAW_BUF_BYTES(cstate);
	if (nbytes > 0 && cstate->raw_buf_index > 0)
		memmove(cstate->raw_buf, cstate->raw_buf + cstate->raw_buf_index,
				nbytes);
	cstate->raw_buf_len -= cstate->raw_buf_index;
	cstate->raw_buf_index = 0;

	/*
	 * If raw_buf and input_buf are in fact the same buffer, adjust the
	 * input_buf variables, too.
	 */
	if (cstate->raw_buf == cstate->input_buf)
	{
		cstate->input_buf_len -= cstate->input_buf_index;
		cstate->input_buf_index = 0;
	}

	/* Load more data */
	inbytes = CopyGetData(cstate, cstate->raw_buf + cstate->raw_buf_len,
						  1, RAW_BUF_SIZE - cstate->raw_buf_len);
	nbytes += inbytes;
	/* keep the buffer NUL-terminated for the string-scanning callers */
	cstate->raw_buf[nbytes] = '\0';
	cstate->raw_buf_len = nbytes;

	cstate->bytes_processed += inbytes;
	pgstat_progress_update_param(PROGRESS_COPY_BYTES_PROCESSED, cstate->bytes_processed);

	if (inbytes == 0)
		cstate->raw_reached_eof = true;
}

/*
 * CopyLoadInputBuf loads some more data into input_buf
 *
 * On return, at least one more input character is loaded into
 * input_buf, or input_reached_eof is set.
 *
 * If INPUT_BUF_BYTES(cstate) > 0, the unprocessed bytes are moved to the start
 * of the buffer and then we load more data after that.
 */
static void
CopyLoadInputBuf(CopyFromState cstate)
{
	int			nbytes = INPUT_BUF_BYTES(cstate);

	/*
	 * The caller has updated input_buf_index to indicate how much of the
	 * input has been consumed and isn't needed anymore.  If input_buf is the
	 * same physical area as raw_buf, update raw_buf_index accordingly.
	 */
	if (cstate->raw_buf == cstate->input_buf)
	{
		Assert(!cstate->need_transcoding);
		Assert(cstate->input_buf_index >= cstate->raw_buf_index);
		cstate->raw_buf_index = cstate->input_buf_index;
	}

	for (;;)
	{
		/* If we now have some unconverted data, try to convert it */
		CopyConvertBuf(cstate);

		/* If we now have some more input bytes ready, return them */
		if (INPUT_BUF_BYTES(cstate) > nbytes)
			return;

		/*
		 * If we reached an invalid byte sequence, or we're at an incomplete
		 * multi-byte character but there is no more raw input data, report
		 * conversion error.
		 */
		if (cstate->input_reached_error)
			CopyConversionError(cstate);

		/* no more input, and everything has been converted */
		if (cstate->input_reached_eof)
			break;

		/* Try to load more raw data */
		Assert(!cstate->raw_reached_eof);
		CopyLoadRawBuf(cstate);
	}
}

/*
 * CopyReadBinaryData
 *
 * Reads up to 'nbytes' bytes from cstate->copy_file via cstate->raw_buf
 * and writes them to 'dest'.  Returns the number of bytes read (which
 * would be less than 'nbytes' only if we reach EOF).
 */
static int
CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes)
{
	int			copied_bytes = 0;

	if (RAW_BUF_BYTES(cstate) >= nbytes)
	{
		/* Enough bytes are present in the buffer. */
		memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, nbytes);
		cstate->raw_buf_index += nbytes;
		copied_bytes = nbytes;
	}
	else
	{
		/*
		 * Not enough bytes in the buffer, so must read from the file.  Need
		 * to loop since 'nbytes' could be larger than the buffer size.
		 */
		do
		{
			int			copy_bytes;

			/* Load more data if buffer is empty. */
			if (RAW_BUF_BYTES(cstate) == 0)
			{
				CopyLoadRawBuf(cstate);
				if (cstate->raw_reached_eof)
					break;		/* EOF */
			}

			/* Transfer some bytes. */
			copy_bytes = Min(nbytes - copied_bytes, RAW_BUF_BYTES(cstate));
			memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, copy_bytes);
			cstate->raw_buf_index += copy_bytes;
			dest += copy_bytes;
			copied_bytes += copy_bytes;
		} while (copied_bytes < nbytes);
	}

	return copied_bytes;
}

/*
 * Read raw fields in the next line for COPY FROM in text or csv mode.
 * Return false if no more lines.
 *
 * An internal temporary buffer is returned via 'fields'. It is valid until
 * the next call of the function. Since the function returns all raw fields
 * in the input file, 'nfields' could be different from the number of columns
 * in the relation.
 *
 * NOTE: force_not_null option are not applied to the returned fields.
 */
bool
NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields)
{
	int			fldct;
	bool		done;

	/* only available for text or csv input */
	Assert(!cstate->opts.binary);

	/* on input check that the header line is correct if needed */
	if (cstate->cur_lineno == 0 && cstate->opts.header_line)
	{
		ListCell   *cur;
		TupleDesc	tupDesc;

		tupDesc = RelationGetDescr(cstate->rel);

		cstate->cur_lineno++;
		done = CopyReadLine(cstate);

		/* With HEADER MATCH, each header field must equal its column name */
		if (cstate->opts.header_line == COPY_HEADER_MATCH)
		{
			int			fldnum;

			if (cstate->opts.csv_mode)
				fldct = CopyReadAttributesCSV(cstate);
			else
				fldct = CopyReadAttributesText(cstate);

			if (fldct != list_length(cstate->attnumlist))
				ereport(ERROR,
						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
						 errmsg("wrong number of fields in header line: got %d, expected %d",
								fldct, list_length(cstate->attnumlist))));

			fldnum = 0;
			foreach(cur, cstate->attnumlist)
			{
				int			attnum = lfirst_int(cur);
				char	   *colName;
				Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);

				Assert(fldnum < cstate->max_fields);

				colName = cstate->raw_fields[fldnum++];
				if (colName == NULL)
					ereport(ERROR,
							(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
							 errmsg("column name mismatch in header line field %d: got null value (\"%s\"), expected \"%s\"",
									fldnum, cstate->opts.null_print, NameStr(attr->attname))));

				if (namestrcmp(&attr->attname, colName) != 0)
				{
					ereport(ERROR,
							(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
							 errmsg("column name mismatch in header line field %d: got \"%s\", expected \"%s\"",
									fldnum, colName, NameStr(attr->attname))));
				}
			}
		}

		if (done)
			return false;
	}

	cstate->cur_lineno++;

	/* Actually read the line into memory here */
	done = CopyReadLine(cstate);

	/*
	 * EOF at start of line means we're done.  If we see EOF after some
	 * characters, we act as though it was newline followed by EOF, ie,
	 * process the line and then exit loop on next iteration.
	 */
	if (done && cstate->line_buf.len == 0)
		return false;

	/* Parse the line into de-escaped field values */
	if (cstate->opts.csv_mode)
		fldct = CopyReadAttributesCSV(cstate);
	else
		fldct = CopyReadAttributesText(cstate);

	*fields = cstate->raw_fields;
	*nfields = fldct;
	return true;
}

/*
 * Read next tuple from file for COPY FROM. Return false if no more tuples.
 *
 * 'econtext' is used to evaluate default expression for each column not
 * read from the file. It can be NULL when no default values are used, i.e.
 * when all columns are read from the file.
 *
 * 'values' and 'nulls' arrays must be the same length as columns of the
 * relation passed to BeginCopyFrom. This function fills the arrays.
+ */ +bool +NextCopyFrom(CopyFromState cstate, ExprContext *econtext, + Datum *values, bool *nulls) +{ + TupleDesc tupDesc; + AttrNumber num_phys_attrs, + attr_count, + num_defaults = cstate->num_defaults; + FmgrInfo *in_functions = cstate->in_functions; + Oid *typioparams = cstate->typioparams; + int i; + int *defmap = cstate->defmap; + ExprState **defexprs = cstate->defexprs; + + tupDesc = RelationGetDescr(cstate->rel); + num_phys_attrs = tupDesc->natts; + attr_count = list_length(cstate->attnumlist); + + /* Initialize all values for row to NULL */ + MemSet(values, 0, num_phys_attrs * sizeof(Datum)); + MemSet(nulls, true, num_phys_attrs * sizeof(bool)); + + if (!cstate->opts.binary) + { + char **field_strings; + ListCell *cur; + int fldct; + int fieldno; + char *string; + + /* read raw fields in the next line */ + if (!NextCopyFromRawFields(cstate, &field_strings, &fldct)) + return false; + + /* check for overflowing fields */ + if (attr_count > 0 && fldct > attr_count) + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + errmsg("extra data after last expected column"))); + + fieldno = 0; + + /* Loop to read the user attributes on the line. */ + foreach(cur, cstate->attnumlist) + { + int attnum = lfirst_int(cur); + int m = attnum - 1; + Form_pg_attribute att = TupleDescAttr(tupDesc, m); + + if (fieldno >= fldct) + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + errmsg("missing data for column \"%s\"", + NameStr(att->attname)))); + string = field_strings[fieldno++]; + + if (cstate->convert_select_flags && + !cstate->convert_select_flags[m]) + { + /* ignore input field, leaving column as NULL */ + continue; + } + + if (cstate->opts.csv_mode) + { + if (string == NULL && + cstate->opts.force_notnull_flags[m]) + { + /* + * FORCE_NOT_NULL option is set and column is NULL - + * convert it to the NULL string. 
+ */ + string = cstate->opts.null_print; + } + else if (string != NULL && cstate->opts.force_null_flags[m] + && strcmp(string, cstate->opts.null_print) == 0) + { + /* + * FORCE_NULL option is set and column matches the NULL + * string. It must have been quoted, or otherwise the + * string would already have been set to NULL. Convert it + * to NULL as specified. + */ + string = NULL; + } + } + + cstate->cur_attname = NameStr(att->attname); + cstate->cur_attval = string; + values[m] = InputFunctionCall(&in_functions[m], + string, + typioparams[m], + att->atttypmod); + if (string != NULL) + nulls[m] = false; + cstate->cur_attname = NULL; + cstate->cur_attval = NULL; + } + + Assert(fieldno == attr_count); + } + else + { + /* binary */ + int16 fld_count; + ListCell *cur; + + cstate->cur_lineno++; + + if (!CopyGetInt16(cstate, &fld_count)) + { + /* EOF detected (end of file, or protocol-level EOF) */ + return false; + } + + if (fld_count == -1) + { + /* + * Received EOF marker. Wait for the protocol-level EOF, and + * complain if it doesn't come immediately. In COPY FROM STDIN, + * this ensures that we correctly handle CopyFail, if client + * chooses to send that now. When copying from file, we could + * ignore the rest of the file like in text mode, but we choose to + * be consistent with the COPY FROM STDIN case. 
+ */ + char dummy; + + if (CopyReadBinaryData(cstate, &dummy, 1) > 0) + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + errmsg("received copy data after EOF marker"))); + return false; + } + + if (fld_count != attr_count) + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + errmsg("row field count is %d, expected %d", + (int) fld_count, attr_count))); + + foreach(cur, cstate->attnumlist) + { + int attnum = lfirst_int(cur); + int m = attnum - 1; + Form_pg_attribute att = TupleDescAttr(tupDesc, m); + + cstate->cur_attname = NameStr(att->attname); + values[m] = CopyReadBinaryAttribute(cstate, + &in_functions[m], + typioparams[m], + att->atttypmod, + &nulls[m]); + cstate->cur_attname = NULL; + } + } + + /* + * Now compute and insert any defaults available for the columns not + * provided by the input data. Anything not processed here or above will + * remain NULL. + */ + for (i = 0; i < num_defaults; i++) + { + /* + * The caller must supply econtext and have switched into the + * per-tuple memory context in it. + */ + Assert(econtext != NULL); + Assert(CurrentMemoryContext == econtext->ecxt_per_tuple_memory); + + values[defmap[i]] = ExecEvalExpr(defexprs[i], econtext, + &nulls[defmap[i]]); + } + + return true; +} + +/* + * Read the next input line and stash it in line_buf. + * + * Result is true if read was terminated by EOF, false if terminated + * by newline. The terminating newline or EOF marker is not included + * in the final value of line_buf. + */ +static bool +CopyReadLine(CopyFromState cstate) +{ + bool result; + + resetStringInfo(&cstate->line_buf); + cstate->line_buf_valid = false; + + /* Parse data and transfer into line_buf */ + result = CopyReadLineText(cstate); + + if (result) + { + /* + * Reached EOF. In protocol version 3, we should ignore anything + * after \. up to the protocol end of copy data. (XXX maybe better + * not to treat \. as special?) 
+ */ + if (cstate->copy_src == COPY_FRONTEND) + { + int inbytes; + + do + { + inbytes = CopyGetData(cstate, cstate->input_buf, + 1, INPUT_BUF_SIZE); + } while (inbytes > 0); + cstate->input_buf_index = 0; + cstate->input_buf_len = 0; + cstate->raw_buf_index = 0; + cstate->raw_buf_len = 0; + } + } + else + { + /* + * If we didn't hit EOF, then we must have transferred the EOL marker + * to line_buf along with the data. Get rid of it. + */ + switch (cstate->eol_type) + { + case EOL_NL: + Assert(cstate->line_buf.len >= 1); + Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n'); + cstate->line_buf.len--; + cstate->line_buf.data[cstate->line_buf.len] = '\0'; + break; + case EOL_CR: + Assert(cstate->line_buf.len >= 1); + Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\r'); + cstate->line_buf.len--; + cstate->line_buf.data[cstate->line_buf.len] = '\0'; + break; + case EOL_CRNL: + Assert(cstate->line_buf.len >= 2); + Assert(cstate->line_buf.data[cstate->line_buf.len - 2] == '\r'); + Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n'); + cstate->line_buf.len -= 2; + cstate->line_buf.data[cstate->line_buf.len] = '\0'; + break; + case EOL_UNKNOWN: + /* shouldn't get here */ + Assert(false); + break; + } + } + + /* Now it's safe to use the buffer in error messages */ + cstate->line_buf_valid = true; + + return result; +} + +/* + * CopyReadLineText - inner loop of CopyReadLine for text mode + */ +static bool +CopyReadLineText(CopyFromState cstate) +{ + char *copy_input_buf; + int input_buf_ptr; + int copy_buf_len; + bool need_data = false; + bool hit_eof = false; + bool result = false; + + /* CSV variables */ + bool first_char_in_line = true; + bool in_quote = false, + last_was_esc = false; + char quotec = '\0'; + char escapec = '\0'; + + if (cstate->opts.csv_mode) + { + quotec = cstate->opts.quote[0]; + escapec = cstate->opts.escape[0]; + /* ignore special escape processing if it's the same as quotec */ + if (quotec == escapec) + escapec = 
'\0'; + } + + /* + * The objective of this loop is to transfer the entire next input line + * into line_buf. Hence, we only care for detecting newlines (\r and/or + * \n) and the end-of-copy marker (\.). + * + * In CSV mode, \r and \n inside a quoted field are just part of the data + * value and are put in line_buf. We keep just enough state to know if we + * are currently in a quoted field or not. + * + * These four characters, and the CSV escape and quote characters, are + * assumed the same in frontend and backend encodings. + * + * The input has already been converted to the database encoding. All + * supported server encodings have the property that all bytes in a + * multi-byte sequence have the high bit set, so a multibyte character + * cannot contain any newline or escape characters embedded in the + * multibyte sequence. Therefore, we can process the input byte-by-byte, + * regardless of the encoding. + * + * For speed, we try to move data from input_buf to line_buf in chunks + * rather than one character at a time. input_buf_ptr points to the next + * character to examine; any characters from input_buf_index to + * input_buf_ptr have been determined to be part of the line, but not yet + * transferred to line_buf. + * + * For a little extra speed within the loop, we copy input_buf and + * input_buf_len into local variables. + */ + copy_input_buf = cstate->input_buf; + input_buf_ptr = cstate->input_buf_index; + copy_buf_len = cstate->input_buf_len; + + for (;;) + { + int prev_raw_ptr; + char c; + + /* + * Load more data if needed. + * + * TODO: We could just force four bytes of read-ahead and avoid the + * many calls to IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(). That was + * unsafe with the old v2 COPY protocol, but we don't support that + * anymore. 
+ */ + if (input_buf_ptr >= copy_buf_len || need_data) + { + REFILL_LINEBUF; + + CopyLoadInputBuf(cstate); + /* update our local variables */ + hit_eof = cstate->input_reached_eof; + input_buf_ptr = cstate->input_buf_index; + copy_buf_len = cstate->input_buf_len; + + /* + * If we are completely out of data, break out of the loop, + * reporting EOF. + */ + if (INPUT_BUF_BYTES(cstate) <= 0) + { + result = true; + break; + } + need_data = false; + } + + /* OK to fetch a character */ + prev_raw_ptr = input_buf_ptr; + c = copy_input_buf[input_buf_ptr++]; + + if (cstate->opts.csv_mode) + { + /* + * If character is '\\' or '\r', we may need to look ahead below. + * Force fetch of the next character if we don't already have it. + * We need to do this before changing CSV state, in case one of + * these characters is also the quote or escape character. + */ + if (c == '\\' || c == '\r') + { + IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0); + } + + /* + * Dealing with quotes and escapes here is mildly tricky. If the + * quote char is also the escape char, there's no problem - we + * just use the char as a toggle. If they are different, we need + * to ensure that we only take account of an escape inside a + * quoted field and immediately preceding a quote char, and not + * the second in an escape-escape sequence. + */ + if (in_quote && c == escapec) + last_was_esc = !last_was_esc; + if (c == quotec && !last_was_esc) + in_quote = !in_quote; + if (c != escapec) + last_was_esc = false; + + /* + * Updating the line count for embedded CR and/or LF chars is + * necessarily a little fragile - this test is probably about the + * best we can do. (XXX it's arguable whether we should do this + * at all --- is cur_lineno a physical or logical count?) + */ + if (in_quote && c == (cstate->eol_type == EOL_NL ? '\n' : '\r')) + cstate->cur_lineno++; + } + + /* Process \r */ + if (c == '\r' && (!cstate->opts.csv_mode || !in_quote)) + { + /* Check for \r\n on first line, _and_ handle \r\n. 
*/ + if (cstate->eol_type == EOL_UNKNOWN || + cstate->eol_type == EOL_CRNL) + { + /* + * If need more data, go back to loop top to load it. + * + * Note that if we are at EOF, c will wind up as '\0' because + * of the guaranteed pad of input_buf. + */ + IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0); + + /* get next char */ + c = copy_input_buf[input_buf_ptr]; + + if (c == '\n') + { + input_buf_ptr++; /* eat newline */ + cstate->eol_type = EOL_CRNL; /* in case not set yet */ + } + else + { + /* found \r, but no \n */ + if (cstate->eol_type == EOL_CRNL) + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + !cstate->opts.csv_mode ? + errmsg("literal carriage return found in data") : + errmsg("unquoted carriage return found in data"), + !cstate->opts.csv_mode ? + errhint("Use \"\\r\" to represent carriage return.") : + errhint("Use quoted CSV field to represent carriage return."))); + + /* + * if we got here, it is the first line and we didn't find + * \n, so don't consume the peeked character + */ + cstate->eol_type = EOL_CR; + } + } + else if (cstate->eol_type == EOL_NL) + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + !cstate->opts.csv_mode ? + errmsg("literal carriage return found in data") : + errmsg("unquoted carriage return found in data"), + !cstate->opts.csv_mode ? + errhint("Use \"\\r\" to represent carriage return.") : + errhint("Use quoted CSV field to represent carriage return."))); + /* If reach here, we have found the line terminator */ + break; + } + + /* Process \n */ + if (c == '\n' && (!cstate->opts.csv_mode || !in_quote)) + { + if (cstate->eol_type == EOL_CR || cstate->eol_type == EOL_CRNL) + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + !cstate->opts.csv_mode ? + errmsg("literal newline found in data") : + errmsg("unquoted newline found in data"), + !cstate->opts.csv_mode ? 
+ errhint("Use \"\\n\" to represent newline.") : + errhint("Use quoted CSV field to represent newline."))); + cstate->eol_type = EOL_NL; /* in case not set yet */ + /* If reach here, we have found the line terminator */ + break; + } + + /* + * In CSV mode, we only recognize \. alone on a line. This is because + * \. is a valid CSV data value. + */ + if (c == '\\' && (!cstate->opts.csv_mode || first_char_in_line)) + { + char c2; + + IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0); + IF_NEED_REFILL_AND_EOF_BREAK(0); + + /* ----- + * get next character + * Note: we do not change c so if it isn't \., we can fall + * through and continue processing. + * ----- + */ + c2 = copy_input_buf[input_buf_ptr]; + + if (c2 == '.') + { + input_buf_ptr++; /* consume the '.' */ + + /* + * Note: if we loop back for more data here, it does not + * matter that the CSV state change checks are re-executed; we + * will come back here with no important state changed. + */ + if (cstate->eol_type == EOL_CRNL) + { + /* Get the next character */ + IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0); + /* if hit_eof, c2 will become '\0' */ + c2 = copy_input_buf[input_buf_ptr++]; + + if (c2 == '\n') + { + if (!cstate->opts.csv_mode) + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + errmsg("end-of-copy marker does not match previous newline style"))); + else + NO_END_OF_COPY_GOTO; + } + else if (c2 != '\r') + { + if (!cstate->opts.csv_mode) + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + errmsg("end-of-copy marker corrupt"))); + else + NO_END_OF_COPY_GOTO; + } + } + + /* Get the next character */ + IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0); + /* if hit_eof, c2 will become '\0' */ + c2 = copy_input_buf[input_buf_ptr++]; + + if (c2 != '\r' && c2 != '\n') + { + if (!cstate->opts.csv_mode) + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + errmsg("end-of-copy marker corrupt"))); + else + NO_END_OF_COPY_GOTO; + } + + if ((cstate->eol_type == EOL_NL && c2 != '\n') || + (cstate->eol_type == 
EOL_CRNL && c2 != '\n') || + (cstate->eol_type == EOL_CR && c2 != '\r')) + { + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + errmsg("end-of-copy marker does not match previous newline style"))); + } + + /* + * Transfer only the data before the \. into line_buf, then + * discard the data and the \. sequence. + */ + if (prev_raw_ptr > cstate->input_buf_index) + appendBinaryStringInfo(&cstate->line_buf, + cstate->input_buf + cstate->input_buf_index, + prev_raw_ptr - cstate->input_buf_index); + cstate->input_buf_index = input_buf_ptr; + result = true; /* report EOF */ + break; + } + else if (!cstate->opts.csv_mode) + { + /* + * If we are here, it means we found a backslash followed by + * something other than a period. In non-CSV mode, anything + * after a backslash is special, so we skip over that second + * character too. If we didn't do that \\. would be + * considered an eof-of copy, while in non-CSV mode it is a + * literal backslash followed by a period. In CSV mode, + * backslashes are not special, so we want to process the + * character after the backslash just like a normal character, + * so we don't increment in those cases. + */ + input_buf_ptr++; + } + } + + /* + * This label is for CSV cases where \. appears at the start of a + * line, but there is more text after it, meaning it was a data value. + * We are more strict for \. in CSV mode because \. could be a data + * value, while in non-CSV mode, \. cannot be a data value. + */ +not_end_of_copy: + first_char_in_line = false; + } /* end of outer loop */ + + /* + * Transfer any still-uncopied data to line_buf. + */ + REFILL_LINEBUF; + + return result; +} + +/* + * Return decimal value for a hexadecimal digit + */ +static int +GetDecimalFromHex(char hex) +{ + if (isdigit((unsigned char) hex)) + return hex - '0'; + else + return tolower((unsigned char) hex) - 'a' + 10; +} + +/* + * Parse the current line into separate attributes (fields), + * performing de-escaping as needed. 
+ * + * The input is in line_buf. We use attribute_buf to hold the result + * strings. cstate->raw_fields[k] is set to point to the k'th attribute + * string, or NULL when the input matches the null marker string. + * This array is expanded as necessary. + * + * (Note that the caller cannot check for nulls since the returned + * string would be the post-de-escaping equivalent, which may look + * the same as some valid data string.) + * + * delim is the column delimiter string (must be just one byte for now). + * null_print is the null marker string. Note that this is compared to + * the pre-de-escaped input string. + * + * The return value is the number of fields actually read. + */ +static int +CopyReadAttributesText(CopyFromState cstate) +{ + char delimc = cstate->opts.delim[0]; + int fieldno; + char *output_ptr; + char *cur_ptr; + char *line_end_ptr; + + /* + * We need a special case for zero-column tables: check that the input + * line is empty, and return. + */ + if (cstate->max_fields <= 0) + { + if (cstate->line_buf.len != 0) + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + errmsg("extra data after last expected column"))); + return 0; + } + + resetStringInfo(&cstate->attribute_buf); + + /* + * The de-escaped attributes will certainly not be longer than the input + * data line, so we can just force attribute_buf to be large enough and + * then transfer data without any checks for enough space. We need to do + * it this way because enlarging attribute_buf mid-stream would invalidate + * pointers already stored into cstate->raw_fields[]. 
+ */ + if (cstate->attribute_buf.maxlen <= cstate->line_buf.len) + enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len); + output_ptr = cstate->attribute_buf.data; + + /* set pointer variables for loop */ + cur_ptr = cstate->line_buf.data; + line_end_ptr = cstate->line_buf.data + cstate->line_buf.len; + + /* Outer loop iterates over fields */ + fieldno = 0; + for (;;) + { + bool found_delim = false; + char *start_ptr; + char *end_ptr; + int input_len; + bool saw_non_ascii = false; + + /* Make sure there is enough space for the next value */ + if (fieldno >= cstate->max_fields) + { + cstate->max_fields *= 2; + cstate->raw_fields = + repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *)); + } + + /* Remember start of field on both input and output sides */ + start_ptr = cur_ptr; + cstate->raw_fields[fieldno] = output_ptr; + + /* + * Scan data for field. + * + * Note that in this loop, we are scanning to locate the end of field + * and also speculatively performing de-escaping. Once we find the + * end-of-field, we can match the raw field contents against the null + * marker string. Only after that comparison fails do we know that + * de-escaping is actually the right thing to do; therefore we *must + * not* throw any syntax errors before we've done the null-marker + * check. 
+ */ + for (;;) + { + char c; + + end_ptr = cur_ptr; + if (cur_ptr >= line_end_ptr) + break; + c = *cur_ptr++; + if (c == delimc) + { + found_delim = true; + break; + } + if (c == '\\') + { + if (cur_ptr >= line_end_ptr) + break; + c = *cur_ptr++; + switch (c) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + { + /* handle \013 */ + int val; + + val = OCTVALUE(c); + if (cur_ptr < line_end_ptr) + { + c = *cur_ptr; + if (ISOCTAL(c)) + { + cur_ptr++; + val = (val << 3) + OCTVALUE(c); + if (cur_ptr < line_end_ptr) + { + c = *cur_ptr; + if (ISOCTAL(c)) + { + cur_ptr++; + val = (val << 3) + OCTVALUE(c); + } + } + } + } + c = val & 0377; + if (c == '\0' || IS_HIGHBIT_SET(c)) + saw_non_ascii = true; + } + break; + case 'x': + /* Handle \x3F */ + if (cur_ptr < line_end_ptr) + { + char hexchar = *cur_ptr; + + if (isxdigit((unsigned char) hexchar)) + { + int val = GetDecimalFromHex(hexchar); + + cur_ptr++; + if (cur_ptr < line_end_ptr) + { + hexchar = *cur_ptr; + if (isxdigit((unsigned char) hexchar)) + { + cur_ptr++; + val = (val << 4) + GetDecimalFromHex(hexchar); + } + } + c = val & 0xff; + if (c == '\0' || IS_HIGHBIT_SET(c)) + saw_non_ascii = true; + } + } + break; + case 'b': + c = '\b'; + break; + case 'f': + c = '\f'; + break; + case 'n': + c = '\n'; + break; + case 'r': + c = '\r'; + break; + case 't': + c = '\t'; + break; + case 'v': + c = '\v'; + break; + + /* + * in all other cases, take the char after '\' + * literally + */ + } + } + + /* Add c to output string */ + *output_ptr++ = c; + } + + /* Check whether raw input matched null marker */ + input_len = end_ptr - start_ptr; + if (input_len == cstate->opts.null_print_len && + strncmp(start_ptr, cstate->opts.null_print, input_len) == 0) + cstate->raw_fields[fieldno] = NULL; + else + { + /* + * At this point we know the field is supposed to contain data. 
+ * + * If we de-escaped any non-7-bit-ASCII chars, make sure the + * resulting string is valid data for the db encoding. + */ + if (saw_non_ascii) + { + char *fld = cstate->raw_fields[fieldno]; + + pg_verifymbstr(fld, output_ptr - fld, false); + } + } + + /* Terminate attribute value in output area */ + *output_ptr++ = '\0'; + + fieldno++; + /* Done if we hit EOL instead of a delim */ + if (!found_delim) + break; + } + + /* Clean up state of attribute_buf */ + output_ptr--; + Assert(*output_ptr == '\0'); + cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data); + + return fieldno; +} + +/* + * Parse the current line into separate attributes (fields), + * performing de-escaping as needed. This has exactly the same API as + * CopyReadAttributesText, except we parse the fields according to + * "standard" (i.e. common) CSV usage. + */ +static int +CopyReadAttributesCSV(CopyFromState cstate) +{ + char delimc = cstate->opts.delim[0]; + char quotec = cstate->opts.quote[0]; + char escapec = cstate->opts.escape[0]; + int fieldno; + char *output_ptr; + char *cur_ptr; + char *line_end_ptr; + + /* + * We need a special case for zero-column tables: check that the input + * line is empty, and return. + */ + if (cstate->max_fields <= 0) + { + if (cstate->line_buf.len != 0) + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + errmsg("extra data after last expected column"))); + return 0; + } + + resetStringInfo(&cstate->attribute_buf); + + /* + * The de-escaped attributes will certainly not be longer than the input + * data line, so we can just force attribute_buf to be large enough and + * then transfer data without any checks for enough space. We need to do + * it this way because enlarging attribute_buf mid-stream would invalidate + * pointers already stored into cstate->raw_fields[]. 
+ */ + if (cstate->attribute_buf.maxlen <= cstate->line_buf.len) + enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len); + output_ptr = cstate->attribute_buf.data; + + /* set pointer variables for loop */ + cur_ptr = cstate->line_buf.data; + line_end_ptr = cstate->line_buf.data + cstate->line_buf.len; + + /* Outer loop iterates over fields */ + fieldno = 0; + for (;;) + { + bool found_delim = false; + bool saw_quote = false; + char *start_ptr; + char *end_ptr; + int input_len; + + /* Make sure there is enough space for the next value */ + if (fieldno >= cstate->max_fields) + { + cstate->max_fields *= 2; + cstate->raw_fields = + repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *)); + } + + /* Remember start of field on both input and output sides */ + start_ptr = cur_ptr; + cstate->raw_fields[fieldno] = output_ptr; + + /* + * Scan data for field, + * + * The loop starts in "not quote" mode and then toggles between that + * and "in quote" mode. The loop exits normally if it is in "not + * quote" mode and a delimiter or line end is seen. 
+ */ + for (;;) + { + char c; + + /* Not in quote */ + for (;;) + { + end_ptr = cur_ptr; + if (cur_ptr >= line_end_ptr) + goto endfield; + c = *cur_ptr++; + /* unquoted field delimiter */ + if (c == delimc) + { + found_delim = true; + goto endfield; + } + /* start of quoted field (or part of field) */ + if (c == quotec) + { + saw_quote = true; + break; + } + /* Add c to output string */ + *output_ptr++ = c; + } + + /* In quote */ + for (;;) + { + end_ptr = cur_ptr; + if (cur_ptr >= line_end_ptr) + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + errmsg("unterminated CSV quoted field"))); + + c = *cur_ptr++; + + /* escape within a quoted field */ + if (c == escapec) + { + /* + * peek at the next char if available, and escape it if it + * is an escape char or a quote char + */ + if (cur_ptr < line_end_ptr) + { + char nextc = *cur_ptr; + + if (nextc == escapec || nextc == quotec) + { + *output_ptr++ = nextc; + cur_ptr++; + continue; + } + } + } + + /* + * end of quoted field. Must do this test after testing for + * escape in case quote char and escape char are the same + * (which is the common case). 
+ */ + if (c == quotec) + break; + + /* Add c to output string */ + *output_ptr++ = c; + } + } +endfield: + + /* Terminate attribute value in output area */ + *output_ptr++ = '\0'; + + /* Check whether raw input matched null marker */ + input_len = end_ptr - start_ptr; + if (!saw_quote && input_len == cstate->opts.null_print_len && + strncmp(start_ptr, cstate->opts.null_print, input_len) == 0) + cstate->raw_fields[fieldno] = NULL; + + fieldno++; + /* Done if we hit EOL instead of a delim */ + if (!found_delim) + break; + } + + /* Clean up state of attribute_buf */ + output_ptr--; + Assert(*output_ptr == '\0'); + cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data); + + return fieldno; +} + + +/* + * Read a binary attribute: a 4-byte (int32) length word followed by that + * many bytes of data. A length word of -1 denotes a NULL field, in which + * case the type's receive function is called with a NULL buffer and + * *isnull is set. Otherwise the field bytes are collected in + * attribute_buf and passed to the column type's binary input (receive) + * function; it is an error if the converter does not consume the whole + * buffer. + */ +static Datum +CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo, + Oid typioparam, int32 typmod, + bool *isnull) +{ + int32 fld_size; + Datum result; + + if (!CopyGetInt32(cstate, &fld_size)) + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + errmsg("unexpected EOF in COPY data"))); + if (fld_size == -1) + { + *isnull = true; + return ReceiveFunctionCall(flinfo, NULL, typioparam, typmod); + } + if (fld_size < 0) + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + errmsg("invalid field size"))); + + /* reset attribute_buf to empty, and load raw data in it */ + resetStringInfo(&cstate->attribute_buf); + + enlargeStringInfo(&cstate->attribute_buf, fld_size); + if (CopyReadBinaryData(cstate, cstate->attribute_buf.data, + fld_size) != fld_size) + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + errmsg("unexpected EOF in COPY data"))); + + cstate->attribute_buf.len = fld_size; + cstate->attribute_buf.data[fld_size] = '\0'; /* keep StringInfo data NUL-terminated */ + + /* Call the column type's binary input converter */ + result = ReceiveFunctionCall(flinfo, &cstate->attribute_buf, + typioparam, typmod); + + /* Trouble if it didn't eat the whole buffer */ + if (cstate->attribute_buf.cursor != cstate->attribute_buf.len) + 
ereport(ERROR, + (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION), + errmsg("incorrect binary data format"))); + + *isnull = false; + return result; +} diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c new file mode 100644 index 0000000..73e286f --- /dev/null +++ b/src/backend/commands/copyto.c @@ -0,0 +1,1310 @@ +/*------------------------------------------------------------------------- + * + * copyto.c + * COPY
TO file/program/client + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/copyto.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <ctype.h> +#include <unistd.h> +#include <sys/stat.h> + +#include "access/heapam.h" +#include "access/htup_details.h" +#include "access/tableam.h" +#include "access/xact.h" +#include "access/xlog.h" +#include "commands/copy.h" +#include "commands/progress.h" +#include "executor/execdesc.h" +#include "executor/executor.h" +#include "executor/tuptable.h" +#include "libpq/libpq.h" +#include "libpq/pqformat.h" +#include "mb/pg_wchar.h" +#include "miscadmin.h" +#include "optimizer/optimizer.h" +#include "pgstat.h" +#include "rewrite/rewriteHandler.h" +#include "storage/fd.h" +#include "tcop/tcopprot.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/partcache.h" +#include "utils/rel.h" +#include "utils/snapmgr.h" + +/* + * Represents the different dest cases we need to worry about at + * the bottom level + */ +typedef enum CopyDest +{ + COPY_FILE, /* to file (or a piped program) */ + COPY_FRONTEND, /* to frontend */ +} CopyDest; + +/* + * This struct contains all the state variables used throughout a COPY TO + * operation. + * + * Multi-byte encodings: all supported client-side encodings encode multi-byte + * characters by having the first byte's high bit set. Subsequent bytes of the + * character can have the high bit not set. When scanning data in such an + * encoding to look for a match to a single-byte (ie ASCII) character, we must + * use the full pg_encoding_mblen() machinery to skip over multibyte + * characters, else we might find a false match to a trailing byte. 
In + * supported server encodings, there is no possibility of a false match, and + * it's faster to make useless comparisons to trailing bytes than it is to + * invoke pg_encoding_mblen() to skip over them. encoding_embeds_ascii is true + * when we have to do it the hard way. + */ +typedef struct CopyToStateData +{ + /* low-level state data */ + CopyDest copy_dest; /* type of copy source/destination */ + FILE *copy_file; /* used if copy_dest == COPY_FILE */ + StringInfo fe_msgbuf; /* used for all dests during COPY TO */ + + int file_encoding; /* file or remote side's character encoding */ + bool need_transcoding; /* file encoding diff from server? */ + bool encoding_embeds_ascii; /* ASCII can be non-first byte? */ + + /* parameters from the COPY command */ + Relation rel; /* relation to copy to */ + QueryDesc *queryDesc; /* executable query to copy from */ + List *attnumlist; /* integer list of attnums to copy */ + char *filename; /* filename, or NULL for STDOUT */ + bool is_program; /* is 'filename' a program to popen? 
*/ + + CopyFormatOptions opts; + Node *whereClause; /* WHERE condition (or NULL) */ + + /* + * Working state + */ + MemoryContext copycontext; /* per-copy execution context */ + + FmgrInfo *out_functions; /* lookup info for output functions */ + MemoryContext rowcontext; /* per-row evaluation context */ + uint64 bytes_processed; /* number of bytes processed so far */ +} CopyToStateData; + +/* DestReceiver for COPY (query) TO */ +typedef struct +{ + DestReceiver pub; /* publicly-known function pointers */ + CopyToState cstate; /* CopyToStateData for the command */ + uint64 processed; /* # of tuples processed */ +} DR_copy; + +/* NOTE: there's a copy of this in copyfromparse.c */ +static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0"; + + +/* non-export function prototypes */ +static void EndCopy(CopyToState cstate); +static void ClosePipeToProgram(CopyToState cstate); +static void CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot); +static void CopyAttributeOutText(CopyToState cstate, const char *string); +static void CopyAttributeOutCSV(CopyToState cstate, const char *string, + bool use_quote, bool single_attr); + +/* Low-level communications functions */ +static void SendCopyBegin(CopyToState cstate); +static void SendCopyEnd(CopyToState cstate); +static void CopySendData(CopyToState cstate, const void *databuf, int datasize); +static void CopySendString(CopyToState cstate, const char *str); +static void CopySendChar(CopyToState cstate, char c); +static void CopySendEndOfRow(CopyToState cstate); +static void CopySendInt32(CopyToState cstate, int32 val); +static void CopySendInt16(CopyToState cstate, int16 val); + + +/* + * Send copy start/stop messages for frontend copies. These have changed + * in past protocol redesigns. + */ +static void +SendCopyBegin(CopyToState cstate) +{ + StringInfoData buf; + int natts = list_length(cstate->attnumlist); + int16 format = (cstate->opts.binary ? 
1 : 0); + int i; + + pq_beginmessage(&buf, 'H'); + pq_sendbyte(&buf, format); /* overall format */ + pq_sendint16(&buf, natts); + for (i = 0; i < natts; i++) + pq_sendint16(&buf, format); /* per-column formats */ + pq_endmessage(&buf); + cstate->copy_dest = COPY_FRONTEND; +} + +static void +SendCopyEnd(CopyToState cstate) +{ + /* Shouldn't have any unsent data */ + Assert(cstate->fe_msgbuf->len == 0); + /* Send Copy Done message */ + pq_putemptymessage('c'); +} + +/*---------- + * CopySendData sends output data to the destination (file or frontend) + * CopySendString does the same for null-terminated strings + * CopySendChar does the same for single characters + * CopySendEndOfRow does the appropriate thing at end of each data row + * (data is not actually flushed except by CopySendEndOfRow) + * + * NB: no data conversion is applied by these functions + *---------- + */ +static void +CopySendData(CopyToState cstate, const void *databuf, int datasize) +{ + appendBinaryStringInfo(cstate->fe_msgbuf, databuf, datasize); +} + +static void +CopySendString(CopyToState cstate, const char *str) +{ + appendBinaryStringInfo(cstate->fe_msgbuf, str, strlen(str)); +} + +static void +CopySendChar(CopyToState cstate, char c) +{ + appendStringInfoCharMacro(cstate->fe_msgbuf, c); +} + +static void +CopySendEndOfRow(CopyToState cstate) +{ + StringInfo fe_msgbuf = cstate->fe_msgbuf; + + switch (cstate->copy_dest) + { + case COPY_FILE: + if (!cstate->opts.binary) + { + /* Default line termination depends on platform */ +#ifndef WIN32 + CopySendChar(cstate, '\n'); +#else + CopySendString(cstate, "\r\n"); +#endif + } + + if (fwrite(fe_msgbuf->data, fe_msgbuf->len, 1, + cstate->copy_file) != 1 || + ferror(cstate->copy_file)) + { + if (cstate->is_program) + { + if (errno == EPIPE) + { + /* + * The pipe will be closed automatically on error at + * the end of transaction, but we might get a better + * error message from the subprocess' exit code than + * just "Broken Pipe" + */ + 
ClosePipeToProgram(cstate); + + /* + * If ClosePipeToProgram() didn't throw an error, the + * program terminated normally, but closed the pipe + * first. Restore errno, and throw an error. + */ + errno = EPIPE; + } + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not write to COPY program: %m"))); + } + else + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not write to COPY file: %m"))); + } + break; + case COPY_FRONTEND: + /* The FE/BE protocol uses \n as newline for all platforms */ + if (!cstate->opts.binary) + CopySendChar(cstate, '\n'); + + /* Dump the accumulated row as one CopyData message */ + (void) pq_putmessage('d', fe_msgbuf->data, fe_msgbuf->len); + break; + } + + /* Update the progress */ + cstate->bytes_processed += fe_msgbuf->len; + pgstat_progress_update_param(PROGRESS_COPY_BYTES_PROCESSED, cstate->bytes_processed); + + resetStringInfo(fe_msgbuf); +} + +/* + * These functions do apply some data conversion + */ + +/* + * CopySendInt32 sends an int32 in network byte order + */ +static inline void +CopySendInt32(CopyToState cstate, int32 val) +{ + uint32 buf; + + buf = pg_hton32((uint32) val); + CopySendData(cstate, &buf, sizeof(buf)); +} + +/* + * CopySendInt16 sends an int16 in network byte order + */ +static inline void +CopySendInt16(CopyToState cstate, int16 val) +{ + uint16 buf; + + buf = pg_hton16((uint16) val); + CopySendData(cstate, &buf, sizeof(buf)); +} + +/* + * Closes the pipe to an external program, checking the pclose() return code. 
+ */ +static void +ClosePipeToProgram(CopyToState cstate) +{ + int pclose_rc; + + Assert(cstate->is_program); + + pclose_rc = ClosePipeStream(cstate->copy_file); + if (pclose_rc == -1) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not close pipe to external command: %m"))); + else if (pclose_rc != 0) + { + ereport(ERROR, + (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION), + errmsg("program \"%s\" failed", + cstate->filename), + errdetail_internal("%s", wait_result_to_str(pclose_rc)))); + } +} + +/* + * Release resources allocated in a cstate for COPY TO/FROM. + */ +static void +EndCopy(CopyToState cstate) +{ + if (cstate->is_program) + { + ClosePipeToProgram(cstate); + } + else + { + if (cstate->filename != NULL && FreeFile(cstate->copy_file)) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not close file \"%s\": %m", + cstate->filename))); + } + + pgstat_progress_end_command(); + + MemoryContextDelete(cstate->copycontext); + pfree(cstate); +} + +/* + * Setup CopyToState to read tuples from a table or a query for COPY TO. + */ +CopyToState +BeginCopyTo(ParseState *pstate, + Relation rel, + RawStmt *raw_query, + Oid queryRelId, + const char *filename, + bool is_program, + List *attnamelist, + List *options) +{ + CopyToState cstate; + bool pipe = (filename == NULL); + TupleDesc tupDesc; + int num_phys_attrs; + MemoryContext oldcontext; + const int progress_cols[] = { + PROGRESS_COPY_COMMAND, + PROGRESS_COPY_TYPE + }; + int64 progress_vals[] = { + PROGRESS_COPY_COMMAND_TO, + 0 + }; + + if (rel != NULL && rel->rd_rel->relkind != RELKIND_RELATION) + { + if (rel->rd_rel->relkind == RELKIND_VIEW) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot copy from view \"%s\"", + RelationGetRelationName(rel)), + errhint("Try the COPY (SELECT ...) 
TO variant."))); + else if (rel->rd_rel->relkind == RELKIND_MATVIEW) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot copy from materialized view \"%s\"", + RelationGetRelationName(rel)), + errhint("Try the COPY (SELECT ...) TO variant."))); + else if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot copy from foreign table \"%s\"", + RelationGetRelationName(rel)), + errhint("Try the COPY (SELECT ...) TO variant."))); + else if (rel->rd_rel->relkind == RELKIND_SEQUENCE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot copy from sequence \"%s\"", + RelationGetRelationName(rel)))); + else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot copy from partitioned table \"%s\"", + RelationGetRelationName(rel)), + errhint("Try the COPY (SELECT ...) TO variant."))); + else + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot copy from non-table relation \"%s\"", + RelationGetRelationName(rel)))); + } + + + /* Allocate workspace and zero all fields */ + cstate = (CopyToStateData *) palloc0(sizeof(CopyToStateData)); + + /* + * We allocate everything used by a cstate in a new memory context. This + * avoids memory leaks during repeated use of COPY in a query. 
+ */ + cstate->copycontext = AllocSetContextCreate(CurrentMemoryContext, + "COPY", + ALLOCSET_DEFAULT_SIZES); + + oldcontext = MemoryContextSwitchTo(cstate->copycontext); + + /* Extract options from the statement node tree */ + ProcessCopyOptions(pstate, &cstate->opts, false /* is_from */ , options); + + /* Process the source/target relation or query */ + if (rel) + { + Assert(!raw_query); + + cstate->rel = rel; + + tupDesc = RelationGetDescr(cstate->rel); + } + else + { + List *rewritten; + Query *query; + PlannedStmt *plan; + DestReceiver *dest; + + cstate->rel = NULL; + + /* + * Run parse analysis and rewrite. Note this also acquires sufficient + * locks on the source table(s). + */ + rewritten = pg_analyze_and_rewrite_fixedparams(raw_query, + pstate->p_sourcetext, NULL, 0, + NULL); + + /* check that we got back something we can work with */ + if (rewritten == NIL) + { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("DO INSTEAD NOTHING rules are not supported for COPY"))); + } + else if (list_length(rewritten) > 1) + { + ListCell *lc; + + /* examine queries to determine which error message to issue */ + foreach(lc, rewritten) + { + Query *q = lfirst_node(Query, lc); + + if (q->querySource == QSRC_QUAL_INSTEAD_RULE) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("conditional DO INSTEAD rules are not supported for COPY"))); + if (q->querySource == QSRC_NON_INSTEAD_RULE) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("DO ALSO rules are not supported for the COPY"))); + } + + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("multi-statement DO INSTEAD rules are not supported for COPY"))); + } + + query = linitial_node(Query, rewritten); + + /* The grammar allows SELECT INTO, but we don't support that */ + if (query->utilityStmt != NULL && + IsA(query->utilityStmt, CreateTableAsStmt)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY (SELECT INTO) is not 
supported"))); + + Assert(query->utilityStmt == NULL); + + /* + * Similarly the grammar doesn't enforce the presence of a RETURNING + * clause, but this is required here. + */ + if (query->commandType != CMD_SELECT && + query->returningList == NIL) + { + Assert(query->commandType == CMD_INSERT || + query->commandType == CMD_UPDATE || + query->commandType == CMD_DELETE); + + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY query must have a RETURNING clause"))); + } + + /* plan the query */ + plan = pg_plan_query(query, pstate->p_sourcetext, + CURSOR_OPT_PARALLEL_OK, NULL); + + /* + * With row-level security and a user using "COPY relation TO", we + * have to convert the "COPY relation TO" to a query-based COPY (eg: + * "COPY (SELECT * FROM ONLY relation) TO"), to allow the rewriter to + * add in any RLS clauses. + * + * When this happens, we are passed in the relid of the originally + * found relation (which we have locked). As the planner will look up + * the relation again, we double-check here to make sure it found the + * same one that we have locked. + */ + if (queryRelId != InvalidOid) + { + /* + * Note that with RLS involved there may be multiple relations, + * and while the one we need is almost certainly first, we don't + * make any guarantees of that in the planner, so check the whole + * list and make sure we find the original relation. + */ + if (!list_member_oid(plan->relationOids, queryRelId)) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("relation referenced by COPY statement has changed"))); + } + + /* + * Use a snapshot with an updated command ID to ensure this query sees + * results of any previously executed queries. 
+ */ + PushCopiedSnapshot(GetActiveSnapshot()); + UpdateActiveSnapshotCommandId(); + + /* Create dest receiver for COPY OUT */ + dest = CreateDestReceiver(DestCopyOut); + ((DR_copy *) dest)->cstate = cstate; + + /* Create a QueryDesc requesting no output */ + cstate->queryDesc = CreateQueryDesc(plan, pstate->p_sourcetext, + GetActiveSnapshot(), + InvalidSnapshot, + dest, NULL, NULL, 0); + + /* + * Call ExecutorStart to prepare the plan for execution. + * + * ExecutorStart computes a result tupdesc for us + */ + ExecutorStart(cstate->queryDesc, 0); + + tupDesc = cstate->queryDesc->tupDesc; + } + + /* Generate or convert list of attributes to process */ + cstate->attnumlist = CopyGetAttnums(tupDesc, cstate->rel, attnamelist); + + num_phys_attrs = tupDesc->natts; + + /* Convert FORCE_QUOTE name list to per-column flags, check validity */ + cstate->opts.force_quote_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool)); + if (cstate->opts.force_quote_all) + { + int i; + + for (i = 0; i < num_phys_attrs; i++) + cstate->opts.force_quote_flags[i] = true; + } + else if (cstate->opts.force_quote) + { + List *attnums; + ListCell *cur; + + attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->opts.force_quote); + + foreach(cur, attnums) + { + int attnum = lfirst_int(cur); + Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1); + + if (!list_member_int(cstate->attnumlist, attnum)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("FORCE_QUOTE column \"%s\" not referenced by COPY", + NameStr(attr->attname)))); + cstate->opts.force_quote_flags[attnum - 1] = true; + } + } + + /* Convert FORCE_NOT_NULL name list to per-column flags, check validity */ + cstate->opts.force_notnull_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool)); + if (cstate->opts.force_notnull) + { + List *attnums; + ListCell *cur; + + attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->opts.force_notnull); + + foreach(cur, attnums) + { + int attnum = lfirst_int(cur); + 
Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1); + + if (!list_member_int(cstate->attnumlist, attnum)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("FORCE_NOT_NULL column \"%s\" not referenced by COPY", + NameStr(attr->attname)))); + cstate->opts.force_notnull_flags[attnum - 1] = true; + } + } + + /* Convert FORCE_NULL name list to per-column flags, check validity */ + cstate->opts.force_null_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool)); + if (cstate->opts.force_null) + { + List *attnums; + ListCell *cur; + + attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->opts.force_null); + + foreach(cur, attnums) + { + int attnum = lfirst_int(cur); + Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1); + + if (!list_member_int(cstate->attnumlist, attnum)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("FORCE_NULL column \"%s\" not referenced by COPY", + NameStr(attr->attname)))); + cstate->opts.force_null_flags[attnum - 1] = true; + } + } + + /* Use client encoding when ENCODING option is not specified. */ + if (cstate->opts.file_encoding < 0) + cstate->file_encoding = pg_get_client_encoding(); + else + cstate->file_encoding = cstate->opts.file_encoding; + + /* + * Set up encoding conversion info. Even if the file and server encodings + * are the same, we must apply pg_any_to_server() to validate data in + * multibyte encodings. 
+ */ + cstate->need_transcoding = + (cstate->file_encoding != GetDatabaseEncoding() || + pg_database_encoding_max_length() > 1); + /* See Multibyte encoding comment above */ + cstate->encoding_embeds_ascii = PG_ENCODING_IS_CLIENT_ONLY(cstate->file_encoding); + + cstate->copy_dest = COPY_FILE; /* default */ + + if (pipe) + { + progress_vals[1] = PROGRESS_COPY_TYPE_PIPE; + + Assert(!is_program); /* the grammar does not allow this */ + if (whereToSendOutput != DestRemote) + cstate->copy_file = stdout; + } + else + { + cstate->filename = pstrdup(filename); + cstate->is_program = is_program; + + if (is_program) + { + progress_vals[1] = PROGRESS_COPY_TYPE_PROGRAM; + cstate->copy_file = OpenPipeStream(cstate->filename, PG_BINARY_W); + if (cstate->copy_file == NULL) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not execute command \"%s\": %m", + cstate->filename))); + } + else + { + mode_t oumask; /* Pre-existing umask value */ + struct stat st; + + progress_vals[1] = PROGRESS_COPY_TYPE_FILE; + + /* + * Prevent write to relative path ... too easy to shoot oneself in + * the foot by overwriting a database file ... + */ + if (!is_absolute_path(filename)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_NAME), + errmsg("relative path not allowed for COPY to file"))); + + oumask = umask(S_IWGRP | S_IWOTH); + PG_TRY(); + { + cstate->copy_file = AllocateFile(cstate->filename, PG_BINARY_W); + } + PG_FINALLY(); + { + umask(oumask); + } + PG_END_TRY(); + if (cstate->copy_file == NULL) + { + /* copy errno because ereport subfunctions might change it */ + int save_errno = errno; + + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open file \"%s\" for writing: %m", + cstate->filename), + (save_errno == ENOENT || save_errno == EACCES) ? + errhint("COPY TO instructs the PostgreSQL server process to write a file. 
" + "You may want a client-side facility such as psql's \\copy.") : 0)); + } + + if (fstat(fileno(cstate->copy_file), &st)) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not stat file \"%s\": %m", + cstate->filename))); + + if (S_ISDIR(st.st_mode)) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is a directory", cstate->filename))); + } + } + + /* initialize progress */ + pgstat_progress_start_command(PROGRESS_COMMAND_COPY, + cstate->rel ? RelationGetRelid(cstate->rel) : InvalidOid); + pgstat_progress_update_multi_param(2, progress_cols, progress_vals); + + cstate->bytes_processed = 0; + + MemoryContextSwitchTo(oldcontext); + + return cstate; +} + +/* + * Clean up storage and release resources for COPY TO. + */ +void +EndCopyTo(CopyToState cstate) +{ + if (cstate->queryDesc != NULL) + { + /* Close down the query and free resources. */ + ExecutorFinish(cstate->queryDesc); + ExecutorEnd(cstate->queryDesc); + FreeQueryDesc(cstate->queryDesc); + PopActiveSnapshot(); + } + + /* Clean up storage */ + EndCopy(cstate); +} + +/* + * Copy from relation or query TO file. + */ +uint64 +DoCopyTo(CopyToState cstate) +{ + bool pipe = (cstate->filename == NULL); + bool fe_copy = (pipe && whereToSendOutput == DestRemote); + TupleDesc tupDesc; + int num_phys_attrs; + ListCell *cur; + uint64 processed; + + if (fe_copy) + SendCopyBegin(cstate); + + if (cstate->rel) + tupDesc = RelationGetDescr(cstate->rel); + else + tupDesc = cstate->queryDesc->tupDesc; + num_phys_attrs = tupDesc->natts; + cstate->opts.null_print_client = cstate->opts.null_print; /* default */ + + /* We use fe_msgbuf as a per-row buffer regardless of copy_dest */ + cstate->fe_msgbuf = makeStringInfo(); + + /* Get info about the columns we need to process. 
*/ + cstate->out_functions = (FmgrInfo *) palloc(num_phys_attrs * sizeof(FmgrInfo)); + foreach(cur, cstate->attnumlist) + { + int attnum = lfirst_int(cur); + Oid out_func_oid; + bool isvarlena; + Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1); + + if (cstate->opts.binary) + getTypeBinaryOutputInfo(attr->atttypid, + &out_func_oid, + &isvarlena); + else + getTypeOutputInfo(attr->atttypid, + &out_func_oid, + &isvarlena); + fmgr_info(out_func_oid, &cstate->out_functions[attnum - 1]); + } + + /* + * Create a temporary memory context that we can reset once per row to + * recover palloc'd memory. This avoids any problems with leaks inside + * datatype output routines, and should be faster than retail pfree's + * anyway. (We don't need a whole econtext as CopyFrom does.) + */ + cstate->rowcontext = AllocSetContextCreate(CurrentMemoryContext, + "COPY TO", + ALLOCSET_DEFAULT_SIZES); + + if (cstate->opts.binary) + { + /* Generate header for a binary copy */ + int32 tmp; + + /* Signature */ + CopySendData(cstate, BinarySignature, 11); + /* Flags field */ + tmp = 0; + CopySendInt32(cstate, tmp); + /* No header extension */ + tmp = 0; + CopySendInt32(cstate, tmp); + } + else + { + /* + * For non-binary copy, we need to convert null_print to file + * encoding, because it will be sent directly with CopySendString. 
+ */ + if (cstate->need_transcoding) + cstate->opts.null_print_client = pg_server_to_any(cstate->opts.null_print, + cstate->opts.null_print_len, + cstate->file_encoding); + + /* if a header has been requested send the line */ + if (cstate->opts.header_line) + { + bool hdr_delim = false; + + foreach(cur, cstate->attnumlist) + { + int attnum = lfirst_int(cur); + char *colname; + + if (hdr_delim) + CopySendChar(cstate, cstate->opts.delim[0]); + hdr_delim = true; + + colname = NameStr(TupleDescAttr(tupDesc, attnum - 1)->attname); + + if (cstate->opts.csv_mode) + CopyAttributeOutCSV(cstate, colname, false, + list_length(cstate->attnumlist) == 1); + else + CopyAttributeOutText(cstate, colname); + } + + CopySendEndOfRow(cstate); + } + } + + if (cstate->rel) + { + TupleTableSlot *slot; + TableScanDesc scandesc; + + scandesc = table_beginscan(cstate->rel, GetActiveSnapshot(), 0, NULL); + slot = table_slot_create(cstate->rel, NULL); + + processed = 0; + while (table_scan_getnextslot(scandesc, ForwardScanDirection, slot)) + { + CHECK_FOR_INTERRUPTS(); + + /* Deconstruct the tuple ... */ + slot_getallattrs(slot); + + /* Format and send the data */ + CopyOneRowTo(cstate, slot); + + /* + * Increment the number of processed tuples, and report the + * progress. + */ + pgstat_progress_update_param(PROGRESS_COPY_TUPLES_PROCESSED, + ++processed); + } + + ExecDropSingleTupleTableSlot(slot); + table_endscan(scandesc); + } + else + { + /* run the plan --- the dest receiver will send tuples */ + ExecutorRun(cstate->queryDesc, ForwardScanDirection, 0L, true); + processed = ((DR_copy *) cstate->queryDesc->dest)->processed; + } + + if (cstate->opts.binary) + { + /* Generate trailer for a binary copy */ + CopySendInt16(cstate, -1); + /* Need to flush out the trailer */ + CopySendEndOfRow(cstate); + } + + MemoryContextDelete(cstate->rowcontext); + + if (fe_copy) + SendCopyEnd(cstate); + + return processed; +} + +/* + * Emit one row during DoCopyTo(). 
+ */ +static void +CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot) +{ + bool need_delim = false; + FmgrInfo *out_functions = cstate->out_functions; + MemoryContext oldcontext; + ListCell *cur; + char *string; + + MemoryContextReset(cstate->rowcontext); + oldcontext = MemoryContextSwitchTo(cstate->rowcontext); + + if (cstate->opts.binary) + { + /* Binary per-tuple header */ + CopySendInt16(cstate, list_length(cstate->attnumlist)); + } + + /* Make sure the tuple is fully deconstructed */ + slot_getallattrs(slot); + + foreach(cur, cstate->attnumlist) + { + int attnum = lfirst_int(cur); + Datum value = slot->tts_values[attnum - 1]; + bool isnull = slot->tts_isnull[attnum - 1]; + + if (!cstate->opts.binary) + { + if (need_delim) + CopySendChar(cstate, cstate->opts.delim[0]); + need_delim = true; + } + + if (isnull) + { + if (!cstate->opts.binary) + CopySendString(cstate, cstate->opts.null_print_client); + else + CopySendInt32(cstate, -1); + } + else + { + if (!cstate->opts.binary) + { + string = OutputFunctionCall(&out_functions[attnum - 1], + value); + if (cstate->opts.csv_mode) + CopyAttributeOutCSV(cstate, string, + cstate->opts.force_quote_flags[attnum - 1], + list_length(cstate->attnumlist) == 1); + else + CopyAttributeOutText(cstate, string); + } + else + { + bytea *outputbytes; + + outputbytes = SendFunctionCall(&out_functions[attnum - 1], + value); + CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ); + CopySendData(cstate, VARDATA(outputbytes), + VARSIZE(outputbytes) - VARHDRSZ); + } + } + } + + CopySendEndOfRow(cstate); + + MemoryContextSwitchTo(oldcontext); +} + +/* + * Send text representation of one attribute, with conversion and escaping + */ +#define DUMPSOFAR() \ + do { \ + if (ptr > start) \ + CopySendData(cstate, start, ptr - start); \ + } while (0) + +static void +CopyAttributeOutText(CopyToState cstate, const char *string) +{ + const char *ptr; + const char *start; + char c; + char delimc = cstate->opts.delim[0]; + + if 
(cstate->need_transcoding) + ptr = pg_server_to_any(string, strlen(string), cstate->file_encoding); + else + ptr = string; + + /* + * We have to grovel through the string searching for control characters + * and instances of the delimiter character. In most cases, though, these + * are infrequent. To avoid overhead from calling CopySendData once per + * character, we dump out all characters between escaped characters in a + * single call. The loop invariant is that the data from "start" to "ptr" + * can be sent literally, but hasn't yet been. + * + * We can skip pg_encoding_mblen() overhead when encoding is safe, because + * in valid backend encodings, extra bytes of a multibyte character never + * look like ASCII. This loop is sufficiently performance-critical that + * it's worth making two copies of it to get the IS_HIGHBIT_SET() test out + * of the normal safe-encoding path. + */ + if (cstate->encoding_embeds_ascii) + { + start = ptr; + while ((c = *ptr) != '\0') + { + if ((unsigned char) c < (unsigned char) 0x20) + { + /* + * \r and \n must be escaped, the others are traditional. We + * prefer to dump these using the C-like notation, rather than + * a backslash and the literal character, because it makes the + * dump file a bit more proof against Microsoftish data + * mangling. 
+ */ + switch (c) + { + case '\b': + c = 'b'; + break; + case '\f': + c = 'f'; + break; + case '\n': + c = 'n'; + break; + case '\r': + c = 'r'; + break; + case '\t': + c = 't'; + break; + case '\v': + c = 'v'; + break; + default: + /* If it's the delimiter, must backslash it */ + if (c == delimc) + break; + /* All ASCII control chars are length 1 */ + ptr++; + continue; /* fall to end of loop */ + } + /* if we get here, we need to convert the control char */ + DUMPSOFAR(); + CopySendChar(cstate, '\\'); + CopySendChar(cstate, c); + start = ++ptr; /* do not include char in next run */ + } + else if (c == '\\' || c == delimc) + { + DUMPSOFAR(); + CopySendChar(cstate, '\\'); + start = ptr++; /* we include char in next run */ + } + else if (IS_HIGHBIT_SET(c)) + ptr += pg_encoding_mblen(cstate->file_encoding, ptr); + else + ptr++; + } + } + else + { + start = ptr; + while ((c = *ptr) != '\0') + { + if ((unsigned char) c < (unsigned char) 0x20) + { + /* + * \r and \n must be escaped, the others are traditional. We + * prefer to dump these using the C-like notation, rather than + * a backslash and the literal character, because it makes the + * dump file a bit more proof against Microsoftish data + * mangling. 
+ */ + switch (c) + { + case '\b': + c = 'b'; + break; + case '\f': + c = 'f'; + break; + case '\n': + c = 'n'; + break; + case '\r': + c = 'r'; + break; + case '\t': + c = 't'; + break; + case '\v': + c = 'v'; + break; + default: + /* If it's the delimiter, must backslash it */ + if (c == delimc) + break; + /* All ASCII control chars are length 1 */ + ptr++; + continue; /* fall to end of loop */ + } + /* if we get here, we need to convert the control char */ + DUMPSOFAR(); + CopySendChar(cstate, '\\'); + CopySendChar(cstate, c); + start = ++ptr; /* do not include char in next run */ + } + else if (c == '\\' || c == delimc) + { + DUMPSOFAR(); + CopySendChar(cstate, '\\'); + start = ptr++; /* we include char in next run */ + } + else + ptr++; + } + } + + DUMPSOFAR(); +} + +/* + * Send text representation of one attribute, with conversion and + * CSV-style escaping + */ +static void +CopyAttributeOutCSV(CopyToState cstate, const char *string, + bool use_quote, bool single_attr) +{ + const char *ptr; + const char *start; + char c; + char delimc = cstate->opts.delim[0]; + char quotec = cstate->opts.quote[0]; + char escapec = cstate->opts.escape[0]; + + /* force quoting if it matches null_print (before conversion!) */ + if (!use_quote && strcmp(string, cstate->opts.null_print) == 0) + use_quote = true; + + if (cstate->need_transcoding) + ptr = pg_server_to_any(string, strlen(string), cstate->file_encoding); + else + ptr = string; + + /* + * Make a preliminary pass to discover if it needs quoting + */ + if (!use_quote) + { + /* + * Because '\.' can be a data value, quote it if it appears alone on a + * line so it is not interpreted as the end-of-data marker. 
+ */ + if (single_attr && strcmp(ptr, "\\.") == 0) + use_quote = true; + else + { + const char *tptr = ptr; + + while ((c = *tptr) != '\0') + { + if (c == delimc || c == quotec || c == '\n' || c == '\r') + { + use_quote = true; + break; + } + if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii) + tptr += pg_encoding_mblen(cstate->file_encoding, tptr); + else + tptr++; + } + } + } + + if (use_quote) + { + CopySendChar(cstate, quotec); + + /* + * We adopt the same optimization strategy as in CopyAttributeOutText + */ + start = ptr; + while ((c = *ptr) != '\0') + { + if (c == quotec || c == escapec) + { + DUMPSOFAR(); + CopySendChar(cstate, escapec); + start = ptr; /* we include char in next run */ + } + if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii) + ptr += pg_encoding_mblen(cstate->file_encoding, ptr); + else + ptr++; + } + DUMPSOFAR(); + + CopySendChar(cstate, quotec); + } + else + { + /* If it doesn't need quoting, we can just dump it as-is */ + CopySendString(cstate, ptr); + } +} + +/* + * copy_dest_startup --- executor startup + */ +static void +copy_dest_startup(DestReceiver *self, int operation, TupleDesc typeinfo) +{ + /* no-op */ +} + +/* + * copy_dest_receive --- receive one tuple + */ +static bool +copy_dest_receive(TupleTableSlot *slot, DestReceiver *self) +{ + DR_copy *myState = (DR_copy *) self; + CopyToState cstate = myState->cstate; + + /* Send the data */ + CopyOneRowTo(cstate, slot); + + /* Increment the number of processed tuples, and report the progress */ + pgstat_progress_update_param(PROGRESS_COPY_TUPLES_PROCESSED, + ++myState->processed); + + return true; +} + +/* + * copy_dest_shutdown --- executor end + */ +static void +copy_dest_shutdown(DestReceiver *self) +{ + /* no-op */ +} + +/* + * copy_dest_destroy --- release DestReceiver object + */ +static void +copy_dest_destroy(DestReceiver *self) +{ + pfree(self); +} + +/* + * CreateCopyDestReceiver -- create a suitable DestReceiver object + */ +DestReceiver * 
+CreateCopyDestReceiver(void) +{ + DR_copy *self = (DR_copy *) palloc(sizeof(DR_copy)); + + self->pub.receiveSlot = copy_dest_receive; + self->pub.rStartup = copy_dest_startup; + self->pub.rShutdown = copy_dest_shutdown; + self->pub.rDestroy = copy_dest_destroy; + self->pub.mydest = DestCopyOut; + + self->cstate = NULL; /* will be set later */ + self->processed = 0; + + return (DestReceiver *) self; +} diff --git a/src/backend/commands/createas.c b/src/backend/commands/createas.c new file mode 100644 index 0000000..152c29b --- /dev/null +++ b/src/backend/commands/createas.c @@ -0,0 +1,637 @@ +/*------------------------------------------------------------------------- + * + * createas.c + * Execution of CREATE TABLE ... AS, a/k/a SELECT INTO. + * Since CREATE MATERIALIZED VIEW shares syntax and most behaviors, + * we implement that here, too. + * + * We implement this by diverting the query's normal output to a + * specialized DestReceiver type. + * + * Formerly, CTAS was implemented as a variant of SELECT, which led + * to assorted legacy behaviors that we still try to preserve, notably that + * we must return a tuples-processed count in the QueryCompletion. (We no + * longer do that for CTAS ... WITH NO DATA, however.) 
+ * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/createas.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/heapam.h" +#include "access/htup_details.h" +#include "access/reloptions.h" +#include "access/sysattr.h" +#include "access/tableam.h" +#include "access/xact.h" +#include "access/xlog.h" +#include "catalog/namespace.h" +#include "catalog/toasting.h" +#include "commands/createas.h" +#include "commands/matview.h" +#include "commands/prepare.h" +#include "commands/tablecmds.h" +#include "commands/view.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "parser/parse_clause.h" +#include "rewrite/rewriteHandler.h" +#include "storage/smgr.h" +#include "tcop/tcopprot.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" +#include "utils/rls.h" +#include "utils/snapmgr.h" + +typedef struct +{ + DestReceiver pub; /* publicly-known function pointers */ + IntoClause *into; /* target relation specification */ + /* These fields are filled by intorel_startup: */ + Relation rel; /* relation to write to */ + ObjectAddress reladdr; /* address of rel, for ExecCreateTableAs */ + CommandId output_cid; /* cmin to insert in output tuples */ + int ti_options; /* table_tuple_insert performance options */ + BulkInsertState bistate; /* bulk insert state */ +} DR_intorel; + +/* utility functions for CTAS definition creation */ +static ObjectAddress create_ctas_internal(List *attrList, IntoClause *into); +static ObjectAddress create_ctas_nodata(List *tlist, IntoClause *into); + +/* DestReceiver routines for collecting data */ +static void intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo); +static bool intorel_receive(TupleTableSlot *slot, DestReceiver *self); 
+static void intorel_shutdown(DestReceiver *self); +static void intorel_destroy(DestReceiver *self); + + +/* + * create_ctas_internal + * + * Internal utility used for the creation of the definition of a relation + * created via CREATE TABLE AS or a materialized view. Caller needs to + * provide a list of attributes (ColumnDef nodes). + */ +static ObjectAddress +create_ctas_internal(List *attrList, IntoClause *into) +{ + CreateStmt *create = makeNode(CreateStmt); + bool is_matview; + char relkind; + Datum toast_options; + static char *validnsps[] = HEAP_RELOPT_NAMESPACES; + ObjectAddress intoRelationAddr; + + /* This code supports both CREATE TABLE AS and CREATE MATERIALIZED VIEW */ + is_matview = (into->viewQuery != NULL); + relkind = is_matview ? RELKIND_MATVIEW : RELKIND_RELATION; + + /* + * Create the target relation by faking up a CREATE TABLE parsetree and + * passing it to DefineRelation. + */ + create->relation = into->rel; + create->tableElts = attrList; + create->inhRelations = NIL; + create->ofTypename = NULL; + create->constraints = NIL; + create->options = into->options; + create->oncommit = into->onCommit; + create->tablespacename = into->tableSpaceName; + create->if_not_exists = false; + create->accessMethod = into->accessMethod; + + /* + * Create the relation. (This will error out if there's an existing view, + * so we don't need more code to complain if "replace" is false.) + */ + intoRelationAddr = DefineRelation(create, relkind, InvalidOid, NULL, NULL); + + /* + * If necessary, create a TOAST table for the target table. Note that + * NewRelationCreateToastTable ends with CommandCounterIncrement(), so + * that the TOAST table will be visible for insertion. 
+ */ + CommandCounterIncrement(); + + /* parse and validate reloptions for the toast table */ + toast_options = transformRelOptions((Datum) 0, + create->options, + "toast", + validnsps, + true, false); + + (void) heap_reloptions(RELKIND_TOASTVALUE, toast_options, true); + + NewRelationCreateToastTable(intoRelationAddr.objectId, toast_options); + + /* Create the "view" part of a materialized view. */ + if (is_matview) + { + /* StoreViewQuery scribbles on tree, so make a copy */ + Query *query = (Query *) copyObject(into->viewQuery); + + StoreViewQuery(intoRelationAddr.objectId, query, false); + CommandCounterIncrement(); + } + + return intoRelationAddr; +} + + +/* + * create_ctas_nodata + * + * Create CTAS or materialized view when WITH NO DATA is used, starting from + * the targetlist of the SELECT or view definition. + */ +static ObjectAddress +create_ctas_nodata(List *tlist, IntoClause *into) +{ + List *attrList; + ListCell *t, + *lc; + + /* + * Build list of ColumnDefs from non-junk elements of the tlist. If a + * column name list was specified in CREATE TABLE AS, override the column + * names in the query. (Too few column names are OK, too many are not.) + */ + attrList = NIL; + lc = list_head(into->colNames); + foreach(t, tlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(t); + + if (!tle->resjunk) + { + ColumnDef *col; + char *colname; + + if (lc) + { + colname = strVal(lfirst(lc)); + lc = lnext(into->colNames, lc); + } + else + colname = tle->resname; + + col = makeColumnDef(colname, + exprType((Node *) tle->expr), + exprTypmod((Node *) tle->expr), + exprCollation((Node *) tle->expr)); + + /* + * It's possible that the column is of a collatable type but the + * collation could not be resolved, so double-check. (We must + * check this here because DefineRelation would adopt the type's + * default collation rather than complaining.) 
+ */ + if (!OidIsValid(col->collOid) && + type_is_collatable(col->typeName->typeOid)) + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("no collation was derived for column \"%s\" with collatable type %s", + col->colname, + format_type_be(col->typeName->typeOid)), + errhint("Use the COLLATE clause to set the collation explicitly."))); + + attrList = lappend(attrList, col); + } + } + + if (lc != NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("too many column names were specified"))); + + /* Create the relation definition using the ColumnDef list */ + return create_ctas_internal(attrList, into); +} + + +/* + * ExecCreateTableAs -- execute a CREATE TABLE AS command + */ +ObjectAddress +ExecCreateTableAs(ParseState *pstate, CreateTableAsStmt *stmt, + ParamListInfo params, QueryEnvironment *queryEnv, + QueryCompletion *qc) +{ + Query *query = castNode(Query, stmt->query); + IntoClause *into = stmt->into; + bool is_matview = (into->viewQuery != NULL); + DestReceiver *dest; + Oid save_userid = InvalidOid; + int save_sec_context = 0; + int save_nestlevel = 0; + ObjectAddress address; + List *rewritten; + PlannedStmt *plan; + QueryDesc *queryDesc; + + /* Check if the relation exists or not */ + if (CreateTableAsRelExists(stmt)) + return InvalidObjectAddress; + + /* + * Create the tuple receiver object and insert info it will need + */ + dest = CreateIntoRelDestReceiver(into); + + /* + * The contained Query could be a SELECT, or an EXECUTE utility command. + * If the latter, we just pass it off to ExecuteQuery. 
+ */ + if (query->commandType == CMD_UTILITY && + IsA(query->utilityStmt, ExecuteStmt)) + { + ExecuteStmt *estmt = castNode(ExecuteStmt, query->utilityStmt); + + Assert(!is_matview); /* excluded by syntax */ + ExecuteQuery(pstate, estmt, into, params, dest, qc); + + /* get object address that intorel_startup saved for us */ + address = ((DR_intorel *) dest)->reladdr; + + return address; + } + Assert(query->commandType == CMD_SELECT); + + /* + * For materialized views, lock down security-restricted operations and + * arrange to make GUC variable changes local to this command. This is + * not necessary for security, but this keeps the behavior similar to + * REFRESH MATERIALIZED VIEW. Otherwise, one could create a materialized + * view not possible to refresh. + */ + if (is_matview) + { + GetUserIdAndSecContext(&save_userid, &save_sec_context); + SetUserIdAndSecContext(save_userid, + save_sec_context | SECURITY_RESTRICTED_OPERATION); + save_nestlevel = NewGUCNestLevel(); + } + + if (into->skipData) + { + /* + * If WITH NO DATA was specified, do not go through the rewriter, + * planner and executor. Just define the relation using a code path + * similar to CREATE VIEW. This avoids dump/restore problems stemming + * from running the planner before all dependencies are set up. + */ + address = create_ctas_nodata(query->targetList, into); + } + else + { + /* + * Parse analysis was done already, but we still have to run the rule + * rewriter. We do not do AcquireRewriteLocks: we assume the query + * either came straight from the parser, or suitable locks were + * acquired by plancache.c. + */ + rewritten = QueryRewrite(query); + + /* SELECT should never rewrite to more or less than one SELECT query */ + if (list_length(rewritten) != 1) + elog(ERROR, "unexpected rewrite result for %s", + is_matview ? 
"CREATE MATERIALIZED VIEW" : + "CREATE TABLE AS SELECT"); + query = linitial_node(Query, rewritten); + Assert(query->commandType == CMD_SELECT); + + /* plan the query */ + plan = pg_plan_query(query, pstate->p_sourcetext, + CURSOR_OPT_PARALLEL_OK, params); + + /* + * Use a snapshot with an updated command ID to ensure this query sees + * results of any previously executed queries. (This could only + * matter if the planner executed an allegedly-stable function that + * changed the database contents, but let's do it anyway to be + * parallel to the EXPLAIN code path.) + */ + PushCopiedSnapshot(GetActiveSnapshot()); + UpdateActiveSnapshotCommandId(); + + /* Create a QueryDesc, redirecting output to our tuple receiver */ + queryDesc = CreateQueryDesc(plan, pstate->p_sourcetext, + GetActiveSnapshot(), InvalidSnapshot, + dest, params, queryEnv, 0); + + /* call ExecutorStart to prepare the plan for execution */ + ExecutorStart(queryDesc, GetIntoRelEFlags(into)); + + /* run the plan to completion */ + ExecutorRun(queryDesc, ForwardScanDirection, 0L, true); + + /* save the rowcount if we're given a qc to fill */ + if (qc) + SetQueryCompletion(qc, CMDTAG_SELECT, queryDesc->estate->es_processed); + + /* get object address that intorel_startup saved for us */ + address = ((DR_intorel *) dest)->reladdr; + + /* and clean up */ + ExecutorFinish(queryDesc); + ExecutorEnd(queryDesc); + + FreeQueryDesc(queryDesc); + + PopActiveSnapshot(); + } + + if (is_matview) + { + /* Roll back any GUC changes */ + AtEOXact_GUC(false, save_nestlevel); + + /* Restore userid and security context */ + SetUserIdAndSecContext(save_userid, save_sec_context); + } + + return address; +} + +/* + * GetIntoRelEFlags --- compute executor flags needed for CREATE TABLE AS + * + * This is exported because EXPLAIN and PREPARE need it too. 
(Note: those + * callers still need to deal explicitly with the skipData flag; since they + * use different methods for suppressing execution, it doesn't seem worth + * trying to encapsulate that part.) + */ +int +GetIntoRelEFlags(IntoClause *intoClause) +{ + int flags = 0; + + if (intoClause->skipData) + flags |= EXEC_FLAG_WITH_NO_DATA; + + return flags; +} + +/* + * CreateTableAsRelExists --- check existence of relation for CreateTableAsStmt + * + * Utility wrapper checking if the relation pending for creation in this + * CreateTableAsStmt query already exists or not. Returns true if the + * relation exists, otherwise false. + */ +bool +CreateTableAsRelExists(CreateTableAsStmt *ctas) +{ + Oid nspid; + Oid oldrelid; + ObjectAddress address; + IntoClause *into = ctas->into; + + nspid = RangeVarGetCreationNamespace(into->rel); + + oldrelid = get_relname_relid(into->rel->relname, nspid); + if (OidIsValid(oldrelid)) + { + if (!ctas->if_not_exists) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_TABLE), + errmsg("relation \"%s\" already exists", + into->rel->relname))); + + /* + * The relation exists and IF NOT EXISTS has been specified. + * + * If we are in an extension script, insist that the pre-existing + * object be a member of the extension, to avoid security risks. + */ + ObjectAddressSet(address, RelationRelationId, oldrelid); + checkMembershipInCurrentExtension(&address); + + /* OK to skip */ + ereport(NOTICE, + (errcode(ERRCODE_DUPLICATE_TABLE), + errmsg("relation \"%s\" already exists, skipping", + into->rel->relname))); + return true; + } + + /* Relation does not exist, it can be created */ + return false; +} + +/* + * CreateIntoRelDestReceiver -- create a suitable DestReceiver object + * + * intoClause will be NULL if called from CreateDestReceiver(), in which + * case it has to be provided later. However, it is convenient to allow + * self->into to be filled in immediately for other callers. 
+ */ +DestReceiver * +CreateIntoRelDestReceiver(IntoClause *intoClause) +{ + DR_intorel *self = (DR_intorel *) palloc0(sizeof(DR_intorel)); + + self->pub.receiveSlot = intorel_receive; + self->pub.rStartup = intorel_startup; + self->pub.rShutdown = intorel_shutdown; + self->pub.rDestroy = intorel_destroy; + self->pub.mydest = DestIntoRel; + self->into = intoClause; + /* other private fields will be set during intorel_startup */ + + return (DestReceiver *) self; +} + +/* + * intorel_startup --- executor startup + */ +static void +intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo) +{ + DR_intorel *myState = (DR_intorel *) self; + IntoClause *into = myState->into; + bool is_matview; + List *attrList; + ObjectAddress intoRelationAddr; + Relation intoRelationDesc; + ListCell *lc; + int attnum; + + Assert(into != NULL); /* else somebody forgot to set it */ + + /* This code supports both CREATE TABLE AS and CREATE MATERIALIZED VIEW */ + is_matview = (into->viewQuery != NULL); + + /* + * Build column definitions using "pre-cooked" type and collation info. If + * a column name list was specified in CREATE TABLE AS, override the + * column names derived from the query. (Too few column names are OK, too + * many are not.) + */ + attrList = NIL; + lc = list_head(into->colNames); + for (attnum = 0; attnum < typeinfo->natts; attnum++) + { + Form_pg_attribute attribute = TupleDescAttr(typeinfo, attnum); + ColumnDef *col; + char *colname; + + if (lc) + { + colname = strVal(lfirst(lc)); + lc = lnext(into->colNames, lc); + } + else + colname = NameStr(attribute->attname); + + col = makeColumnDef(colname, + attribute->atttypid, + attribute->atttypmod, + attribute->attcollation); + + /* + * It's possible that the column is of a collatable type but the + * collation could not be resolved, so double-check. (We must check + * this here because DefineRelation would adopt the type's default + * collation rather than complaining.) 
+ */ + if (!OidIsValid(col->collOid) && + type_is_collatable(col->typeName->typeOid)) + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("no collation was derived for column \"%s\" with collatable type %s", + col->colname, + format_type_be(col->typeName->typeOid)), + errhint("Use the COLLATE clause to set the collation explicitly."))); + + attrList = lappend(attrList, col); + } + + if (lc != NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("too many column names were specified"))); + + /* + * Actually create the target table + */ + intoRelationAddr = create_ctas_internal(attrList, into); + + /* + * Finally we can open the target table + */ + intoRelationDesc = table_open(intoRelationAddr.objectId, AccessExclusiveLock); + + /* + * Make sure the constructed table does not have RLS enabled. + * + * check_enable_rls() will ereport(ERROR) itself if the user has requested + * something invalid, and otherwise will return RLS_ENABLED if RLS should + * be enabled here. We don't actually support that currently, so throw + * our own ereport(ERROR) if that happens. + */ + if (check_enable_rls(intoRelationAddr.objectId, InvalidOid, false) == RLS_ENABLED) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("policies not yet implemented for this command"))); + + /* + * Tentatively mark the target as populated, if it's a matview and we're + * going to fill it; otherwise, no change needed. + */ + if (is_matview && !into->skipData) + SetMatViewPopulatedState(intoRelationDesc, true); + + /* + * Fill private fields of myState for use by later routines + */ + myState->rel = intoRelationDesc; + myState->reladdr = intoRelationAddr; + myState->output_cid = GetCurrentCommandId(true); + myState->ti_options = TABLE_INSERT_SKIP_FSM; + + /* + * If WITH NO DATA is specified, there is no need to set up the state for + * bulk inserts as there are no tuples to insert. 
+ */ + if (!into->skipData) + myState->bistate = GetBulkInsertState(); + else + myState->bistate = NULL; + + /* + * Valid smgr_targblock implies something already wrote to the relation. + * This may be harmless, but this function hasn't planned for it. + */ + Assert(RelationGetTargetBlock(intoRelationDesc) == InvalidBlockNumber); +} + +/* + * intorel_receive --- receive one tuple + */ +static bool +intorel_receive(TupleTableSlot *slot, DestReceiver *self) +{ + DR_intorel *myState = (DR_intorel *) self; + + /* Nothing to insert if WITH NO DATA is specified. */ + if (!myState->into->skipData) + { + /* + * Note that the input slot might not be of the type of the target + * relation. That's supported by table_tuple_insert(), but slightly + * less efficient than inserting with the right slot - but the + * alternative would be to copy into a slot of the right type, which + * would not be cheap either. This also doesn't allow accessing per-AM + * data (say a tuple's xmin), but since we don't do that here... 
+ */ + table_tuple_insert(myState->rel, + slot, + myState->output_cid, + myState->ti_options, + myState->bistate); + } + + /* We know this is a newly created relation, so there are no indexes */ + + return true; +} + +/* + * intorel_shutdown --- executor end + */ +static void +intorel_shutdown(DestReceiver *self) +{ + DR_intorel *myState = (DR_intorel *) self; + IntoClause *into = myState->into; + + if (!into->skipData) + { + FreeBulkInsertState(myState->bistate); + table_finish_bulk_insert(myState->rel, myState->ti_options); + } + + /* close rel, but keep lock until commit */ + table_close(myState->rel, NoLock); + myState->rel = NULL; +} + +/* + * intorel_destroy --- release DestReceiver object + */ +static void +intorel_destroy(DestReceiver *self) +{ + pfree(self); +} diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c new file mode 100644 index 0000000..93f0c73 --- /dev/null +++ b/src/backend/commands/dbcommands.c @@ -0,0 +1,3285 @@ +/*------------------------------------------------------------------------- + * + * dbcommands.c + * Database management commands (create/drop database). + * + * Note: database creation/destruction commands use exclusive locks on + * the database objects (as expressed by LockSharedObject()) to avoid + * stepping on each others' toes. Formerly we used table-level locks + * on pg_database, but that's too coarse-grained. 
+ * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/dbcommands.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include +#include +#include + +#include "access/genam.h" +#include "access/heapam.h" +#include "access/htup_details.h" +#include "access/multixact.h" +#include "access/tableam.h" +#include "access/xact.h" +#include "access/xloginsert.h" +#include "access/xlogrecovery.h" +#include "access/xlogutils.h" +#include "catalog/catalog.h" +#include "catalog/dependency.h" +#include "catalog/indexing.h" +#include "catalog/objectaccess.h" +#include "catalog/pg_authid.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_database.h" +#include "catalog/pg_db_role_setting.h" +#include "catalog/pg_subscription.h" +#include "catalog/pg_tablespace.h" +#include "commands/comment.h" +#include "commands/dbcommands.h" +#include "commands/dbcommands_xlog.h" +#include "commands/defrem.h" +#include "commands/seclabel.h" +#include "commands/tablespace.h" +#include "common/file_perm.h" +#include "mb/pg_wchar.h" +#include "miscadmin.h" +#include "pgstat.h" +#include "postmaster/bgwriter.h" +#include "replication/slot.h" +#include "storage/copydir.h" +#include "storage/fd.h" +#include "storage/ipc.h" +#include "storage/lmgr.h" +#include "storage/md.h" +#include "storage/procarray.h" +#include "storage/smgr.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" +#include "utils/pg_locale.h" +#include "utils/relmapper.h" +#include "utils/snapmgr.h" +#include "utils/syscache.h" + +/* + * Create database strategy. + * + * CREATEDB_WAL_LOG will copy the database at the block level and WAL log each + * copied block. 
+ * + * CREATEDB_FILE_COPY will simply perform a file system level copy of the + * database and log a single record for each tablespace copied. To make this + * safe, it also triggers checkpoints before and after the operation. + */ +typedef enum CreateDBStrategy +{ + CREATEDB_WAL_LOG, + CREATEDB_FILE_COPY +} CreateDBStrategy; + +typedef struct +{ + Oid src_dboid; /* source (template) DB */ + Oid dest_dboid; /* DB we are trying to create */ + CreateDBStrategy strategy; /* create db strategy */ +} createdb_failure_params; + +typedef struct +{ + Oid dest_dboid; /* DB we are trying to move */ + Oid dest_tsoid; /* tablespace we are trying to move to */ +} movedb_failure_params; + +/* + * Information about a relation to be copied when creating a database. + */ +typedef struct CreateDBRelInfo +{ + RelFileNode rnode; /* physical relation identifier */ + Oid reloid; /* relation oid */ + bool permanent; /* relation is permanent or unlogged */ +} CreateDBRelInfo; + + +/* non-export function prototypes */ +static void createdb_failure_callback(int code, Datum arg); +static void movedb(const char *dbname, const char *tblspcname); +static void movedb_failure_callback(int code, Datum arg); +static bool get_db_info(const char *name, LOCKMODE lockmode, + Oid *dbIdP, Oid *ownerIdP, + int *encodingP, bool *dbIsTemplateP, bool *dbAllowConnP, + TransactionId *dbFrozenXidP, MultiXactId *dbMinMultiP, + Oid *dbTablespace, char **dbCollate, char **dbCtype, char **dbIculocale, + char *dbLocProvider, + char **dbCollversion); +static bool have_createdb_privilege(void); +static void remove_dbtablespaces(Oid db_id); +static bool check_db_file_conflict(Oid db_id); +static int errdetail_busy_db(int notherbackends, int npreparedxacts); +static void CreateDatabaseUsingWalLog(Oid src_dboid, Oid dboid, Oid src_tsid, + Oid dst_tsid); +static List *ScanSourceDatabasePgClass(Oid srctbid, Oid srcdbid, char *srcpath); +static List *ScanSourceDatabasePgClassPage(Page page, Buffer buf, Oid tbid, + Oid 
dbid, char *srcpath, + List *rnodelist, Snapshot snapshot); +static CreateDBRelInfo *ScanSourceDatabasePgClassTuple(HeapTupleData *tuple, + Oid tbid, Oid dbid, + char *srcpath); +static void CreateDirAndVersionFile(char *dbpath, Oid dbid, Oid tsid, + bool isRedo); +static void CreateDatabaseUsingFileCopy(Oid src_dboid, Oid dboid, Oid src_tsid, + Oid dst_tsid); +static void recovery_create_dbdir(char *path, bool only_tblspc); + +/* + * Create a new database using the WAL_LOG strategy. + * + * Each copied block is separately written to the write-ahead log. + */ +static void +CreateDatabaseUsingWalLog(Oid src_dboid, Oid dst_dboid, + Oid src_tsid, Oid dst_tsid) +{ + char *srcpath; + char *dstpath; + List *rnodelist = NULL; + ListCell *cell; + LockRelId srcrelid; + LockRelId dstrelid; + RelFileNode srcrnode; + RelFileNode dstrnode; + CreateDBRelInfo *relinfo; + + /* Get source and destination database paths. */ + srcpath = GetDatabasePath(src_dboid, src_tsid); + dstpath = GetDatabasePath(dst_dboid, dst_tsid); + + /* Create database directory and write PG_VERSION file. */ + CreateDirAndVersionFile(dstpath, dst_dboid, dst_tsid, false); + + /* Copy relmap file from source database to the destination database. */ + RelationMapCopy(dst_dboid, dst_tsid, srcpath, dstpath); + + /* Get list of relfilenodes to copy from the source database. */ + rnodelist = ScanSourceDatabasePgClass(src_tsid, src_dboid, srcpath); + Assert(rnodelist != NIL); + + /* + * Database IDs will be the same for all relations so set them before + * entering the loop. + */ + srcrelid.dbId = src_dboid; + dstrelid.dbId = dst_dboid; + + /* Loop over our list of relfilenodes and copy each one. */ + foreach(cell, rnodelist) + { + relinfo = lfirst(cell); + srcrnode = relinfo->rnode; + + /* + * If the relation is from the source db's default tablespace then we + * need to create it in the destinations db's default tablespace. 
+ * Otherwise, we need to create in the same tablespace as it is in the + * source database. + */ + if (srcrnode.spcNode == src_tsid) + dstrnode.spcNode = dst_tsid; + else + dstrnode.spcNode = srcrnode.spcNode; + + dstrnode.dbNode = dst_dboid; + dstrnode.relNode = srcrnode.relNode; + + /* + * Acquire locks on source and target relations before copying. + * + * We typically do not read relation data into shared_buffers without + * holding a relation lock. It's unclear what could go wrong if we + * skipped it in this case, because nobody can be modifying either the + * source or destination database at this point, and we have locks on + * both databases, too, but let's take the conservative route. + */ + dstrelid.relId = srcrelid.relId = relinfo->reloid; + LockRelationId(&srcrelid, AccessShareLock); + LockRelationId(&dstrelid, AccessShareLock); + + /* Copy relation storage from source to the destination. */ + CreateAndCopyRelationData(srcrnode, dstrnode, relinfo->permanent); + + /* Release the relation locks. */ + UnlockRelationId(&srcrelid, AccessShareLock); + UnlockRelationId(&dstrelid, AccessShareLock); + } + + pfree(srcpath); + pfree(dstpath); + list_free_deep(rnodelist); +} + +/* + * Scan the pg_class table in the source database to identify the relations + * that need to be copied to the destination database. + * + * This is an exception to the usual rule that cross-database access is + * not possible. We can make it work here because we know that there are no + * connections to the source database and (since there can't be prepared + * transactions touching that database) no in-doubt tuples either. This + * means that we don't need to worry about pruning removing anything from + * under us, and we don't need to be too picky about our snapshot either. + * As long as it sees all previously-committed XIDs as committed and all + * aborted XIDs as aborted, we should be fine: nothing else is possible + * here. 
+ * + * We can't rely on the relcache for anything here, because that only knows + * about the database to which we are connected, and can't handle access to + * other databases. That also means we can't rely on the heap scan + * infrastructure, which would be a bad idea anyway since it might try + * to do things like HOT pruning which we definitely can't do safely in + * a database to which we're not even connected. + */ +static List * +ScanSourceDatabasePgClass(Oid tbid, Oid dbid, char *srcpath) +{ + RelFileNode rnode; + BlockNumber nblocks; + BlockNumber blkno; + Buffer buf; + Oid relfilenode; + Page page; + List *rnodelist = NIL; + LockRelId relid; + Snapshot snapshot; + SMgrRelation smgr; + BufferAccessStrategy bstrategy; + + /* Get pg_class relfilenode. */ + relfilenode = RelationMapOidToFilenodeForDatabase(srcpath, + RelationRelationId); + + /* Don't read data into shared_buffers without holding a relation lock. */ + relid.dbId = dbid; + relid.relId = RelationRelationId; + LockRelationId(&relid, AccessShareLock); + + /* Prepare a RelFileNode for the pg_class relation. */ + rnode.spcNode = tbid; + rnode.dbNode = dbid; + rnode.relNode = relfilenode; + + smgr = smgropen(rnode, InvalidBackendId); + nblocks = smgrnblocks(smgr, MAIN_FORKNUM); + smgrclose(smgr); + + /* Use a buffer access strategy since this is a bulk read operation. */ + bstrategy = GetAccessStrategy(BAS_BULKREAD); + + /* + * As explained in the function header comments, we need a snapshot that + * will see all committed transactions as committed, and our transaction + * snapshot - or the active snapshot - might not be new enough for that, + * but the return value of GetLatestSnapshot() should work fine. + */ + snapshot = GetLatestSnapshot(); + + /* Process the relation block by block. 
*/ + for (blkno = 0; blkno < nblocks; blkno++) + { + CHECK_FOR_INTERRUPTS(); + + buf = ReadBufferWithoutRelcache(rnode, MAIN_FORKNUM, blkno, + RBM_NORMAL, bstrategy, true); + + LockBuffer(buf, BUFFER_LOCK_SHARE); + page = BufferGetPage(buf); + if (PageIsNew(page) || PageIsEmpty(page)) + { + UnlockReleaseBuffer(buf); + continue; + } + + /* Append relevant pg_class tuples for current page to rnodelist. */ + rnodelist = ScanSourceDatabasePgClassPage(page, buf, tbid, dbid, + srcpath, rnodelist, + snapshot); + + UnlockReleaseBuffer(buf); + } + + /* Release relation lock. */ + UnlockRelationId(&relid, AccessShareLock); + + return rnodelist; +} + +/* + * Scan one page of the source database's pg_class relation and add relevant + * entries to rnodelist. The return value is the updated list. + */ +static List * +ScanSourceDatabasePgClassPage(Page page, Buffer buf, Oid tbid, Oid dbid, + char *srcpath, List *rnodelist, + Snapshot snapshot) +{ + BlockNumber blkno = BufferGetBlockNumber(buf); + OffsetNumber offnum; + OffsetNumber maxoff; + HeapTupleData tuple; + + maxoff = PageGetMaxOffsetNumber(page); + + /* Loop over offsets. */ + for (offnum = FirstOffsetNumber; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) + { + ItemId itemid; + + itemid = PageGetItemId(page, offnum); + + /* Nothing to do if slot is empty or already dead. */ + if (!ItemIdIsUsed(itemid) || ItemIdIsDead(itemid) || + ItemIdIsRedirected(itemid)) + continue; + + Assert(ItemIdIsNormal(itemid)); + ItemPointerSet(&(tuple.t_self), blkno, offnum); + + /* Initialize a HeapTupleData structure. */ + tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); + tuple.t_len = ItemIdGetLength(itemid); + tuple.t_tableOid = RelationRelationId; + + /* Skip tuples that are not visible to this snapshot. 
*/ + if (HeapTupleSatisfiesVisibility(&tuple, snapshot, buf)) + { + CreateDBRelInfo *relinfo; + + /* + * ScanSourceDatabasePgClassTuple is in charge of constructing a + * CreateDBRelInfo object for this tuple, but can also decide that + * this tuple isn't something we need to copy. If we do need to + * copy the relation, add it to the list. + */ + relinfo = ScanSourceDatabasePgClassTuple(&tuple, tbid, dbid, + srcpath); + if (relinfo != NULL) + rnodelist = lappend(rnodelist, relinfo); + } + } + + return rnodelist; +} + +/* + * Decide whether a certain pg_class tuple represents something that + * needs to be copied from the source database to the destination database, + * and if so, construct a CreateDBRelInfo for it. + * + * Visibility checks are handled by the caller, so our job here is just + * to assess the data stored in the tuple. + */ +CreateDBRelInfo * +ScanSourceDatabasePgClassTuple(HeapTupleData *tuple, Oid tbid, Oid dbid, + char *srcpath) +{ + CreateDBRelInfo *relinfo; + Form_pg_class classForm; + Oid relfilenode = InvalidOid; + + classForm = (Form_pg_class) GETSTRUCT(tuple); + + /* + * Return NULL if this object does not need to be copied. + * + * Shared objects don't need to be copied, because they are shared. + * Objects without storage can't be copied, because there's nothing to + * copy. Temporary relations don't need to be copied either, because they + * are inaccessible outside of the session that created them, which must + * be gone already, and couldn't connect to a different database if it + * still existed. autovacuum will eventually remove the pg_class entries + * as well. + */ + if (classForm->reltablespace == GLOBALTABLESPACE_OID || + !RELKIND_HAS_STORAGE(classForm->relkind) || + classForm->relpersistence == RELPERSISTENCE_TEMP) + return NULL; + + /* + * If relfilenode is valid then directly use it. Otherwise, consult the + * relmap. 
	 */
	if (OidIsValid(classForm->relfilenode))
		relfilenode = classForm->relfilenode;
	else
		relfilenode = RelationMapOidToFilenodeForDatabase(srcpath,
														  classForm->oid);

	/* We must have a valid relfilenode oid. */
	if (!OidIsValid(relfilenode))
		elog(ERROR, "relation with OID %u does not have a valid relfilenode",
			 classForm->oid);

	/* Prepare a rel info element and add it to the list. */
	relinfo = (CreateDBRelInfo *) palloc(sizeof(CreateDBRelInfo));
	/* A zero reltablespace means the database's default tablespace. */
	if (OidIsValid(classForm->reltablespace))
		relinfo->rnode.spcNode = classForm->reltablespace;
	else
		relinfo->rnode.spcNode = tbid;

	relinfo->rnode.dbNode = dbid;
	relinfo->rnode.relNode = relfilenode;
	relinfo->reloid = classForm->oid;

	/* Temporary relations were rejected above. */
	Assert(classForm->relpersistence != RELPERSISTENCE_TEMP);
	relinfo->permanent =
		(classForm->relpersistence == RELPERSISTENCE_PERMANENT) ? true : false;

	return relinfo;
}

/*
 * Create database directory and write out the PG_VERSION file in the database
 * path. If isRedo is true, it's okay for the database directory to exist
 * already.
 */
static void
CreateDirAndVersionFile(char *dbpath, Oid dbid, Oid tsid, bool isRedo)
{
	int			fd;
	int			nbytes;
	char		versionfile[MAXPGPATH];
	char		buf[16];

	/*
	 * Prepare version data before starting a critical section.
	 *
	 * Note that we don't have to copy this from the source database; there's
	 * only one legal value.
	 */
	sprintf(buf, "%s\n", PG_MAJORVERSION);
	/* Length of the version string plus the trailing newline. */
	nbytes = strlen(PG_MAJORVERSION) + 1;

	/* If we are not in WAL replay then write the WAL. */
	if (!isRedo)
	{
		xl_dbase_create_wal_log_rec xlrec;
		XLogRecPtr	lsn;

		START_CRIT_SECTION();

		xlrec.db_id = dbid;
		xlrec.tablespace_id = tsid;

		XLogBeginInsert();
		XLogRegisterData((char *) (&xlrec),
						 sizeof(xl_dbase_create_wal_log_rec));

		lsn = XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE_WAL_LOG);

		/* As always, WAL must hit the disk before the data update does. */
		XLogFlush(lsn);
	}

	/* Create database directory. */
	if (MakePGDirectory(dbpath) < 0)
	{
		/* Failure other than already exists or not in WAL replay? */
		if (errno != EEXIST || !isRedo)
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("could not create directory \"%s\": %m", dbpath)));
	}

	/*
	 * Create PG_VERSION file in the database path. If the file already
	 * exists and we are in WAL replay then try again to open it in write
	 * mode.
	 */
	snprintf(versionfile, sizeof(versionfile), "%s/%s", dbpath, "PG_VERSION");

	/* O_EXCL first, so a pre-existing file is detected ... */
	fd = OpenTransientFile(versionfile, O_WRONLY | O_CREAT | O_EXCL | PG_BINARY);
	/* ... and overwritten (O_TRUNC) only during WAL replay. */
	if (fd < 0 && errno == EEXIST && isRedo)
		fd = OpenTransientFile(versionfile, O_WRONLY | O_TRUNC | PG_BINARY);

	if (fd < 0)
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not create file \"%s\": %m", versionfile)));

	/* Write PG_MAJORVERSION in the PG_VERSION file. */
	pgstat_report_wait_start(WAIT_EVENT_VERSION_FILE_WRITE);
	errno = 0;
	if ((int) write(fd, buf, nbytes) != nbytes)
	{
		/* If write didn't set errno, assume problem is no disk space. */
		if (errno == 0)
			errno = ENOSPC;
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not write to file \"%s\": %m", versionfile)));
	}
	pgstat_report_wait_end();

	/* Close the version file. */
	CloseTransientFile(fd);

	/* Critical section done. */
	if (!isRedo)
		END_CRIT_SECTION();
}

/*
 * Create a new database using the FILE_COPY strategy.
 *
 * Copy each tablespace at the filesystem level, and log a single WAL record
 * for each tablespace copied. This requires a checkpoint before and after the
 * copy, which may be expensive, but it does greatly reduce WAL generation
 * if the copied database is large.
 */
static void
CreateDatabaseUsingFileCopy(Oid src_dboid, Oid dst_dboid, Oid src_tsid,
							Oid dst_tsid)
{
	TableScanDesc scan;
	Relation	rel;
	HeapTuple	tuple;

	/*
	 * Force a checkpoint before starting the copy.
	 * This will force all dirty
	 * buffers, including those of unlogged tables, out to disk, to ensure
	 * source database is up-to-date on disk for the copy.
	 * FlushDatabaseBuffers() would suffice for that, but we also want to
	 * process any pending unlink requests. Otherwise, if a checkpoint
	 * happened while we're copying files, a file might be deleted just when
	 * we're about to copy it, causing the lstat() call in copydir() to fail
	 * with ENOENT.
	 */
	RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE |
					  CHECKPOINT_WAIT | CHECKPOINT_FLUSH_ALL);

	/*
	 * Iterate through all tablespaces of the template database, and copy each
	 * one to the new database.
	 */
	rel = table_open(TableSpaceRelationId, AccessShareLock);
	scan = table_beginscan_catalog(rel, 0, NULL);
	while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
	{
		Form_pg_tablespace spaceform = (Form_pg_tablespace) GETSTRUCT(tuple);
		Oid			srctablespace = spaceform->oid;
		Oid			dsttablespace;
		char	   *srcpath;
		char	   *dstpath;
		struct stat st;

		/* No need to copy global tablespace */
		if (srctablespace == GLOBALTABLESPACE_OID)
			continue;

		srcpath = GetDatabasePath(src_dboid, srctablespace);

		/* Missing, non-directory, or empty source paths are skipped. */
		if (stat(srcpath, &st) < 0 || !S_ISDIR(st.st_mode) ||
			directory_is_empty(srcpath))
		{
			/* Assume we can ignore it */
			pfree(srcpath);
			continue;
		}

		/* Map the source's default tablespace to the destination's. */
		if (srctablespace == src_tsid)
			dsttablespace = dst_tsid;
		else
			dsttablespace = srctablespace;

		dstpath = GetDatabasePath(dst_dboid, dsttablespace);

		/*
		 * Copy this subdirectory to the new location
		 *
		 * We don't need to copy subdirectories
		 */
		copydir(srcpath, dstpath, false);

		/* Record the filesystem change in XLOG */
		{
			xl_dbase_create_file_copy_rec xlrec;

			xlrec.db_id = dst_dboid;
			xlrec.tablespace_id = dsttablespace;
			xlrec.src_db_id = src_dboid;
			xlrec.src_tablespace_id = srctablespace;

			XLogBeginInsert();
			XLogRegisterData((char *) &xlrec,
							 sizeof(xl_dbase_create_file_copy_rec));

			(void) XLogInsert(RM_DBASE_ID,
							  XLOG_DBASE_CREATE_FILE_COPY | XLR_SPECIAL_REL_UPDATE);
		}
		pfree(srcpath);
		pfree(dstpath);
	}
	table_endscan(scan);
	table_close(rel, AccessShareLock);

	/*
	 * We force a checkpoint before committing. This effectively means that
	 * committed XLOG_DBASE_CREATE_FILE_COPY operations will never need to be
	 * replayed (at least not in ordinary crash recovery; we still have to
	 * make the XLOG entry for the benefit of PITR operations). This avoids
	 * two nasty scenarios:
	 *
	 * #1: When PITR is off, we don't XLOG the contents of newly created
	 * indexes; therefore the drop-and-recreate-whole-directory behavior of
	 * DBASE_CREATE replay would lose such indexes.
	 *
	 * #2: Since we have to recopy the source database during DBASE_CREATE
	 * replay, we run the risk of copying changes in it that were committed
	 * after the original CREATE DATABASE command but before the system crash
	 * that led to the replay. This is at least unexpected and at worst could
	 * lead to inconsistencies, eg duplicate table names.
	 *
	 * (Both of these were real bugs in releases 8.0 through 8.0.3.)
	 *
	 * In PITR replay, the first of these isn't an issue, and the second is
	 * only a risk if the CREATE DATABASE and subsequent template database
	 * change both occur while a base backup is being taken. There doesn't
	 * seem to be much we can do about that except document it as a
	 * limitation.
	 *
	 * See CreateDatabaseUsingWalLog() for a less cheesy CREATE DATABASE
	 * strategy that avoids these problems.
	 */
	RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT);
}

/*
 * CREATE DATABASE
 */
Oid
createdb(ParseState *pstate, const CreatedbStmt *stmt)
{
	Oid			src_dboid;
	Oid			src_owner;
	int			src_encoding = -1;
	char	   *src_collate = NULL;
	char	   *src_ctype = NULL;
	char	   *src_iculocale = NULL;
	char		src_locprovider = '\0';
	char	   *src_collversion = NULL;
	bool		src_istemplate;
	bool		src_allowconn;
	TransactionId src_frozenxid = InvalidTransactionId;
	MultiXactId src_minmxid = InvalidMultiXactId;
	Oid			src_deftablespace;
	/* volatile because read after a possible longjmp out of the PG_ENSURE block */
	volatile Oid dst_deftablespace;
	Relation	pg_database_rel;
	HeapTuple	tuple;
	Datum		new_record[Natts_pg_database];
	bool		new_record_nulls[Natts_pg_database];
	Oid			dboid = InvalidOid;
	Oid			datdba;
	ListCell   *option;
	DefElem    *dtablespacename = NULL;
	DefElem    *downer = NULL;
	DefElem    *dtemplate = NULL;
	DefElem    *dencoding = NULL;
	DefElem    *dlocale = NULL;
	DefElem    *dcollate = NULL;
	DefElem    *dctype = NULL;
	DefElem    *diculocale = NULL;
	DefElem    *dlocprovider = NULL;
	DefElem    *distemplate = NULL;
	DefElem    *dallowconnections = NULL;
	DefElem    *dconnlimit = NULL;
	DefElem    *dcollversion = NULL;
	DefElem    *dstrategy = NULL;
	char	   *dbname = stmt->dbname;
	char	   *dbowner = NULL;
	const char *dbtemplate = NULL;
	char	   *dbcollate = NULL;
	char	   *dbctype = NULL;
	char	   *dbiculocale = NULL;
	char		dblocprovider = '\0';
	char	   *canonname;
	int			encoding = -1;
	bool		dbistemplate = false;
	bool		dballowconnections = true;
	int			dbconnlimit = DATCONNLIMIT_UNLIMITED;
	char	   *dbcollversion = NULL;
	int			notherbackends;
	int			npreparedxacts;
	CreateDBStrategy dbstrategy = CREATEDB_WAL_LOG;
	createdb_failure_params fparms;

	/* Extract options from the statement node tree */
	foreach(option, stmt->options)
	{
		DefElem    *defel = (DefElem *) lfirst(option);

		if (strcmp(defel->defname, "tablespace") == 0)
		{
			if (dtablespacename)
				errorConflictingDefElem(defel, pstate);
			dtablespacename = defel;
		}
else if (strcmp(defel->defname, "owner") == 0) + { + if (downer) + errorConflictingDefElem(defel, pstate); + downer = defel; + } + else if (strcmp(defel->defname, "template") == 0) + { + if (dtemplate) + errorConflictingDefElem(defel, pstate); + dtemplate = defel; + } + else if (strcmp(defel->defname, "encoding") == 0) + { + if (dencoding) + errorConflictingDefElem(defel, pstate); + dencoding = defel; + } + else if (strcmp(defel->defname, "locale") == 0) + { + if (dlocale) + errorConflictingDefElem(defel, pstate); + dlocale = defel; + } + else if (strcmp(defel->defname, "lc_collate") == 0) + { + if (dcollate) + errorConflictingDefElem(defel, pstate); + dcollate = defel; + } + else if (strcmp(defel->defname, "lc_ctype") == 0) + { + if (dctype) + errorConflictingDefElem(defel, pstate); + dctype = defel; + } + else if (strcmp(defel->defname, "icu_locale") == 0) + { + if (diculocale) + errorConflictingDefElem(defel, pstate); + diculocale = defel; + } + else if (strcmp(defel->defname, "locale_provider") == 0) + { + if (dlocprovider) + errorConflictingDefElem(defel, pstate); + dlocprovider = defel; + } + else if (strcmp(defel->defname, "is_template") == 0) + { + if (distemplate) + errorConflictingDefElem(defel, pstate); + distemplate = defel; + } + else if (strcmp(defel->defname, "allow_connections") == 0) + { + if (dallowconnections) + errorConflictingDefElem(defel, pstate); + dallowconnections = defel; + } + else if (strcmp(defel->defname, "connection_limit") == 0) + { + if (dconnlimit) + errorConflictingDefElem(defel, pstate); + dconnlimit = defel; + } + else if (strcmp(defel->defname, "collation_version") == 0) + { + if (dcollversion) + errorConflictingDefElem(defel, pstate); + dcollversion = defel; + } + else if (strcmp(defel->defname, "location") == 0) + { + ereport(WARNING, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("LOCATION is not supported anymore"), + errhint("Consider using tablespaces instead."), + parser_errposition(pstate, defel->location))); + } 
+ else if (strcmp(defel->defname, "oid") == 0) + { + dboid = defGetObjectId(defel); + + /* + * We don't normally permit new databases to be created with + * system-assigned OIDs. pg_upgrade tries to preserve database + * OIDs, so we can't allow any database to be created with an OID + * that might be in use in a freshly-initialized cluster created + * by some future version. We assume all such OIDs will be from + * the system-managed OID range. + * + * As an exception, however, we permit any OID to be assigned when + * allow_system_table_mods=on (so that initdb can assign system + * OIDs to template0 and postgres) or when performing a binary + * upgrade (so that pg_upgrade can preserve whatever OIDs it finds + * in the source cluster). + */ + if (dboid < FirstNormalObjectId && + !allowSystemTableMods && !IsBinaryUpgrade) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE)), + errmsg("OIDs less than %u are reserved for system objects", FirstNormalObjectId)); + } + else if (strcmp(defel->defname, "strategy") == 0) + { + if (dstrategy) + errorConflictingDefElem(defel, pstate); + dstrategy = defel; + } + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("option \"%s\" not recognized", defel->defname), + parser_errposition(pstate, defel->location))); + } + + if (downer && downer->arg) + dbowner = defGetString(downer); + if (dtemplate && dtemplate->arg) + dbtemplate = defGetString(dtemplate); + if (dencoding && dencoding->arg) + { + const char *encoding_name; + + if (IsA(dencoding->arg, Integer)) + { + encoding = defGetInt32(dencoding); + encoding_name = pg_encoding_to_char(encoding); + if (strcmp(encoding_name, "") == 0 || + pg_valid_server_encoding(encoding_name) < 0) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("%d is not a valid encoding code", + encoding), + parser_errposition(pstate, dencoding->location))); + } + else + { + encoding_name = defGetString(dencoding); + encoding = pg_valid_server_encoding(encoding_name); + if 
(encoding < 0) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("%s is not a valid encoding name", + encoding_name), + parser_errposition(pstate, dencoding->location))); + } + } + if (dlocale && dlocale->arg) + { + dbcollate = defGetString(dlocale); + dbctype = defGetString(dlocale); + } + if (dcollate && dcollate->arg) + dbcollate = defGetString(dcollate); + if (dctype && dctype->arg) + dbctype = defGetString(dctype); + if (diculocale && diculocale->arg) + dbiculocale = defGetString(diculocale); + if (dlocprovider && dlocprovider->arg) + { + char *locproviderstr = defGetString(dlocprovider); + + if (pg_strcasecmp(locproviderstr, "icu") == 0) + dblocprovider = COLLPROVIDER_ICU; + else if (pg_strcasecmp(locproviderstr, "libc") == 0) + dblocprovider = COLLPROVIDER_LIBC; + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("unrecognized locale provider: %s", + locproviderstr))); + } + if (distemplate && distemplate->arg) + dbistemplate = defGetBoolean(distemplate); + if (dallowconnections && dallowconnections->arg) + dballowconnections = defGetBoolean(dallowconnections); + if (dconnlimit && dconnlimit->arg) + { + dbconnlimit = defGetInt32(dconnlimit); + if (dbconnlimit < DATCONNLIMIT_UNLIMITED) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid connection limit: %d", dbconnlimit))); + } + if (dcollversion) + dbcollversion = defGetString(dcollversion); + + /* obtain OID of proposed owner */ + if (dbowner) + datdba = get_role_oid(dbowner, false); + else + datdba = GetUserId(); + + /* + * To create a database, must have createdb privilege and must be able to + * become the target role (this does not imply that the target role itself + * must have createdb privilege). The latter provision guards against + * "giveaway" attacks. Note that a superuser will always have both of + * these privileges a fortiori. 
	 */
	if (!have_createdb_privilege())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied to create database")));

	/* Errors if the current user cannot become the proposed owner. */
	check_is_member_of_role(GetUserId(), datdba);

	/*
	 * Lookup database (template) to be cloned, and obtain share lock on it.
	 * ShareLock allows two CREATE DATABASEs to work from the same template
	 * concurrently, while ensuring no one is busy dropping it in parallel
	 * (which would be Very Bad since we'd likely get an incomplete copy
	 * without knowing it). This also prevents any new connections from being
	 * made to the source until we finish copying it, so we can be sure it
	 * won't change underneath us.
	 */
	if (!dbtemplate)
		dbtemplate = "template1";	/* Default template database name */

	if (!get_db_info(dbtemplate, ShareLock,
					 &src_dboid, &src_owner, &src_encoding,
					 &src_istemplate, &src_allowconn,
					 &src_frozenxid, &src_minmxid, &src_deftablespace,
					 &src_collate, &src_ctype, &src_iculocale, &src_locprovider,
					 &src_collversion))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_DATABASE),
				 errmsg("template database \"%s\" does not exist",
						dbtemplate)));

	/*
	 * If the source database was in the process of being dropped, we can't
	 * use it as a template.
	 */
	if (database_is_invalid_oid(src_dboid))
		ereport(ERROR,
				errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				errmsg("cannot use invalid database \"%s\" as template", dbtemplate),
				errhint("Use DROP DATABASE to drop invalid databases."));

	/*
	 * Permission check: to copy a DB that's not marked datistemplate, you
	 * must be superuser or the owner thereof.
	 */
	if (!src_istemplate)
	{
		if (!pg_database_ownercheck(src_dboid, GetUserId()))
			ereport(ERROR,
					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
					 errmsg("permission denied to copy database \"%s\"",
							dbtemplate)));
	}

	/* Validate the database creation strategy. */
	if (dstrategy && dstrategy->arg)
	{
		char	   *strategy;

		strategy = defGetString(dstrategy);
		if (strcmp(strategy, "wal_log") == 0)
			dbstrategy = CREATEDB_WAL_LOG;
		else if (strcmp(strategy, "file_copy") == 0)
			dbstrategy = CREATEDB_FILE_COPY;
		else
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("invalid create database strategy \"%s\"", strategy),
					 errhint("Valid strategies are \"wal_log\", and \"file_copy\".")));
	}

	/* If encoding or locales are defaulted, use source's setting */
	if (encoding < 0)
		encoding = src_encoding;
	if (dbcollate == NULL)
		dbcollate = src_collate;
	if (dbctype == NULL)
		dbctype = src_ctype;
	if (dblocprovider == '\0')
		dblocprovider = src_locprovider;
	if (dbiculocale == NULL && dblocprovider == COLLPROVIDER_ICU)
		dbiculocale = src_iculocale;

	/* Some encodings are client only */
	if (!PG_VALID_BE_ENCODING(encoding))
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("invalid server encoding %d", encoding)));

	/* Check that the chosen locales are valid, and get canonical spellings */
	if (!check_locale(LC_COLLATE, dbcollate, &canonname))
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("invalid locale name: \"%s\"", dbcollate)));
	dbcollate = canonname;
	if (!check_locale(LC_CTYPE, dbctype, &canonname))
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("invalid locale name: \"%s\"", dbctype)));
	dbctype = canonname;

	check_encoding_locale_matches(encoding, dbcollate, dbctype);

	if (dblocprovider == COLLPROVIDER_ICU)
	{
		if (!(is_encoding_supported_by_icu(encoding)))
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("encoding \"%s\" is not supported with ICU provider",
							pg_encoding_to_char(encoding))));

		/*
		 * This would happen if template0 uses the libc provider but the new
		 * database uses icu.
		 */
		if (!dbiculocale)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("ICU locale must be specified")));

		check_icu_locale(dbiculocale);
	}
	else
	{
		if (dbiculocale)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
					 errmsg("ICU locale cannot be specified unless locale provider is ICU")));
	}

	/*
	 * Check that the new encoding and locale settings match the source
	 * database. We insist on this because we simply copy the source data ---
	 * any non-ASCII data would be wrongly encoded, and any indexes sorted
	 * according to the source locale would be wrong.
	 *
	 * However, we assume that template0 doesn't contain any non-ASCII data
	 * nor any indexes that depend on collation or ctype, so template0 can be
	 * used as template for creating a database with any encoding or locale.
	 */
	if (strcmp(dbtemplate, "template0") != 0)
	{
		if (encoding != src_encoding)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("new encoding (%s) is incompatible with the encoding of the template database (%s)",
							pg_encoding_to_char(encoding),
							pg_encoding_to_char(src_encoding)),
					 errhint("Use the same encoding as in the template database, or use template0 as template.")));

		if (strcmp(dbcollate, src_collate) != 0)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("new collation (%s) is incompatible with the collation of the template database (%s)",
							dbcollate, src_collate),
					 errhint("Use the same collation as in the template database, or use template0 as template.")));

		if (strcmp(dbctype, src_ctype) != 0)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("new LC_CTYPE (%s) is incompatible with the LC_CTYPE of the template database (%s)",
							dbctype, src_ctype),
					 errhint("Use the same LC_CTYPE as in the template database, or use template0 as template.")));

		if (dblocprovider != src_locprovider)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("new locale provider (%s) does not match locale provider of the template database (%s)",
							collprovider_name(dblocprovider), collprovider_name(src_locprovider)),
					 errhint("Use the same locale provider as in the template database, or use template0 as template.")));

		if (dblocprovider == COLLPROVIDER_ICU)
		{
			/* Both set by the defaulting logic above if not given explicitly. */
			Assert(dbiculocale);
			Assert(src_iculocale);
			if (strcmp(dbiculocale, src_iculocale) != 0)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
						 errmsg("new ICU locale (%s) is incompatible with the ICU locale of the template database (%s)",
								dbiculocale, src_iculocale),
						 errhint("Use the same ICU locale as in the template database, or use template0 as template.")));
		}
	}

	/*
	 * If we got a collation version for the template database, check that it
	 * matches the actual OS collation version. Otherwise error; the user
	 * needs to fix the template database first. Don't complain if a
	 * collation version was specified explicitly as a statement option; that
	 * is used by pg_upgrade to reproduce the old state exactly.
	 *
	 * (If the template database has no collation version, then either the
	 * platform/provider does not support collation versioning, or it's
	 * template0, for which we stipulate that it does not contain
	 * collation-using objects.)
	 */
	if (src_collversion && !dcollversion)
	{
		char	   *actual_versionstr;

		actual_versionstr = get_collation_actual_version(dblocprovider, dblocprovider == COLLPROVIDER_ICU ? dbiculocale : dbcollate);
		if (!actual_versionstr)
			ereport(ERROR,
					(errmsg("template database \"%s\" has a collation version, but no actual collation version could be determined",
							dbtemplate)));

		if (strcmp(actual_versionstr, src_collversion) != 0)
			ereport(ERROR,
					(errmsg("template database \"%s\" has a collation version mismatch",
							dbtemplate),
					 errdetail("The template database was created using collation version %s, "
							   "but the operating system provides version %s.",
							   src_collversion, actual_versionstr),
					 errhint("Rebuild all objects in the template database that use the default collation and run "
							 "ALTER DATABASE %s REFRESH COLLATION VERSION, "
							 "or build PostgreSQL with the right library version.",
							 quote_identifier(dbtemplate))));
	}

	if (dbcollversion == NULL)
		dbcollversion = src_collversion;

	/*
	 * Normally, we copy the collation version from the template database.
	 * This last resort only applies if the template database does not have a
	 * collation version, which is normally only the case for template0.
	 */
	if (dbcollversion == NULL)
		dbcollversion = get_collation_actual_version(dblocprovider, dblocprovider == COLLPROVIDER_ICU ? dbiculocale : dbcollate);

	/* Resolve default tablespace for new database */
	if (dtablespacename && dtablespacename->arg)
	{
		char	   *tablespacename;
		AclResult	aclresult;

		tablespacename = defGetString(dtablespacename);
		dst_deftablespace = get_tablespace_oid(tablespacename, false);
		/* check permissions */
		aclresult = pg_tablespace_aclcheck(dst_deftablespace, GetUserId(),
										   ACL_CREATE);
		if (aclresult != ACLCHECK_OK)
			aclcheck_error(aclresult, OBJECT_TABLESPACE,
						   tablespacename);

		/* pg_global must never be the default tablespace */
		if (dst_deftablespace == GLOBALTABLESPACE_OID)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("pg_global cannot be used as default tablespace")));

		/*
		 * If we are trying to change the default tablespace of the template,
		 * we require that the template not have any files in the new default
		 * tablespace. This is necessary because otherwise the copied
		 * database would contain pg_class rows that refer to its default
		 * tablespace both explicitly (by OID) and implicitly (as zero), which
		 * would cause problems. For example another CREATE DATABASE using
		 * the copied database as template, and trying to change its default
		 * tablespace again, would yield outright incorrect results (it would
		 * improperly move tables to the new default tablespace that should
		 * stay in the same tablespace).
+ */ + if (dst_deftablespace != src_deftablespace) + { + char *srcpath; + struct stat st; + + srcpath = GetDatabasePath(src_dboid, dst_deftablespace); + + if (stat(srcpath, &st) == 0 && + S_ISDIR(st.st_mode) && + !directory_is_empty(srcpath)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot assign new default tablespace \"%s\"", + tablespacename), + errdetail("There is a conflict because database \"%s\" already has some tables in this tablespace.", + dbtemplate))); + pfree(srcpath); + } + } + else + { + /* Use template database's default tablespace */ + dst_deftablespace = src_deftablespace; + /* Note there is no additional permission check in this path */ + } + + /* + * If built with appropriate switch, whine when regression-testing + * conventions for database names are violated. But don't complain during + * initdb. + */ +#ifdef ENFORCE_REGRESSION_TEST_NAME_RESTRICTIONS + if (IsUnderPostmaster && strstr(dbname, "regression") == NULL) + elog(WARNING, "databases created by regression test cases should have names including \"regression\""); +#endif + + /* + * Check for db name conflict. This is just to give a more friendly error + * message than "unique index violation". There's a race condition but + * we're willing to accept the less friendly message in that case. + */ + if (OidIsValid(get_database_oid(dbname, true))) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_DATABASE), + errmsg("database \"%s\" already exists", dbname))); + + /* + * The source DB can't have any active backends, except this one + * (exception is to allow CREATE DB while connected to template1). + * Otherwise we might copy inconsistent data. + * + * This should be last among the basic error checks, because it involves + * potential waiting; we may as well throw an error first if we're gonna + * throw one. 
+ */ + if (CountOtherDBBackends(src_dboid, ¬herbackends, &npreparedxacts)) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_IN_USE), + errmsg("source database \"%s\" is being accessed by other users", + dbtemplate), + errdetail_busy_db(notherbackends, npreparedxacts))); + + /* + * Select an OID for the new database, checking that it doesn't have a + * filename conflict with anything already existing in the tablespace + * directories. + */ + pg_database_rel = table_open(DatabaseRelationId, RowExclusiveLock); + + /* + * If database OID is configured, check if the OID is already in use or + * data directory already exists. + */ + if (OidIsValid(dboid)) + { + char *existing_dbname = get_database_name(dboid); + + if (existing_dbname != NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE)), + errmsg("database OID %u is already in use by database \"%s\"", + dboid, existing_dbname)); + + if (check_db_file_conflict(dboid)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE)), + errmsg("data directory with the specified OID %u already exists", dboid)); + } + else + { + /* Select an OID for the new database if is not explicitly configured. */ + do + { + dboid = GetNewOidWithIndex(pg_database_rel, DatabaseOidIndexId, + Anum_pg_database_oid); + } while (check_db_file_conflict(dboid)); + } + + /* + * Insert a new tuple into pg_database. This establishes our ownership of + * the new database name (anyone else trying to insert the same name will + * block on the unique index, and fail after we commit). 
+ */ + + Assert((dblocprovider == COLLPROVIDER_ICU && dbiculocale) || + (dblocprovider != COLLPROVIDER_ICU && !dbiculocale)); + + /* Form tuple */ + MemSet(new_record, 0, sizeof(new_record)); + MemSet(new_record_nulls, false, sizeof(new_record_nulls)); + + new_record[Anum_pg_database_oid - 1] = ObjectIdGetDatum(dboid); + new_record[Anum_pg_database_datname - 1] = + DirectFunctionCall1(namein, CStringGetDatum(dbname)); + new_record[Anum_pg_database_datdba - 1] = ObjectIdGetDatum(datdba); + new_record[Anum_pg_database_encoding - 1] = Int32GetDatum(encoding); + new_record[Anum_pg_database_datlocprovider - 1] = CharGetDatum(dblocprovider); + new_record[Anum_pg_database_datistemplate - 1] = BoolGetDatum(dbistemplate); + new_record[Anum_pg_database_datallowconn - 1] = BoolGetDatum(dballowconnections); + new_record[Anum_pg_database_datconnlimit - 1] = Int32GetDatum(dbconnlimit); + new_record[Anum_pg_database_datfrozenxid - 1] = TransactionIdGetDatum(src_frozenxid); + new_record[Anum_pg_database_datminmxid - 1] = TransactionIdGetDatum(src_minmxid); + new_record[Anum_pg_database_dattablespace - 1] = ObjectIdGetDatum(dst_deftablespace); + new_record[Anum_pg_database_datcollate - 1] = CStringGetTextDatum(dbcollate); + new_record[Anum_pg_database_datctype - 1] = CStringGetTextDatum(dbctype); + if (dbiculocale) + new_record[Anum_pg_database_daticulocale - 1] = CStringGetTextDatum(dbiculocale); + else + new_record_nulls[Anum_pg_database_daticulocale - 1] = true; + if (dbcollversion) + new_record[Anum_pg_database_datcollversion - 1] = CStringGetTextDatum(dbcollversion); + else + new_record_nulls[Anum_pg_database_datcollversion - 1] = true; + + /* + * We deliberately set datacl to default (NULL), rather than copying it + * from the template database. Copying it would be a bad idea when the + * owner is not the same as the template's owner. 
	 */
	new_record_nulls[Anum_pg_database_datacl - 1] = true;

	tuple = heap_form_tuple(RelationGetDescr(pg_database_rel),
							new_record, new_record_nulls);

	CatalogTupleInsert(pg_database_rel, tuple);

	/*
	 * Now generate additional catalog entries associated with the new DB
	 */

	/* Register owner dependency */
	recordDependencyOnOwner(DatabaseRelationId, dboid, datdba);

	/* Create pg_shdepend entries for objects within database */
	copyTemplateDependencies(src_dboid, dboid);

	/* Post creation hook for new database */
	InvokeObjectPostCreateHook(DatabaseRelationId, dboid, 0);

	/*
	 * If we're going to be reading data for the to-be-created database into
	 * shared_buffers, take a lock on it. Nobody should know that this
	 * database exists yet, but it's good to maintain the invariant that a
	 * lock an AccessExclusiveLock on the database is sufficient to drop all
	 * of its buffers without worrying about more being read later.
	 *
	 * Note that we need to do this before entering the
	 * PG_ENSURE_ERROR_CLEANUP block below, because createdb_failure_callback
	 * expects this lock to be held already.
	 */
	if (dbstrategy == CREATEDB_WAL_LOG)
		LockSharedObject(DatabaseRelationId, dboid, 0, AccessShareLock);

	/*
	 * Once we start copying subdirectories, we need to be able to clean 'em
	 * up if we fail. Use an ENSURE block to make sure this happens. (This
	 * is not a 100% solution, because of the possibility of failure during
	 * transaction commit after we leave this routine, but it should handle
	 * most scenarios.)
	 */
	fparms.src_dboid = src_dboid;
	fparms.dest_dboid = dboid;
	fparms.strategy = dbstrategy;

	PG_ENSURE_ERROR_CLEANUP(createdb_failure_callback,
							PointerGetDatum(&fparms));
	{
		/*
		 * If the user has asked to create a database with WAL_LOG strategy
		 * then call CreateDatabaseUsingWalLog, which will copy the database
		 * at the block level and it will WAL log each copied block.
		 * Otherwise, call CreateDatabaseUsingFileCopy that will copy the
		 * database file by file.
		 */
		if (dbstrategy == CREATEDB_WAL_LOG)
			CreateDatabaseUsingWalLog(src_dboid, dboid, src_deftablespace,
									  dst_deftablespace);
		else
			CreateDatabaseUsingFileCopy(src_dboid, dboid, src_deftablespace,
										dst_deftablespace);

		/*
		 * Close pg_database, but keep lock till commit.
		 */
		table_close(pg_database_rel, NoLock);

		/*
		 * Force synchronous commit, thus minimizing the window between
		 * creation of the database files and committal of the transaction. If
		 * we crash before committing, we'll have a DB that's taking up disk
		 * space but is not in pg_database, which is not good.
		 */
		ForceSyncCommit();
	}
	PG_END_ENSURE_ERROR_CLEANUP(createdb_failure_callback,
								PointerGetDatum(&fparms));

	return dboid;
}

/*
 * Check whether chosen encoding matches chosen locale settings. This
 * restriction is necessary because libc's locale-specific code usually
 * fails when presented with data in an encoding it's not expecting. We
 * allow mismatch in four cases:
 *
 * 1. locale encoding = SQL_ASCII, which means that the locale is C/POSIX
 * which works with any encoding.
 *
 * 2. locale encoding = -1, which means that we couldn't determine the
 * locale's encoding and have to trust the user to get it right.
 *
 * 3. selected encoding is UTF8 and platform is win32. This is because
 * UTF8 is a pseudo codepage that is supported in all locales since it's
 * converted to UTF16 before being used.
 *
 * 4. selected encoding is SQL_ASCII, but only if you're a superuser. This
 * is risky but we have historically allowed it --- notably, the
 * regression tests require it.
 *
 * Note: if you change this policy, fix initdb to match.
 */
void
check_encoding_locale_matches(int encoding, const char *collate, const char *ctype)
{
	int			ctype_encoding = pg_get_encoding_from_locale(ctype, true);
	int			collate_encoding = pg_get_encoding_from_locale(collate, true);

	/* LC_CTYPE must be compatible with the chosen encoding (see cases above). */
	if (!(ctype_encoding == encoding ||
		  ctype_encoding == PG_SQL_ASCII ||
		  ctype_encoding == -1 ||
#ifdef WIN32
		  encoding == PG_UTF8 ||
#endif
		  (encoding == PG_SQL_ASCII && superuser())))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("encoding \"%s\" does not match locale \"%s\"",
						pg_encoding_to_char(encoding),
						ctype),
				 errdetail("The chosen LC_CTYPE setting requires encoding \"%s\".",
						   pg_encoding_to_char(ctype_encoding))));

	/* Same compatibility test for LC_COLLATE. */
	if (!(collate_encoding == encoding ||
		  collate_encoding == PG_SQL_ASCII ||
		  collate_encoding == -1 ||
#ifdef WIN32
		  encoding == PG_UTF8 ||
#endif
		  (encoding == PG_SQL_ASCII && superuser())))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("encoding \"%s\" does not match locale \"%s\"",
						pg_encoding_to_char(encoding),
						collate),
				 errdetail("The chosen LC_COLLATE setting requires encoding \"%s\".",
						   pg_encoding_to_char(collate_encoding))));
}

/* Error cleanup callback for createdb */
static void
createdb_failure_callback(int code, Datum arg)
{
	createdb_failure_params *fparms = (createdb_failure_params *) DatumGetPointer(arg);

	/*
	 * If we were copying database at block levels then drop pages for the
	 * destination database that are in the shared buffer cache. And tell
	 * checkpointer to forget any pending fsync and unlink requests for files
	 * in the database. The reasoning behind doing this is same as explained
	 * in dropdb function. But unlike dropdb we don't need to call
	 * pgstat_drop_database because this database is still not created so
	 * there should not be any stat for this.
+ */ + if (fparms->strategy == CREATEDB_WAL_LOG) + { + DropDatabaseBuffers(fparms->dest_dboid); + ForgetDatabaseSyncRequests(fparms->dest_dboid); + + /* Release lock on the target database. */ + UnlockSharedObject(DatabaseRelationId, fparms->dest_dboid, 0, + AccessShareLock); + } + + /* + * Release lock on source database before doing recursive remove. This is + * not essential but it seems desirable to release the lock as soon as + * possible. + */ + UnlockSharedObject(DatabaseRelationId, fparms->src_dboid, 0, ShareLock); + + /* Throw away any successfully copied subdirectories */ + remove_dbtablespaces(fparms->dest_dboid); +} + + +/* + * DROP DATABASE + */ +void +dropdb(const char *dbname, bool missing_ok, bool force) +{ + Oid db_id; + bool db_istemplate; + Relation pgdbrel; + HeapTuple tup; + Form_pg_database datform; + int notherbackends; + int npreparedxacts; + int nslots, + nslots_active; + int nsubscriptions; + + /* + * Look up the target database's OID, and get exclusive lock on it. We + * need this to ensure that no new backend starts up in the target + * database while we are deleting it (see postinit.c), and that no one is + * using it as a CREATE DATABASE template or trying to delete it for + * themselves. 
+ */ + pgdbrel = table_open(DatabaseRelationId, RowExclusiveLock); + + if (!get_db_info(dbname, AccessExclusiveLock, &db_id, NULL, NULL, + &db_istemplate, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) + { + if (!missing_ok) + { + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_DATABASE), + errmsg("database \"%s\" does not exist", dbname))); + } + else + { + /* Close pg_database, release the lock, since we changed nothing */ + table_close(pgdbrel, RowExclusiveLock); + ereport(NOTICE, + (errmsg("database \"%s\" does not exist, skipping", + dbname))); + return; + } + } + + /* + * Permission checks + */ + if (!pg_database_ownercheck(db_id, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE, + dbname); + + /* DROP hook for the database being removed */ + InvokeObjectDropHook(DatabaseRelationId, db_id, 0); + + /* + * Disallow dropping a DB that is marked istemplate. This is just to + * prevent people from accidentally dropping template0 or template1; they + * can do so if they're really determined ... + */ + if (db_istemplate) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot drop a template database"))); + + /* Obviously can't drop my own database */ + if (db_id == MyDatabaseId) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_IN_USE), + errmsg("cannot drop the currently open database"))); + + /* + * Check whether there are active logical slots that refer to the + * to-be-dropped database. The database lock we are holding prevents the + * creation of new slots using the database or existing slots becoming + * active. 
+ */ + (void) ReplicationSlotsCountDBSlots(db_id, &nslots, &nslots_active); + if (nslots_active) + { + ereport(ERROR, + (errcode(ERRCODE_OBJECT_IN_USE), + errmsg("database \"%s\" is used by an active logical replication slot", + dbname), + errdetail_plural("There is %d active slot.", + "There are %d active slots.", + nslots_active, nslots_active))); + } + + /* + * Check if there are subscriptions defined in the target database. + * + * We can't drop them automatically because they might be holding + * resources in other databases/instances. + */ + if ((nsubscriptions = CountDBSubscriptions(db_id)) > 0) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_IN_USE), + errmsg("database \"%s\" is being used by logical replication subscription", + dbname), + errdetail_plural("There is %d subscription.", + "There are %d subscriptions.", + nsubscriptions, nsubscriptions))); + + + /* + * Attempt to terminate all existing connections to the target database if + * the user has requested to do so. + */ + if (force) + TerminateOtherDBBackends(db_id); + + /* + * Check for other backends in the target database. (Because we hold the + * database lock, no new ones can start after this.) + * + * As in CREATE DATABASE, check this after other error conditions. + */ + if (CountOtherDBBackends(db_id, ¬herbackends, &npreparedxacts)) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_IN_USE), + errmsg("database \"%s\" is being accessed by other users", + dbname), + errdetail_busy_db(notherbackends, npreparedxacts))); + + /* + * Delete any comments or security labels associated with the database. + */ + DeleteSharedComments(db_id, DatabaseRelationId); + DeleteSharedSecurityLabel(db_id, DatabaseRelationId); + + /* + * Remove settings associated with this database + */ + DropSetting(db_id, InvalidOid); + + /* + * Remove shared dependency references for the database. + */ + dropDatabaseDependencies(db_id); + + /* + * Tell the cumulative stats system to forget it immediately, too. 
+ */ + pgstat_drop_database(db_id); + + tup = SearchSysCacheCopy1(DATABASEOID, ObjectIdGetDatum(db_id)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for database %u", db_id); + datform = (Form_pg_database) GETSTRUCT(tup); + + /* + * Except for the deletion of the catalog row, subsequent actions are not + * transactional (consider DropDatabaseBuffers() discarding modified + * buffers). But we might crash or get interrupted below. To prevent + * accesses to a database with invalid contents, mark the database as + * invalid using an in-place update. + * + * We need to flush the WAL before continuing, to guarantee the + * modification is durable before performing irreversible filesystem + * operations. + */ + datform->datconnlimit = DATCONNLIMIT_INVALID_DB; + heap_inplace_update(pgdbrel, tup); + XLogFlush(XactLastRecEnd); + + /* + * Also delete the tuple - transactionally. If this transaction commits, + * the row will be gone, but if we fail, dropdb() can be invoked again. + */ + CatalogTupleDelete(pgdbrel, &tup->t_self); + + /* + * Drop db-specific replication slots. + */ + ReplicationSlotsDropDBSlots(db_id); + + /* + * Drop pages for this database that are in the shared buffer cache. This + * is important to ensure that no remaining backend tries to write out a + * dirty buffer to the dead database later... + */ + DropDatabaseBuffers(db_id); + + /* + * Tell checkpointer to forget any pending fsync and unlink requests for + * files in the database; else the fsyncs will fail at next checkpoint, or + * worse, it will delete files that belong to a newly created database + * with the same OID. + */ + ForgetDatabaseSyncRequests(db_id); + + /* + * Force a checkpoint to make sure the checkpointer has received the + * message sent by ForgetDatabaseSyncRequests. + */ + RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT); + + /* Close all smgr fds in all backends. 
*/ + WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE)); + + /* + * Remove all tablespace subdirs belonging to the database. + */ + remove_dbtablespaces(db_id); + + /* + * Close pg_database, but keep lock till commit. + */ + table_close(pgdbrel, NoLock); + + /* + * Force synchronous commit, thus minimizing the window between removal of + * the database files and committal of the transaction. If we crash before + * committing, we'll have a DB that's gone on disk but still there + * according to pg_database, which is not good. + */ + ForceSyncCommit(); +} + + +/* + * Rename database + */ +ObjectAddress +RenameDatabase(const char *oldname, const char *newname) +{ + Oid db_id; + HeapTuple newtup; + Relation rel; + int notherbackends; + int npreparedxacts; + ObjectAddress address; + + /* + * Look up the target database's OID, and get exclusive lock on it. We + * need this for the same reasons as DROP DATABASE. + */ + rel = table_open(DatabaseRelationId, RowExclusiveLock); + + if (!get_db_info(oldname, AccessExclusiveLock, &db_id, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_DATABASE), + errmsg("database \"%s\" does not exist", oldname))); + + /* must be owner */ + if (!pg_database_ownercheck(db_id, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE, + oldname); + + /* must have createdb rights */ + if (!have_createdb_privilege()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to rename database"))); + + /* + * If built with appropriate switch, whine when regression-testing + * conventions for database names are violated. + */ +#ifdef ENFORCE_REGRESSION_TEST_NAME_RESTRICTIONS + if (strstr(newname, "regression") == NULL) + elog(WARNING, "databases created by regression test cases should have names including \"regression\""); +#endif + + /* + * Make sure the new name doesn't exist. 
See notes for same error in + * CREATE DATABASE. + */ + if (OidIsValid(get_database_oid(newname, true))) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_DATABASE), + errmsg("database \"%s\" already exists", newname))); + + /* + * XXX Client applications probably store the current database somewhere, + * so renaming it could cause confusion. On the other hand, there may not + * be an actual problem besides a little confusion, so think about this + * and decide. + */ + if (db_id == MyDatabaseId) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("current database cannot be renamed"))); + + /* + * Make sure the database does not have active sessions. This is the same + * concern as above, but applied to other sessions. + * + * As in CREATE DATABASE, check this after other error conditions. + */ + if (CountOtherDBBackends(db_id, ¬herbackends, &npreparedxacts)) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_IN_USE), + errmsg("database \"%s\" is being accessed by other users", + oldname), + errdetail_busy_db(notherbackends, npreparedxacts))); + + /* rename */ + newtup = SearchSysCacheCopy1(DATABASEOID, ObjectIdGetDatum(db_id)); + if (!HeapTupleIsValid(newtup)) + elog(ERROR, "cache lookup failed for database %u", db_id); + namestrcpy(&(((Form_pg_database) GETSTRUCT(newtup))->datname), newname); + CatalogTupleUpdate(rel, &newtup->t_self, newtup); + + InvokeObjectPostAlterHook(DatabaseRelationId, db_id, 0); + + ObjectAddressSet(address, DatabaseRelationId, db_id); + + /* + * Close pg_database, but keep lock till commit. 
+ */ + table_close(rel, NoLock); + + return address; +} + + +/* + * ALTER DATABASE SET TABLESPACE + */ +static void +movedb(const char *dbname, const char *tblspcname) +{ + Oid db_id; + Relation pgdbrel; + int notherbackends; + int npreparedxacts; + HeapTuple oldtuple, + newtuple; + Oid src_tblspcoid, + dst_tblspcoid; + Datum new_record[Natts_pg_database]; + bool new_record_nulls[Natts_pg_database]; + bool new_record_repl[Natts_pg_database]; + ScanKeyData scankey; + SysScanDesc sysscan; + AclResult aclresult; + char *src_dbpath; + char *dst_dbpath; + DIR *dstdir; + struct dirent *xlde; + movedb_failure_params fparms; + + /* + * Look up the target database's OID, and get exclusive lock on it. We + * need this to ensure that no new backend starts up in the database while + * we are moving it, and that no one is using it as a CREATE DATABASE + * template or trying to delete it. + */ + pgdbrel = table_open(DatabaseRelationId, RowExclusiveLock); + + if (!get_db_info(dbname, AccessExclusiveLock, &db_id, NULL, NULL, + NULL, NULL, NULL, NULL, &src_tblspcoid, NULL, NULL, NULL, NULL, NULL)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_DATABASE), + errmsg("database \"%s\" does not exist", dbname))); + + /* + * We actually need a session lock, so that the lock will persist across + * the commit/restart below. (We could almost get away with letting the + * lock be released at commit, except that someone could try to move + * relations of the DB back into the old directory while we rmtree() it.) 
+ */ + LockSharedObjectForSession(DatabaseRelationId, db_id, 0, + AccessExclusiveLock); + + /* + * Permission checks + */ + if (!pg_database_ownercheck(db_id, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE, + dbname); + + /* + * Obviously can't move the tables of my own database + */ + if (db_id == MyDatabaseId) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_IN_USE), + errmsg("cannot change the tablespace of the currently open database"))); + + /* + * Get tablespace's oid + */ + dst_tblspcoid = get_tablespace_oid(tblspcname, false); + + /* + * Permission checks + */ + aclresult = pg_tablespace_aclcheck(dst_tblspcoid, GetUserId(), + ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_TABLESPACE, + tblspcname); + + /* + * pg_global must never be the default tablespace + */ + if (dst_tblspcoid == GLOBALTABLESPACE_OID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("pg_global cannot be used as default tablespace"))); + + /* + * No-op if same tablespace + */ + if (src_tblspcoid == dst_tblspcoid) + { + table_close(pgdbrel, NoLock); + UnlockSharedObjectForSession(DatabaseRelationId, db_id, 0, + AccessExclusiveLock); + return; + } + + /* + * Check for other backends in the target database. (Because we hold the + * database lock, no new ones can start after this.) + * + * As in CREATE DATABASE, check this after other error conditions. + */ + if (CountOtherDBBackends(db_id, ¬herbackends, &npreparedxacts)) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_IN_USE), + errmsg("database \"%s\" is being accessed by other users", + dbname), + errdetail_busy_db(notherbackends, npreparedxacts))); + + /* + * Get old and new database paths + */ + src_dbpath = GetDatabasePath(db_id, src_tblspcoid); + dst_dbpath = GetDatabasePath(db_id, dst_tblspcoid); + + /* + * Force a checkpoint before proceeding. 
This will force all dirty + * buffers, including those of unlogged tables, out to disk, to ensure + * source database is up-to-date on disk for the copy. + * FlushDatabaseBuffers() would suffice for that, but we also want to + * process any pending unlink requests. Otherwise, the check for existing + * files in the target directory might fail unnecessarily, not to mention + * that the copy might fail due to source files getting deleted under it. + * On Windows, this also ensures that background procs don't hold any open + * files, which would cause rmdir() to fail. + */ + RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT + | CHECKPOINT_FLUSH_ALL); + + /* Close all smgr fds in all backends. */ + WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE)); + + /* + * Now drop all buffers holding data of the target database; they should + * no longer be dirty so DropDatabaseBuffers is safe. + * + * It might seem that we could just let these buffers age out of shared + * buffers naturally, since they should not get referenced anymore. The + * problem with that is that if the user later moves the database back to + * its original tablespace, any still-surviving buffers would appear to + * contain valid data again --- but they'd be missing any changes made in + * the database while it was in the new tablespace. In any case, freeing + * buffers that should never be used again seems worth the cycles. + * + * Note: it'd be sufficient to get rid of buffers matching db_id and + * src_tblspcoid, but bufmgr.c presently provides no API for that. + */ + DropDatabaseBuffers(db_id); + + /* + * Check for existence of files in the target directory, i.e., objects of + * this database that are already in the target tablespace. We can't + * allow the move in such a case, because we would need to change those + * relations' pg_class.reltablespace entries to zero, and we don't have + * access to the DB's pg_class to do so. 
+ */ + dstdir = AllocateDir(dst_dbpath); + if (dstdir != NULL) + { + while ((xlde = ReadDir(dstdir, dst_dbpath)) != NULL) + { + if (strcmp(xlde->d_name, ".") == 0 || + strcmp(xlde->d_name, "..") == 0) + continue; + + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("some relations of database \"%s\" are already in tablespace \"%s\"", + dbname, tblspcname), + errhint("You must move them back to the database's default tablespace before using this command."))); + } + + FreeDir(dstdir); + + /* + * The directory exists but is empty. We must remove it before using + * the copydir function. + */ + if (rmdir(dst_dbpath) != 0) + elog(ERROR, "could not remove directory \"%s\": %m", + dst_dbpath); + } + + /* + * Use an ENSURE block to make sure we remove the debris if the copy fails + * (eg, due to out-of-disk-space). This is not a 100% solution, because + * of the possibility of failure during transaction commit, but it should + * handle most scenarios. + */ + fparms.dest_dboid = db_id; + fparms.dest_tsoid = dst_tblspcoid; + PG_ENSURE_ERROR_CLEANUP(movedb_failure_callback, + PointerGetDatum(&fparms)); + { + /* + * Copy files from the old tablespace to the new one + */ + copydir(src_dbpath, dst_dbpath, false); + + /* + * Record the filesystem change in XLOG + */ + { + xl_dbase_create_file_copy_rec xlrec; + + xlrec.db_id = db_id; + xlrec.tablespace_id = dst_tblspcoid; + xlrec.src_db_id = db_id; + xlrec.src_tablespace_id = src_tblspcoid; + + XLogBeginInsert(); + XLogRegisterData((char *) &xlrec, + sizeof(xl_dbase_create_file_copy_rec)); + + (void) XLogInsert(RM_DBASE_ID, + XLOG_DBASE_CREATE_FILE_COPY | XLR_SPECIAL_REL_UPDATE); + } + + /* + * Update the database's pg_database tuple + */ + ScanKeyInit(&scankey, + Anum_pg_database_datname, + BTEqualStrategyNumber, F_NAMEEQ, + CStringGetDatum(dbname)); + sysscan = systable_beginscan(pgdbrel, DatabaseNameIndexId, true, + NULL, 1, &scankey); + oldtuple = systable_getnext(sysscan); + if 
(!HeapTupleIsValid(oldtuple)) /* shouldn't happen... */ + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_DATABASE), + errmsg("database \"%s\" does not exist", dbname))); + + MemSet(new_record, 0, sizeof(new_record)); + MemSet(new_record_nulls, false, sizeof(new_record_nulls)); + MemSet(new_record_repl, false, sizeof(new_record_repl)); + + new_record[Anum_pg_database_dattablespace - 1] = ObjectIdGetDatum(dst_tblspcoid); + new_record_repl[Anum_pg_database_dattablespace - 1] = true; + + newtuple = heap_modify_tuple(oldtuple, RelationGetDescr(pgdbrel), + new_record, + new_record_nulls, new_record_repl); + CatalogTupleUpdate(pgdbrel, &oldtuple->t_self, newtuple); + + InvokeObjectPostAlterHook(DatabaseRelationId, db_id, 0); + + systable_endscan(sysscan); + + /* + * Force another checkpoint here. As in CREATE DATABASE, this is to + * ensure that we don't have to replay a committed + * XLOG_DBASE_CREATE_FILE_COPY operation, which would cause us to lose + * any unlogged operations done in the new DB tablespace before the + * next checkpoint. + */ + RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT); + + /* + * Force synchronous commit, thus minimizing the window between + * copying the database files and committal of the transaction. If we + * crash before committing, we'll leave an orphaned set of files on + * disk, which is not fatal but not good either. + */ + ForceSyncCommit(); + + /* + * Close pg_database, but keep lock till commit. + */ + table_close(pgdbrel, NoLock); + } + PG_END_ENSURE_ERROR_CLEANUP(movedb_failure_callback, + PointerGetDatum(&fparms)); + + /* + * Commit the transaction so that the pg_database update is committed. If + * we crash while removing files, the database won't be corrupt, we'll + * just leave some orphaned files in the old directory. + * + * (This is OK because we know we aren't inside a transaction block.) + * + * XXX would it be safe/better to do this inside the ensure block? 
Not + * convinced it's a good idea; consider elog just after the transaction + * really commits. + */ + PopActiveSnapshot(); + CommitTransactionCommand(); + + /* Start new transaction for the remaining work; don't need a snapshot */ + StartTransactionCommand(); + + /* + * Remove files from the old tablespace + */ + if (!rmtree(src_dbpath, true)) + ereport(WARNING, + (errmsg("some useless files may be left behind in old database directory \"%s\"", + src_dbpath))); + + /* + * Record the filesystem change in XLOG + */ + { + xl_dbase_drop_rec xlrec; + + xlrec.db_id = db_id; + xlrec.ntablespaces = 1; + + XLogBeginInsert(); + XLogRegisterData((char *) &xlrec, sizeof(xl_dbase_drop_rec)); + XLogRegisterData((char *) &src_tblspcoid, sizeof(Oid)); + + (void) XLogInsert(RM_DBASE_ID, + XLOG_DBASE_DROP | XLR_SPECIAL_REL_UPDATE); + } + + /* Now it's safe to release the database lock */ + UnlockSharedObjectForSession(DatabaseRelationId, db_id, 0, + AccessExclusiveLock); + + pfree(src_dbpath); + pfree(dst_dbpath); +} + +/* Error cleanup callback for movedb */ +static void +movedb_failure_callback(int code, Datum arg) +{ + movedb_failure_params *fparms = (movedb_failure_params *) DatumGetPointer(arg); + char *dstpath; + + /* Get rid of anything we managed to copy to the target directory */ + dstpath = GetDatabasePath(fparms->dest_dboid, fparms->dest_tsoid); + + (void) rmtree(dstpath, true); + + pfree(dstpath); +} + +/* + * Process options and call dropdb function. + */ +void +DropDatabase(ParseState *pstate, DropdbStmt *stmt) +{ + bool force = false; + ListCell *lc; + + foreach(lc, stmt->options) + { + DefElem *opt = (DefElem *) lfirst(lc); + + if (strcmp(opt->defname, "force") == 0) + force = true; + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unrecognized DROP DATABASE option \"%s\"", opt->defname), + parser_errposition(pstate, opt->location))); + } + + dropdb(stmt->dbname, stmt->missing_ok, force); +} + +/* + * ALTER DATABASE name ... 
+ */ +Oid +AlterDatabase(ParseState *pstate, AlterDatabaseStmt *stmt, bool isTopLevel) +{ + Relation rel; + Oid dboid; + HeapTuple tuple, + newtuple; + Form_pg_database datform; + ScanKeyData scankey; + SysScanDesc scan; + ListCell *option; + bool dbistemplate = false; + bool dballowconnections = true; + int dbconnlimit = DATCONNLIMIT_UNLIMITED; + DefElem *distemplate = NULL; + DefElem *dallowconnections = NULL; + DefElem *dconnlimit = NULL; + DefElem *dtablespace = NULL; + Datum new_record[Natts_pg_database]; + bool new_record_nulls[Natts_pg_database]; + bool new_record_repl[Natts_pg_database]; + + /* Extract options from the statement node tree */ + foreach(option, stmt->options) + { + DefElem *defel = (DefElem *) lfirst(option); + + if (strcmp(defel->defname, "is_template") == 0) + { + if (distemplate) + errorConflictingDefElem(defel, pstate); + distemplate = defel; + } + else if (strcmp(defel->defname, "allow_connections") == 0) + { + if (dallowconnections) + errorConflictingDefElem(defel, pstate); + dallowconnections = defel; + } + else if (strcmp(defel->defname, "connection_limit") == 0) + { + if (dconnlimit) + errorConflictingDefElem(defel, pstate); + dconnlimit = defel; + } + else if (strcmp(defel->defname, "tablespace") == 0) + { + if (dtablespace) + errorConflictingDefElem(defel, pstate); + dtablespace = defel; + } + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("option \"%s\" not recognized", defel->defname), + parser_errposition(pstate, defel->location))); + } + + if (dtablespace) + { + /* + * While the SET TABLESPACE syntax doesn't allow any other options, + * somebody could write "WITH TABLESPACE ...". Forbid any other + * options from being specified in that case. 
+ */ + if (list_length(stmt->options) != 1) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("option \"%s\" cannot be specified with other options", + dtablespace->defname), + parser_errposition(pstate, dtablespace->location))); + /* this case isn't allowed within a transaction block */ + PreventInTransactionBlock(isTopLevel, "ALTER DATABASE SET TABLESPACE"); + movedb(stmt->dbname, defGetString(dtablespace)); + return InvalidOid; + } + + if (distemplate && distemplate->arg) + dbistemplate = defGetBoolean(distemplate); + if (dallowconnections && dallowconnections->arg) + dballowconnections = defGetBoolean(dallowconnections); + if (dconnlimit && dconnlimit->arg) + { + dbconnlimit = defGetInt32(dconnlimit); + if (dbconnlimit < DATCONNLIMIT_UNLIMITED) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid connection limit: %d", dbconnlimit))); + } + + /* + * Get the old tuple. We don't need a lock on the database per se, + * because we're not going to do anything that would mess up incoming + * connections. 
+ */ + rel = table_open(DatabaseRelationId, RowExclusiveLock); + ScanKeyInit(&scankey, + Anum_pg_database_datname, + BTEqualStrategyNumber, F_NAMEEQ, + CStringGetDatum(stmt->dbname)); + scan = systable_beginscan(rel, DatabaseNameIndexId, true, + NULL, 1, &scankey); + tuple = systable_getnext(scan); + if (!HeapTupleIsValid(tuple)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_DATABASE), + errmsg("database \"%s\" does not exist", stmt->dbname))); + + datform = (Form_pg_database) GETSTRUCT(tuple); + dboid = datform->oid; + + if (database_is_invalid_form(datform)) + { + ereport(FATAL, + errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("cannot alter invalid database \"%s\"", stmt->dbname), + errhint("Use DROP DATABASE to drop invalid databases.")); + } + + if (!pg_database_ownercheck(dboid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE, + stmt->dbname); + + /* + * In order to avoid getting locked out and having to go through + * standalone mode, we refuse to disallow connections to the database + * we're currently connected to. Lockout can still happen with concurrent + * sessions but the likeliness of that is not high enough to worry about. 
+ */ + if (!dballowconnections && dboid == MyDatabaseId) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot disallow connections for current database"))); + + /* + * Build an updated tuple, perusing the information just obtained + */ + MemSet(new_record, 0, sizeof(new_record)); + MemSet(new_record_nulls, false, sizeof(new_record_nulls)); + MemSet(new_record_repl, false, sizeof(new_record_repl)); + + if (distemplate) + { + new_record[Anum_pg_database_datistemplate - 1] = BoolGetDatum(dbistemplate); + new_record_repl[Anum_pg_database_datistemplate - 1] = true; + } + if (dallowconnections) + { + new_record[Anum_pg_database_datallowconn - 1] = BoolGetDatum(dballowconnections); + new_record_repl[Anum_pg_database_datallowconn - 1] = true; + } + if (dconnlimit) + { + new_record[Anum_pg_database_datconnlimit - 1] = Int32GetDatum(dbconnlimit); + new_record_repl[Anum_pg_database_datconnlimit - 1] = true; + } + + newtuple = heap_modify_tuple(tuple, RelationGetDescr(rel), new_record, + new_record_nulls, new_record_repl); + CatalogTupleUpdate(rel, &tuple->t_self, newtuple); + + InvokeObjectPostAlterHook(DatabaseRelationId, dboid, 0); + + systable_endscan(scan); + + /* Close pg_database, but keep lock till commit */ + table_close(rel, NoLock); + + return dboid; +} + + +/* + * ALTER DATABASE name REFRESH COLLATION VERSION + */ +ObjectAddress +AlterDatabaseRefreshColl(AlterDatabaseRefreshCollStmt *stmt) +{ + Relation rel; + ScanKeyData scankey; + SysScanDesc scan; + Oid db_id; + HeapTuple tuple; + Form_pg_database datForm; + ObjectAddress address; + Datum datum; + bool isnull; + char *oldversion; + char *newversion; + + rel = table_open(DatabaseRelationId, RowExclusiveLock); + ScanKeyInit(&scankey, + Anum_pg_database_datname, + BTEqualStrategyNumber, F_NAMEEQ, + CStringGetDatum(stmt->dbname)); + scan = systable_beginscan(rel, DatabaseNameIndexId, true, + NULL, 1, &scankey); + tuple = systable_getnext(scan); + if (!HeapTupleIsValid(tuple)) + 
ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_DATABASE), + errmsg("database \"%s\" does not exist", stmt->dbname))); + + datForm = (Form_pg_database) GETSTRUCT(tuple); + db_id = datForm->oid; + + if (!pg_database_ownercheck(db_id, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE, + stmt->dbname); + + datum = heap_getattr(tuple, Anum_pg_database_datcollversion, RelationGetDescr(rel), &isnull); + oldversion = isnull ? NULL : TextDatumGetCString(datum); + + datum = heap_getattr(tuple, datForm->datlocprovider == COLLPROVIDER_ICU ? Anum_pg_database_daticulocale : Anum_pg_database_datcollate, RelationGetDescr(rel), &isnull); + if (isnull) + elog(ERROR, "unexpected null in pg_database"); + newversion = get_collation_actual_version(datForm->datlocprovider, TextDatumGetCString(datum)); + + /* cannot change from NULL to non-NULL or vice versa */ + if ((!oldversion && newversion) || (oldversion && !newversion)) + elog(ERROR, "invalid collation version change"); + else if (oldversion && newversion && strcmp(newversion, oldversion) != 0) + { + bool nulls[Natts_pg_database] = {0}; + bool replaces[Natts_pg_database] = {0}; + Datum values[Natts_pg_database] = {0}; + + ereport(NOTICE, + (errmsg("changing version from %s to %s", + oldversion, newversion))); + + values[Anum_pg_database_datcollversion - 1] = CStringGetTextDatum(newversion); + replaces[Anum_pg_database_datcollversion - 1] = true; + + tuple = heap_modify_tuple(tuple, RelationGetDescr(rel), + values, nulls, replaces); + CatalogTupleUpdate(rel, &tuple->t_self, tuple); + heap_freetuple(tuple); + } + else + ereport(NOTICE, + (errmsg("version has not changed"))); + + InvokeObjectPostAlterHook(DatabaseRelationId, db_id, 0); + + ObjectAddressSet(address, DatabaseRelationId, db_id); + + systable_endscan(scan); + + table_close(rel, NoLock); + + return address; +} + + +/* + * ALTER DATABASE name SET ... 
 */
Oid
AlterDatabaseSet(AlterDatabaseSetStmt *stmt)
{
    /* Resolve the database name; missing_ok=false, so this errors if absent */
    Oid         datid = get_database_oid(stmt->dbname, false);

    /*
     * Obtain a lock on the database and make sure it didn't go away in the
     * meantime.  (shdepLockAndCheckObject takes AccessShareLock; the matching
     * unlock below uses the same level.)
     */
    shdepLockAndCheckObject(DatabaseRelationId, datid);

    /* Only the database owner (or a superuser) may ALTER ... SET */
    if (!pg_database_ownercheck(datid, GetUserId()))
        aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE,
                       stmt->dbname);

    /* Apply the per-database setting (stored in pg_db_role_setting) */
    AlterSetting(datid, InvalidOid, stmt->setstmt);

    UnlockSharedObject(DatabaseRelationId, datid, 0, AccessShareLock);

    return datid;
}


/*
 * ALTER DATABASE name OWNER TO newowner
 *
 * Returns the ObjectAddress of the affected database.  Silently succeeds
 * (no catalog change) if the named role already owns the database.
 */
ObjectAddress
AlterDatabaseOwner(const char *dbname, Oid newOwnerId)
{
    Oid         db_id;
    HeapTuple   tuple;
    Relation    rel;
    ScanKeyData scankey;
    SysScanDesc scan;
    Form_pg_database datForm;
    ObjectAddress address;

    /*
     * Get the old tuple.  We don't need a lock on the database per se,
     * because we're not going to do anything that would mess up incoming
     * connections.
     */
    rel = table_open(DatabaseRelationId, RowExclusiveLock);
    ScanKeyInit(&scankey,
                Anum_pg_database_datname,
                BTEqualStrategyNumber, F_NAMEEQ,
                CStringGetDatum(dbname));
    /* No syscache indexed by name, so scan pg_database_datname_index */
    scan = systable_beginscan(rel, DatabaseNameIndexId, true,
                              NULL, 1, &scankey);
    tuple = systable_getnext(scan);
    if (!HeapTupleIsValid(tuple))
        ereport(ERROR,
                (errcode(ERRCODE_UNDEFINED_DATABASE),
                 errmsg("database \"%s\" does not exist", dbname)));

    datForm = (Form_pg_database) GETSTRUCT(tuple);
    db_id = datForm->oid;

    /*
     * If the new owner is the same as the existing owner, consider the
     * command to have succeeded.  This is to be consistent with other
     * objects.
     */
    if (datForm->datdba != newOwnerId)
    {
        Datum       repl_val[Natts_pg_database];
        bool        repl_null[Natts_pg_database];
        bool        repl_repl[Natts_pg_database];
        Acl        *newAcl;
        Datum       aclDatum;
        bool        isNull;
        HeapTuple   newtuple;

        /* Otherwise, must be owner of the existing object */
        if (!pg_database_ownercheck(db_id, GetUserId()))
            aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE,
                           dbname);

        /* Must be able to become new owner */
        check_is_member_of_role(GetUserId(), newOwnerId);

        /*
         * must have createdb rights
         *
         * NOTE: This is different from other alter-owner checks in that the
         * current user is checked for createdb privileges instead of the
         * destination owner.  This is consistent with the CREATE case for
         * databases.  Because superusers will always have this right, we need
         * no special case for them.
         */
        if (!have_createdb_privilege())
            ereport(ERROR,
                    (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
                     errmsg("permission denied to change owner of database")));

        memset(repl_null, false, sizeof(repl_null));
        memset(repl_repl, false, sizeof(repl_repl));

        repl_repl[Anum_pg_database_datdba - 1] = true;
        repl_val[Anum_pg_database_datdba - 1] = ObjectIdGetDatum(newOwnerId);

        /*
         * Determine the modified ACL for the new owner.  This is only
         * necessary when the ACL is non-null.
         */
        aclDatum = heap_getattr(tuple,
                                Anum_pg_database_datacl,
                                RelationGetDescr(rel),
                                &isNull);
        if (!isNull)
        {
            /* Transfer ownership entries in the ACL to the new owner */
            newAcl = aclnewowner(DatumGetAclP(aclDatum),
                                 datForm->datdba, newOwnerId);
            repl_repl[Anum_pg_database_datacl - 1] = true;
            repl_val[Anum_pg_database_datacl - 1] = PointerGetDatum(newAcl);
        }

        newtuple = heap_modify_tuple(tuple, RelationGetDescr(rel), repl_val, repl_null, repl_repl);
        CatalogTupleUpdate(rel, &newtuple->t_self, newtuple);

        heap_freetuple(newtuple);

        /* Update owner dependency reference */
        changeDependencyOnOwner(DatabaseRelationId, db_id, newOwnerId);
    }

    InvokeObjectPostAlterHook(DatabaseRelationId, db_id, 0);

    ObjectAddressSet(address, DatabaseRelationId, db_id);

    systable_endscan(scan);

    /* Close pg_database, but keep lock till commit */
    table_close(rel, NoLock);

    return address;
}


/*
 * SQL function: report the actual (provider-reported) collation version for
 * the database-wide collation of the database with the given OID.  Returns
 * NULL if the provider does not report versions for this locale.
 */
Datum
pg_database_collation_actual_version(PG_FUNCTION_ARGS)
{
    Oid         dbid = PG_GETARG_OID(0);
    HeapTuple   tp;
    char        datlocprovider;
    Datum       datum;
    bool        isnull;
    char       *version;

    tp = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(dbid));
    if (!HeapTupleIsValid(tp))
        ereport(ERROR,
                (errcode(ERRCODE_UNDEFINED_OBJECT),
                 errmsg("database with OID %u does not exist", dbid)));

    datlocprovider = ((Form_pg_database) GETSTRUCT(tp))->datlocprovider;

    /* ICU databases store the locale in daticulocale, libc ones in datcollate */
    datum = SysCacheGetAttr(DATABASEOID, tp, datlocprovider == COLLPROVIDER_ICU ? Anum_pg_database_daticulocale : Anum_pg_database_datcollate, &isnull);
    if (isnull)
        elog(ERROR, "unexpected null in pg_database");
    version = get_collation_actual_version(datlocprovider, TextDatumGetCString(datum));

    ReleaseSysCache(tp);

    if (version)
        PG_RETURN_TEXT_P(cstring_to_text(version));
    else
        PG_RETURN_NULL();
}


/*
 * Helper functions
 */

/*
 * Look up info about the database named "name".  If the database exists,
 * obtain the specified lock type on it, fill in any of the remaining
 * parameters that aren't NULL, and return true.
If no such database,
 * return false.
 *
 * Any of the output pointer parameters may be NULL if the caller doesn't
 * care about that attribute.  String outputs are palloc'd.
 */
static bool
get_db_info(const char *name, LOCKMODE lockmode,
            Oid *dbIdP, Oid *ownerIdP,
            int *encodingP, bool *dbIsTemplateP, bool *dbAllowConnP,
            TransactionId *dbFrozenXidP, MultiXactId *dbMinMultiP,
            Oid *dbTablespace, char **dbCollate, char **dbCtype, char **dbIculocale,
            char *dbLocProvider,
            char **dbCollversion)
{
    bool        result = false;
    Relation    relation;

    AssertArg(name);

    /* Caller may wish to grab a better lock on pg_database beforehand... */
    relation = table_open(DatabaseRelationId, AccessShareLock);

    /*
     * Loop covers the rare case where the database is renamed before we can
     * lock it.  We try again just in case we can find a new one of the same
     * name.
     */
    for (;;)
    {
        ScanKeyData scanKey;
        SysScanDesc scan;
        HeapTuple   tuple;
        Oid         dbOid;

        /*
         * there's no syscache for database-indexed-by-name, so must do it the
         * hard way
         */
        ScanKeyInit(&scanKey,
                    Anum_pg_database_datname,
                    BTEqualStrategyNumber, F_NAMEEQ,
                    CStringGetDatum(name));

        scan = systable_beginscan(relation, DatabaseNameIndexId, true,
                                  NULL, 1, &scanKey);

        tuple = systable_getnext(scan);

        if (!HeapTupleIsValid(tuple))
        {
            /* definitely no database of that name */
            systable_endscan(scan);
            break;
        }

        dbOid = ((Form_pg_database) GETSTRUCT(tuple))->oid;

        systable_endscan(scan);

        /*
         * Now that we have a database OID, we can try to lock the DB.
         */
        if (lockmode != NoLock)
            LockSharedObject(DatabaseRelationId, dbOid, 0, lockmode);

        /*
         * And now, re-fetch the tuple by OID.  If it's still there and still
         * the same name, we win; else, drop the lock and loop back to try
         * again.
         */
        tuple = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(dbOid));
        if (HeapTupleIsValid(tuple))
        {
            Form_pg_database dbform = (Form_pg_database) GETSTRUCT(tuple);

            if (strcmp(name, NameStr(dbform->datname)) == 0)
            {
                Datum       datum;
                bool        isnull;

                /* oid of the database */
                if (dbIdP)
                    *dbIdP = dbOid;
                /* oid of the owner */
                if (ownerIdP)
                    *ownerIdP = dbform->datdba;
                /* character encoding */
                if (encodingP)
                    *encodingP = dbform->encoding;
                /* allowed as template? */
                if (dbIsTemplateP)
                    *dbIsTemplateP = dbform->datistemplate;
                /* allowing connections? */
                if (dbAllowConnP)
                    *dbAllowConnP = dbform->datallowconn;
                /* limit of frozen XIDs */
                if (dbFrozenXidP)
                    *dbFrozenXidP = dbform->datfrozenxid;
                /* minimum MultiXactId */
                if (dbMinMultiP)
                    *dbMinMultiP = dbform->datminmxid;
                /* default tablespace for this database */
                if (dbTablespace)
                    *dbTablespace = dbform->dattablespace;
                /* default locale settings for this database */
                if (dbLocProvider)
                    *dbLocProvider = dbform->datlocprovider;
                if (dbCollate)
                {
                    /* datcollate is NOT NULL, hence the bare Assert */
                    datum = SysCacheGetAttr(DATABASEOID, tuple, Anum_pg_database_datcollate, &isnull);
                    Assert(!isnull);
                    *dbCollate = TextDatumGetCString(datum);
                }
                if (dbCtype)
                {
                    datum = SysCacheGetAttr(DATABASEOID, tuple, Anum_pg_database_datctype, &isnull);
                    Assert(!isnull);
                    *dbCtype = TextDatumGetCString(datum);
                }
                if (dbIculocale)
                {
                    /* daticulocale is nullable (non-ICU databases) */
                    datum = SysCacheGetAttr(DATABASEOID, tuple, Anum_pg_database_daticulocale, &isnull);
                    if (isnull)
                        *dbIculocale = NULL;
                    else
                        *dbIculocale = TextDatumGetCString(datum);
                }
                if (dbCollversion)
                {
                    /* datcollversion is nullable as well */
                    datum = SysCacheGetAttr(DATABASEOID, tuple, Anum_pg_database_datcollversion, &isnull);
                    if (isnull)
                        *dbCollversion = NULL;
                    else
                        *dbCollversion = TextDatumGetCString(datum);
                }
                ReleaseSysCache(tuple);
                result = true;
                break;
            }
            /* can only get here if it was just renamed */
            ReleaseSysCache(tuple);
        }

        /* Lost the race: drop the lock on the stale OID and retry */
        if (lockmode != NoLock)
            UnlockSharedObject(DatabaseRelationId,
                               dbOid, 0, lockmode);
    }

    table_close(relation, AccessShareLock);

    return result;
}

/* Check if current user has createdb privileges */
static bool
have_createdb_privilege(void)
{
    bool        result = false;
    HeapTuple   utup;

    /* Superusers can always do everything */
    if (superuser())
        return true;

    utup = SearchSysCache1(AUTHOID, ObjectIdGetDatum(GetUserId()));
    if (HeapTupleIsValid(utup))
    {
        result = ((Form_pg_authid) GETSTRUCT(utup))->rolcreatedb;
        ReleaseSysCache(utup);
    }
    return result;
}

/*
 * Remove tablespace directories
 *
 * We don't know what tablespaces db_id is using, so iterate through all
 * tablespaces removing /db_id
 */
static void
remove_dbtablespaces(Oid db_id)
{
    Relation    rel;
    TableScanDesc scan;
    HeapTuple   tuple;
    List       *ltblspc = NIL;
    ListCell   *cell;
    int         ntblspc;
    int         i;
    Oid        *tablespace_ids;

    rel = table_open(TableSpaceRelationId, AccessShareLock);
    scan = table_beginscan_catalog(rel, 0, NULL);
    while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    {
        Form_pg_tablespace spcform = (Form_pg_tablespace) GETSTRUCT(tuple);
        Oid         dsttablespace = spcform->oid;
        char       *dstpath;
        struct stat st;

        /* Don't mess with the global tablespace */
        if (dsttablespace == GLOBALTABLESPACE_OID)
            continue;

        dstpath = GetDatabasePath(db_id, dsttablespace);

        if (lstat(dstpath, &st) < 0 || !S_ISDIR(st.st_mode))
        {
            /* Assume we can ignore it */
            pfree(dstpath);
            continue;
        }

        if (!rmtree(dstpath, true))
            ereport(WARNING,
                    (errmsg("some useless files may be left behind in old database directory \"%s\"",
                            dstpath)));

        /* Remember which tablespaces we actually touched, for the WAL record */
        ltblspc = lappend_oid(ltblspc, dsttablespace);
        pfree(dstpath);
    }

    ntblspc = list_length(ltblspc);
    if (ntblspc == 0)
    {
        /* Nothing removed, so nothing to log */
        table_endscan(scan);
        table_close(rel, AccessShareLock);
        return;
    }

    tablespace_ids = (Oid *) palloc(ntblspc * sizeof(Oid));
    i = 0;
    foreach(cell, ltblspc)
        tablespace_ids[i++] = lfirst_oid(cell);

    /* Record the filesystem change in XLOG */
    {
        xl_dbase_drop_rec xlrec;

        xlrec.db_id = db_id;
        xlrec.ntablespaces = ntblspc;

        XLogBeginInsert();
        XLogRegisterData((char *) &xlrec, MinSizeOfDbaseDropRec);
        /* variable-length tablespace OID array follows the fixed header */
        XLogRegisterData((char *) tablespace_ids, ntblspc * sizeof(Oid));

        (void) XLogInsert(RM_DBASE_ID,
                          XLOG_DBASE_DROP | XLR_SPECIAL_REL_UPDATE);
    }

    list_free(ltblspc);
    pfree(tablespace_ids);

    table_endscan(scan);
    table_close(rel, AccessShareLock);
}

/*
 * Check for existing files that conflict with a proposed new DB OID;
 * return true if there are any
 *
 * If there were a subdirectory in any tablespace matching the proposed new
 * OID, we'd get a create failure due to the duplicate name ... and then we'd
 * try to remove that already-existing subdirectory during the cleanup in
 * remove_dbtablespaces.  Nuking existing files seems like a bad idea, so
 * instead we make this extra check before settling on the OID of the new
 * database.  This exactly parallels what GetNewRelFileNode() does for table
 * relfilenode values.
 */
static bool
check_db_file_conflict(Oid db_id)
{
    bool        result = false;
    Relation    rel;
    TableScanDesc scan;
    HeapTuple   tuple;

    rel = table_open(TableSpaceRelationId, AccessShareLock);
    scan = table_beginscan_catalog(rel, 0, NULL);
    while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    {
        Form_pg_tablespace spcform = (Form_pg_tablespace) GETSTRUCT(tuple);
        Oid         dsttablespace = spcform->oid;
        char       *dstpath;
        struct stat st;

        /* Don't mess with the global tablespace */
        if (dsttablespace == GLOBALTABLESPACE_OID)
            continue;

        dstpath = GetDatabasePath(db_id, dsttablespace);

        /* lstat: any existing entry (file, dir, symlink) is a conflict */
        if (lstat(dstpath, &st) == 0)
        {
            /* Found a conflicting file (or directory, whatever) */
            pfree(dstpath);
            result = true;
            break;
        }

        pfree(dstpath);
    }

    table_endscan(scan);
    table_close(rel, AccessShareLock);

    return result;
}

/*
 * Issue a suitable errdetail message for a busy database
 */
static int
errdetail_busy_db(int notherbackends, int npreparedxacts)
{
    if (notherbackends > 0 && npreparedxacts > 0)

        /*
         * We don't deal with singular versus plural here, since gettext
         * doesn't support multiple plurals in one string.
         */
        errdetail("There are %d other session(s) and %d prepared transaction(s) using the database.",
                  notherbackends, npreparedxacts);
    else if (notherbackends > 0)
        errdetail_plural("There is %d other session using the database.",
                         "There are %d other sessions using the database.",
                         notherbackends,
                         notherbackends);
    else
        errdetail_plural("There is %d prepared transaction using the database.",
                         "There are %d prepared transactions using the database.",
                         npreparedxacts,
                         npreparedxacts);
    return 0;                   /* just to keep ereport macro happy */
}

/*
 * get_database_oid - given a database name, look up the OID
 *
 * If missing_ok is false, throw an error if database name not found.  If
 * true, just return InvalidOid.
 */
Oid
get_database_oid(const char *dbname, bool missing_ok)
{
    Relation    pg_database;
    ScanKeyData entry[1];
    SysScanDesc scan;
    HeapTuple   dbtuple;
    Oid         oid;

    /*
     * There's no syscache for pg_database indexed by name, so we must look
     * the hard way.
     */
    pg_database = table_open(DatabaseRelationId, AccessShareLock);
    ScanKeyInit(&entry[0],
                Anum_pg_database_datname,
                BTEqualStrategyNumber, F_NAMEEQ,
                CStringGetDatum(dbname));
    scan = systable_beginscan(pg_database, DatabaseNameIndexId, true,
                              NULL, 1, entry);

    dbtuple = systable_getnext(scan);

    /* We assume that there can be at most one matching tuple */
    if (HeapTupleIsValid(dbtuple))
        oid = ((Form_pg_database) GETSTRUCT(dbtuple))->oid;
    else
        oid = InvalidOid;

    systable_endscan(scan);
    table_close(pg_database, AccessShareLock);

    if (!OidIsValid(oid) && !missing_ok)
        ereport(ERROR,
                (errcode(ERRCODE_UNDEFINED_DATABASE),
                 errmsg("database \"%s\" does not exist",
                        dbname)));

    return oid;
}


/*
 * get_database_name - given a database OID, look up the name
 *
 * Returns a palloc'd string, or NULL if no such database.
 */
char *
get_database_name(Oid dbid)
{
    HeapTuple   dbtuple;
    char       *result;

    dbtuple = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(dbid));
    if (HeapTupleIsValid(dbtuple))
    {
        result = pstrdup(NameStr(((Form_pg_database) GETSTRUCT(dbtuple))->datname));
        ReleaseSysCache(dbtuple);
    }
    else
        result = NULL;

    return result;
}


/*
 * While dropping a database the pg_database row is marked invalid, but the
 * catalog contents still exist.  Connections to such a database are not
 * allowed.
+ */ +bool +database_is_invalid_form(Form_pg_database datform) +{ + return datform->datconnlimit == DATCONNLIMIT_INVALID_DB; +} + + +/* + * Convenience wrapper around database_is_invalid_form() + */ +bool +database_is_invalid_oid(Oid dboid) +{ + HeapTuple dbtup; + Form_pg_database dbform; + bool invalid; + + dbtup = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(dboid)); + if (!HeapTupleIsValid(dbtup)) + elog(ERROR, "cache lookup failed for database %u", dboid); + dbform = (Form_pg_database) GETSTRUCT(dbtup); + + invalid = database_is_invalid_form(dbform); + + ReleaseSysCache(dbtup); + + return invalid; +} + + +/* + * recovery_create_dbdir() + * + * During recovery, there's a case where we validly need to recover a missing + * tablespace directory so that recovery can continue. This happens when + * recovery wants to create a database but the holding tablespace has been + * removed before the server stopped. Since we expect that the directory will + * be gone before reaching recovery consistency, and we have no knowledge about + * the tablespace other than its OID here, we create a real directory under + * pg_tblspc here instead of restoring the symlink. + * + * If only_tblspc is true, then the requested directory must be in pg_tblspc/ + */ +static void +recovery_create_dbdir(char *path, bool only_tblspc) +{ + struct stat st; + + Assert(RecoveryInProgress()); + + if (stat(path, &st) == 0) + return; + + if (only_tblspc && strstr(path, "pg_tblspc/") == NULL) + elog(PANIC, "requested to created invalid directory: %s", path); + + if (reachedConsistency && !allow_in_place_tablespaces) + ereport(PANIC, + errmsg("missing directory \"%s\"", path)); + + elog(reachedConsistency ? 
WARNING : DEBUG1, + "creating missing directory: %s", path); + + if (pg_mkdir_p(path, pg_dir_create_mode) != 0) + ereport(PANIC, + errmsg("could not create missing directory \"%s\": %m", path)); +} + + +/* + * DATABASE resource manager's routines + */ +void +dbase_redo(XLogReaderState *record) +{ + uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK; + + /* Backup blocks are not used in dbase records */ + Assert(!XLogRecHasAnyBlockRefs(record)); + + if (info == XLOG_DBASE_CREATE_FILE_COPY) + { + xl_dbase_create_file_copy_rec *xlrec = + (xl_dbase_create_file_copy_rec *) XLogRecGetData(record); + char *src_path; + char *dst_path; + char *parent_path; + struct stat st; + + src_path = GetDatabasePath(xlrec->src_db_id, xlrec->src_tablespace_id); + dst_path = GetDatabasePath(xlrec->db_id, xlrec->tablespace_id); + + /* + * Our theory for replaying a CREATE is to forcibly drop the target + * subdirectory if present, then re-copy the source data. This may be + * more work than needed, but it is simple to implement. + */ + if (stat(dst_path, &st) == 0 && S_ISDIR(st.st_mode)) + { + if (!rmtree(dst_path, true)) + /* If this failed, copydir() below is going to error. */ + ereport(WARNING, + (errmsg("some useless files may be left behind in old database directory \"%s\"", + dst_path))); + } + + /* + * If the parent of the target path doesn't exist, create it now. This + * enables us to create the target underneath later. Note that if + * the database dir is not in a tablespace, the parent will always + * exist, so this never runs in that case. + */ + parent_path = pstrdup(dst_path); + get_parent_directory(parent_path); + if (stat(parent_path, &st) < 0) + { + if (errno != ENOENT) + ereport(FATAL, + errmsg("could not stat directory \"%s\": %m", + dst_path)); + + recovery_create_dbdir(parent_path, true); + } + pfree(parent_path); + + /* + * There's a case where the copy source directory is missing for the + * same reason above. 
Create the emtpy source directory so that + * copydir below doesn't fail. The directory will be dropped soon by + * recovery. + */ + if (stat(src_path, &st) < 0 && errno == ENOENT) + recovery_create_dbdir(src_path, false); + + /* + * Force dirty buffers out to disk, to ensure source database is + * up-to-date for the copy. + */ + FlushDatabaseBuffers(xlrec->src_db_id); + + /* Close all sgmr fds in all backends. */ + WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE)); + + /* + * Copy this subdirectory to the new location + * + * We don't need to copy subdirectories + */ + copydir(src_path, dst_path, false); + + pfree(src_path); + pfree(dst_path); + } + else if (info == XLOG_DBASE_CREATE_WAL_LOG) + { + xl_dbase_create_wal_log_rec *xlrec = + (xl_dbase_create_wal_log_rec *) XLogRecGetData(record); + char *dbpath; + char *parent_path; + + dbpath = GetDatabasePath(xlrec->db_id, xlrec->tablespace_id); + + /* create the parent directory if needed and valid */ + parent_path = pstrdup(dbpath); + get_parent_directory(parent_path); + recovery_create_dbdir(parent_path, true); + + /* Create the database directory with the version file. */ + CreateDirAndVersionFile(dbpath, xlrec->db_id, xlrec->tablespace_id, + true); + pfree(dbpath); + } + else if (info == XLOG_DBASE_DROP) + { + xl_dbase_drop_rec *xlrec = (xl_dbase_drop_rec *) XLogRecGetData(record); + char *dst_path; + int i; + + if (InHotStandby) + { + /* + * Lock database while we resolve conflicts to ensure that + * InitPostgres() cannot fully re-execute concurrently. This + * avoids backends re-connecting automatically to same database, + * which can happen in some cases. + * + * This will lock out walsenders trying to connect to db-specific + * slots for logical decoding too, so it's safe for us to drop + * slots. 
+ */ + LockSharedObjectForSession(DatabaseRelationId, xlrec->db_id, 0, AccessExclusiveLock); + ResolveRecoveryConflictWithDatabase(xlrec->db_id); + } + + /* Drop any database-specific replication slots */ + ReplicationSlotsDropDBSlots(xlrec->db_id); + + /* Drop pages for this database that are in the shared buffer cache */ + DropDatabaseBuffers(xlrec->db_id); + + /* Also, clean out any fsync requests that might be pending in md.c */ + ForgetDatabaseSyncRequests(xlrec->db_id); + + /* Clean out the xlog relcache too */ + XLogDropDatabase(xlrec->db_id); + + /* Close all sgmr fds in all backends. */ + WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE)); + + for (i = 0; i < xlrec->ntablespaces; i++) + { + dst_path = GetDatabasePath(xlrec->db_id, xlrec->tablespace_ids[i]); + + /* And remove the physical files */ + if (!rmtree(dst_path, true)) + ereport(WARNING, + (errmsg("some useless files may be left behind in old database directory \"%s\"", + dst_path))); + pfree(dst_path); + } + + if (InHotStandby) + { + /* + * Release locks prior to commit. XXX There is a race condition + * here that may allow backends to reconnect, but the window for + * this is small because the gap between here and commit is mostly + * fairly small and it is unlikely that people will be dropping + * databases that we are trying to connect to anyway. + */ + UnlockSharedObjectForSession(DatabaseRelationId, xlrec->db_id, 0, AccessExclusiveLock); + } + } + else + elog(PANIC, "dbase_redo: unknown op code %u", info); +} diff --git a/src/backend/commands/define.c b/src/backend/commands/define.c new file mode 100644 index 0000000..1e07fa9 --- /dev/null +++ b/src/backend/commands/define.c @@ -0,0 +1,391 @@ +/*------------------------------------------------------------------------- + * + * define.c + * Support routines for various kinds of object creation. 
+ * + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/define.c + * + * DESCRIPTION + * The "DefineFoo" routines take the parse tree and pick out the + * appropriate arguments/flags, passing the results to the + * corresponding "FooDefine" routines (in src/catalog) that do + * the actual catalog-munging. These routines also verify permission + * of the user to execute the command. + * + * NOTES + * These things must be defined and committed in the following order: + * "create function": + * input/output, recv/send procedures + * "create type": + * type + * "create operator": + * operators + * + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include +#include + +#include "catalog/namespace.h" +#include "commands/defrem.h" +#include "nodes/makefuncs.h" +#include "parser/parse_type.h" +#include "parser/scansup.h" +#include "utils/builtins.h" + +/* + * Extract a string value (otherwise uninterpreted) from a DefElem. + */ +char * +defGetString(DefElem *def) +{ + if (def->arg == NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("%s requires a parameter", + def->defname))); + switch (nodeTag(def->arg)) + { + case T_Integer: + return psprintf("%ld", (long) intVal(def->arg)); + case T_Float: + return castNode(Float, def->arg)->fval; + case T_Boolean: + return boolVal(def->arg) ? "true" : "false"; + case T_String: + return strVal(def->arg); + case T_TypeName: + return TypeNameToString((TypeName *) def->arg); + case T_List: + return NameListToString((List *) def->arg); + case T_A_Star: + return pstrdup("*"); + default: + elog(ERROR, "unrecognized node type: %d", (int) nodeTag(def->arg)); + } + return NULL; /* keep compiler quiet */ +} + +/* + * Extract a numeric value (actually double) from a DefElem. 
+ */ +double +defGetNumeric(DefElem *def) +{ + if (def->arg == NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("%s requires a numeric value", + def->defname))); + switch (nodeTag(def->arg)) + { + case T_Integer: + return (double) intVal(def->arg); + case T_Float: + return floatVal(def->arg); + default: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("%s requires a numeric value", + def->defname))); + } + return 0; /* keep compiler quiet */ +} + +/* + * Extract a boolean value from a DefElem. + */ +bool +defGetBoolean(DefElem *def) +{ + /* + * If no parameter given, assume "true" is meant. + */ + if (def->arg == NULL) + return true; + + /* + * Allow 0, 1, "true", "false", "on", "off" + */ + switch (nodeTag(def->arg)) + { + case T_Integer: + switch (intVal(def->arg)) + { + case 0: + return false; + case 1: + return true; + default: + /* otherwise, error out below */ + break; + } + break; + default: + { + char *sval = defGetString(def); + + /* + * The set of strings accepted here should match up with the + * grammar's opt_boolean_or_string production. + */ + if (pg_strcasecmp(sval, "true") == 0) + return true; + if (pg_strcasecmp(sval, "false") == 0) + return false; + if (pg_strcasecmp(sval, "on") == 0) + return true; + if (pg_strcasecmp(sval, "off") == 0) + return false; + } + break; + } + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("%s requires a Boolean value", + def->defname))); + return false; /* keep compiler quiet */ +} + +/* + * Extract an int32 value from a DefElem. 
+ */ +int32 +defGetInt32(DefElem *def) +{ + if (def->arg == NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("%s requires an integer value", + def->defname))); + switch (nodeTag(def->arg)) + { + case T_Integer: + return (int32) intVal(def->arg); + default: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("%s requires an integer value", + def->defname))); + } + return 0; /* keep compiler quiet */ +} + +/* + * Extract an int64 value from a DefElem. + */ +int64 +defGetInt64(DefElem *def) +{ + if (def->arg == NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("%s requires a numeric value", + def->defname))); + switch (nodeTag(def->arg)) + { + case T_Integer: + return (int64) intVal(def->arg); + case T_Float: + + /* + * Values too large for int4 will be represented as Float + * constants by the lexer. Accept these if they are valid int8 + * strings. + */ + return DatumGetInt64(DirectFunctionCall1(int8in, + CStringGetDatum(castNode(Float, def->arg)->fval))); + default: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("%s requires a numeric value", + def->defname))); + } + return 0; /* keep compiler quiet */ +} + +/* + * Extract an OID value from a DefElem. + */ +Oid +defGetObjectId(DefElem *def) +{ + if (def->arg == NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("%s requires a numeric value", + def->defname))); + switch (nodeTag(def->arg)) + { + case T_Integer: + return (Oid) intVal(def->arg); + case T_Float: + + /* + * Values too large for int4 will be represented as Float + * constants by the lexer. Accept these if they are valid OID + * strings. 
+ */ + return DatumGetObjectId(DirectFunctionCall1(oidin, + CStringGetDatum(castNode(Float, def->arg)->fval))); + default: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("%s requires a numeric value", + def->defname))); + } + return 0; /* keep compiler quiet */ +} + +/* + * Extract a possibly-qualified name (as a List of Strings) from a DefElem. + */ +List * +defGetQualifiedName(DefElem *def) +{ + if (def->arg == NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("%s requires a parameter", + def->defname))); + switch (nodeTag(def->arg)) + { + case T_TypeName: + return ((TypeName *) def->arg)->names; + case T_List: + return (List *) def->arg; + case T_String: + /* Allow quoted name for backwards compatibility */ + return list_make1(def->arg); + default: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("argument of %s must be a name", + def->defname))); + } + return NIL; /* keep compiler quiet */ +} + +/* + * Extract a TypeName from a DefElem. + * + * Note: we do not accept a List arg here, because the parser will only + * return a bare List when the name looks like an operator name. + */ +TypeName * +defGetTypeName(DefElem *def) +{ + if (def->arg == NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("%s requires a parameter", + def->defname))); + switch (nodeTag(def->arg)) + { + case T_TypeName: + return (TypeName *) def->arg; + case T_String: + /* Allow quoted typename for backwards compatibility */ + return makeTypeNameFromNameList(list_make1(def->arg)); + default: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("argument of %s must be a type name", + def->defname))); + } + return NULL; /* keep compiler quiet */ +} + +/* + * Extract a type length indicator (either absolute bytes, or + * -1 for "variable") from a DefElem. 
 */
int
defGetTypeLength(DefElem *def)
{
    if (def->arg == NULL)
        ereport(ERROR,
                (errcode(ERRCODE_SYNTAX_ERROR),
                 errmsg("%s requires a parameter",
                        def->defname)));
    /*
     * Note the control flow: several cases deliberately break out of the
     * switch (rather than returning or erroring) so that unrecognized
     * values reach the shared "invalid argument" error at the bottom.
     */
    switch (nodeTag(def->arg))
    {
        case T_Integer:
            return intVal(def->arg);
        case T_Float:
            /* fractional lengths make no sense; demand an integer */
            ereport(ERROR,
                    (errcode(ERRCODE_SYNTAX_ERROR),
                     errmsg("%s requires an integer value",
                            def->defname)));
            break;
        case T_String:
            if (pg_strcasecmp(strVal(def->arg), "variable") == 0)
                return -1;      /* variable length */
            break;
        case T_TypeName:
            /* cope if grammar chooses to believe "variable" is a typename */
            if (pg_strcasecmp(TypeNameToString((TypeName *) def->arg),
                              "variable") == 0)
                return -1;      /* variable length */
            break;
        case T_List:
            /* must be an operator name */
            break;
        default:
            elog(ERROR, "unrecognized node type: %d", (int) nodeTag(def->arg));
    }
    ereport(ERROR,
            (errcode(ERRCODE_SYNTAX_ERROR),
             errmsg("invalid argument for %s: \"%s\"",
                    def->defname, defGetString(def))));
    return 0;                   /* keep compiler quiet */
}

/*
 * Extract a list of string values (otherwise uninterpreted) from a DefElem.
 *
 * The argument must be a List whose members are all String nodes; the List
 * itself is returned (not copied).
 */
List *
defGetStringList(DefElem *def)
{
    ListCell   *cell;

    if (def->arg == NULL)
        ereport(ERROR,
                (errcode(ERRCODE_SYNTAX_ERROR),
                 errmsg("%s requires a parameter",
                        def->defname)));
    if (nodeTag(def->arg) != T_List)
        elog(ERROR, "unrecognized node type: %d", (int) nodeTag(def->arg));

    /* Verify every member is a String before handing the list back */
    foreach(cell, (List *) def->arg)
    {
        Node       *str = (Node *) lfirst(cell);

        if (!IsA(str, String))
            elog(ERROR, "unexpected node type in name list: %d",
                 (int) nodeTag(str));
    }

    return (List *) def->arg;
}

/*
 * Raise an error about a conflicting DefElem.
 */
void
errorConflictingDefElem(DefElem *defel, ParseState *pstate)
{
	ereport(ERROR,
			errcode(ERRCODE_SYNTAX_ERROR),
			errmsg("conflicting or redundant options"),
			parser_errposition(pstate, defel->location));
}
diff --git a/src/backend/commands/discard.c b/src/backend/commands/discard.c
new file mode 100644
index 0000000..c583539
--- /dev/null
+++ b/src/backend/commands/discard.c
@@ -0,0 +1,78 @@
/*-------------------------------------------------------------------------
 *
 * discard.c
 *	  The implementation of the DISCARD command
 *
 * Copyright (c) 1996-2022, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *	  src/backend/commands/discard.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/xact.h"
#include "catalog/namespace.h"
#include "commands/async.h"
#include "commands/discard.h"
#include "commands/prepare.h"
#include "commands/sequence.h"
#include "utils/guc.h"
#include "utils/portal.h"

static void DiscardAll(bool isTopLevel);

/*
 * DISCARD { ALL | SEQUENCES | TEMP | PLANS }
 *
 * Dispatch to the appropriate reset action for each DISCARD variant.
 */
void
DiscardCommand(DiscardStmt *stmt, bool isTopLevel)
{
	switch (stmt->target)
	{
		case DISCARD_ALL:
			DiscardAll(isTopLevel);
			break;

		case DISCARD_PLANS:
			ResetPlanCache();
			break;

		case DISCARD_SEQUENCES:
			ResetSequenceCaches();
			break;

		case DISCARD_TEMP:
			ResetTempTableNamespace();
			break;

		default:
			elog(ERROR, "unrecognized DISCARD target: %d", stmt->target);
	}
}

/*
 * DISCARD ALL: reset the session to its initial state, performing the
 * union of the individual DISCARD actions plus GUC/auth/portal resets.
 */
static void
DiscardAll(bool isTopLevel)
{
	/*
	 * Disallow DISCARD ALL in a transaction block. This is arguably
	 * inconsistent (we don't make a similar check in the command sequence
	 * that DISCARD ALL is equivalent to), but the idea is to catch mistakes:
	 * DISCARD ALL inside a transaction block would leave the transaction
	 * still uncommitted.
	 */
	PreventInTransactionBlock(isTopLevel, "DISCARD ALL");

	/* Closing portals might run user-defined code, so do that first. */
	PortalHashTableDeleteAll();
	SetPGVariable("session_authorization", NIL, false);
	ResetAllOptions();
	DropAllPreparedStatements();
	Async_UnlistenAll();
	LockReleaseAll(USER_LOCKMETHOD, true);
	ResetPlanCache();
	ResetTempTableNamespace();
	ResetSequenceCaches();
}
diff --git a/src/backend/commands/dropcmds.c b/src/backend/commands/dropcmds.c
new file mode 100644
index 0000000..c9b5732
--- /dev/null
+++ b/src/backend/commands/dropcmds.c
@@ -0,0 +1,493 @@
/*-------------------------------------------------------------------------
 *
 * dropcmds.c
 *	  handle various "DROP" operations
 *
 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/commands/dropcmds.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/htup_details.h"
#include "access/table.h"
#include "access/xact.h"
#include "catalog/dependency.h"
#include "catalog/namespace.h"
#include "catalog/objectaddress.h"
#include "catalog/pg_class.h"
#include "catalog/pg_proc.h"
#include "commands/defrem.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "parser/parse_type.h"
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/syscache.h"


static void does_not_exist_skipping(ObjectType objtype,
									Node *object);
static bool owningrel_does_not_exist_skipping(List *object,
											  const char **msg, char **name);
static bool schema_does_not_exist_skipping(List *object,
										   const char **msg, char **name);
static bool type_in_list_does_not_exist_skipping(List *typenames,
												 const char **msg, char **name);


/*
 * Drop one or more objects.
 *
 * We don't currently handle all object types here.
 Relations, for example,
 * require special handling, because (for example) indexes have additional
 * locking requirements.
 *
 * We look up all the objects first, and then delete them in a single
 * performMultipleDeletions() call.  This avoids unnecessary DROP RESTRICT
 * errors if there are dependencies between them.
 */
void
RemoveObjects(DropStmt *stmt)
{
	ObjectAddresses *objects;
	ListCell   *cell1;

	objects = new_object_addresses();

	foreach(cell1, stmt->objects)
	{
		ObjectAddress address;
		Node	   *object = lfirst(cell1);
		Relation	relation = NULL;
		Oid			namespaceId;

		/*
		 * Get an ObjectAddress for the object.  AccessExclusiveLock is
		 * requested because the object is about to be dropped.
		 */
		address = get_object_address(stmt->removeType,
									 object,
									 &relation,
									 AccessExclusiveLock,
									 stmt->missing_ok);

		/*
		 * Issue NOTICE if supplied object was not found.  Note this is only
		 * relevant in the missing_ok case, because otherwise
		 * get_object_address would have thrown an error.
		 */
		if (!OidIsValid(address.objectId))
		{
			Assert(stmt->missing_ok);
			does_not_exist_skipping(stmt->removeType, object);
			continue;
		}

		/*
		 * Although COMMENT ON FUNCTION, SECURITY LABEL ON FUNCTION, etc. are
		 * happy to operate on an aggregate as on any other function, we have
		 * historically not allowed this for DROP FUNCTION.
		 */
		if (stmt->removeType == OBJECT_FUNCTION)
		{
			if (get_func_prokind(address.objectId) == PROKIND_AGGREGATE)
				ereport(ERROR,
						(errcode(ERRCODE_WRONG_OBJECT_TYPE),
						 errmsg("\"%s\" is an aggregate function",
								NameListToString(castNode(ObjectWithArgs, object)->objname)),
						 errhint("Use DROP AGGREGATE to drop aggregate functions.")));
		}

		/* Check permissions. */
		namespaceId = get_object_namespace(&address);
		if (!OidIsValid(namespaceId) ||
			!pg_namespace_ownercheck(namespaceId, GetUserId()))
			check_object_ownership(GetUserId(), stmt->removeType, address,
								   object, relation);

		/*
		 * Make note if a temporary namespace has been accessed in this
		 * transaction.
		 */
		if (OidIsValid(namespaceId) && isTempNamespace(namespaceId))
			MyXactFlags |= XACT_FLAGS_ACCESSEDTEMPNAMESPACE;

		/* Release any relcache reference count, but keep lock until commit. */
		if (relation)
			table_close(relation, NoLock);

		add_exact_object_address(&address, objects);
	}

	/* Here we really delete them. */
	performMultipleDeletions(objects, stmt->behavior, 0);

	free_object_addresses(objects);
}

/*
 * owningrel_does_not_exist_skipping
 *		Subroutine for RemoveObjects
 *
 * After determining that a specification for a rule or trigger returns that
 * the specified object does not exist, test whether its owning relation, and
 * its schema, exist or not; if they do, return false --- the trigger or rule
 * itself is missing instead.  If the owning relation or its schema do not
 * exist, fill the error message format string and name, and return true.
 */
static bool
owningrel_does_not_exist_skipping(List *object, const char **msg, char **name)
{
	List	   *parent_object;
	RangeVar   *parent_rel;

	/* Strip the final name component, leaving the owning relation's name. */
	parent_object = list_truncate(list_copy(object),
								  list_length(object) - 1);

	if (schema_does_not_exist_skipping(parent_object, msg, name))
		return true;

	parent_rel = makeRangeVarFromNameList(parent_object);

	if (!OidIsValid(RangeVarGetRelid(parent_rel, NoLock, true)))
	{
		*msg = gettext_noop("relation \"%s\" does not exist, skipping");
		*name = NameListToString(parent_object);

		return true;
	}

	return false;
}

/*
 * schema_does_not_exist_skipping
 *		Subroutine for RemoveObjects
 *
 * After determining that a specification for a schema-qualifiable object
 * refers to an object that does not exist, test whether the specified schema
 * exists or not.  If no schema was specified, or if the schema does exist,
 * return false -- the object itself is missing instead.  If the specified
 * schema does not exist, fill the error message format string and the
 * specified schema name, and return true.
+ */ +static bool +schema_does_not_exist_skipping(List *object, const char **msg, char **name) +{ + RangeVar *rel; + + rel = makeRangeVarFromNameList(object); + + if (rel->schemaname != NULL && + !OidIsValid(LookupNamespaceNoError(rel->schemaname))) + { + *msg = gettext_noop("schema \"%s\" does not exist, skipping"); + *name = rel->schemaname; + + return true; + } + + return false; +} + +/* + * type_in_list_does_not_exist_skipping + * Subroutine for RemoveObjects + * + * After determining that a specification for a function, cast, aggregate or + * operator returns that the specified object does not exist, test whether the + * involved datatypes, and their schemas, exist or not; if they do, return + * false --- the original object itself is missing instead. If the datatypes + * or schemas do not exist, fill the error message format string and the + * missing name, and return true. + * + * First parameter is a list of TypeNames. + */ +static bool +type_in_list_does_not_exist_skipping(List *typenames, const char **msg, + char **name) +{ + ListCell *l; + + foreach(l, typenames) + { + TypeName *typeName = lfirst_node(TypeName, l); + + if (typeName != NULL) + { + if (!OidIsValid(LookupTypeNameOid(NULL, typeName, true))) + { + /* type doesn't exist, try to find why */ + if (schema_does_not_exist_skipping(typeName->names, msg, name)) + return true; + + *msg = gettext_noop("type \"%s\" does not exist, skipping"); + *name = TypeNameToString(typeName); + + return true; + } + } + } + + return false; +} + +/* + * does_not_exist_skipping + * Subroutine for RemoveObjects + * + * Generate a NOTICE stating that the named object was not found, and is + * being skipped. This is only relevant when "IF EXISTS" is used; otherwise, + * get_object_address() in RemoveObjects would have thrown an ERROR. 
 */
static void
does_not_exist_skipping(ObjectType objtype, Node *object)
{
	const char *msg = NULL;
	char	   *name = NULL;
	char	   *args = NULL;	/* if set, supplies the second %s value */

	switch (objtype)
	{
		case OBJECT_ACCESS_METHOD:
			msg = gettext_noop("access method \"%s\" does not exist, skipping");
			name = strVal(object);
			break;
		case OBJECT_TYPE:
		case OBJECT_DOMAIN:
			{
				TypeName   *typ = castNode(TypeName, object);

				if (!schema_does_not_exist_skipping(typ->names, &msg, &name))
				{
					msg = gettext_noop("type \"%s\" does not exist, skipping");
					name = TypeNameToString(typ);
				}
			}
			break;
		case OBJECT_COLLATION:
			if (!schema_does_not_exist_skipping(castNode(List, object), &msg, &name))
			{
				msg = gettext_noop("collation \"%s\" does not exist, skipping");
				name = NameListToString(castNode(List, object));
			}
			break;
		case OBJECT_CONVERSION:
			if (!schema_does_not_exist_skipping(castNode(List, object), &msg, &name))
			{
				msg = gettext_noop("conversion \"%s\" does not exist, skipping");
				name = NameListToString(castNode(List, object));
			}
			break;
		case OBJECT_SCHEMA:
			msg = gettext_noop("schema \"%s\" does not exist, skipping");
			name = strVal(object);
			break;
		case OBJECT_STATISTIC_EXT:
			if (!schema_does_not_exist_skipping(castNode(List, object), &msg, &name))
			{
				msg = gettext_noop("statistics object \"%s\" does not exist, skipping");
				name = NameListToString(castNode(List, object));
			}
			break;
		case OBJECT_TSPARSER:
			if (!schema_does_not_exist_skipping(castNode(List, object), &msg, &name))
			{
				msg = gettext_noop("text search parser \"%s\" does not exist, skipping");
				name = NameListToString(castNode(List, object));
			}
			break;
		case OBJECT_TSDICTIONARY:
			if (!schema_does_not_exist_skipping(castNode(List, object), &msg, &name))
			{
				msg = gettext_noop("text search dictionary \"%s\" does not exist, skipping");
				name = NameListToString(castNode(List, object));
			}
			break;
		case OBJECT_TSTEMPLATE:
			if (!schema_does_not_exist_skipping(castNode(List, object), &msg, &name))
			{
				msg = gettext_noop("text search template \"%s\" does not exist, skipping");
				name = NameListToString(castNode(List, object));
			}
			break;
		case OBJECT_TSCONFIGURATION:
			if (!schema_does_not_exist_skipping(castNode(List, object), &msg, &name))
			{
				msg = gettext_noop("text search configuration \"%s\" does not exist, skipping");
				name = NameListToString(castNode(List, object));
			}
			break;
		case OBJECT_EXTENSION:
			msg = gettext_noop("extension \"%s\" does not exist, skipping");
			name = strVal(object);
			break;
		case OBJECT_FUNCTION:
			{
				ObjectWithArgs *owa = castNode(ObjectWithArgs, object);

				if (!schema_does_not_exist_skipping(owa->objname, &msg, &name) &&
					!type_in_list_does_not_exist_skipping(owa->objargs, &msg, &name))
				{
					msg = gettext_noop("function %s(%s) does not exist, skipping");
					name = NameListToString(owa->objname);
					args = TypeNameListToString(owa->objargs);
				}
				break;
			}
		case OBJECT_PROCEDURE:
			{
				ObjectWithArgs *owa = castNode(ObjectWithArgs, object);

				if (!schema_does_not_exist_skipping(owa->objname, &msg, &name) &&
					!type_in_list_does_not_exist_skipping(owa->objargs, &msg, &name))
				{
					msg = gettext_noop("procedure %s(%s) does not exist, skipping");
					name = NameListToString(owa->objname);
					args = TypeNameListToString(owa->objargs);
				}
				break;
			}
		case OBJECT_ROUTINE:
			{
				ObjectWithArgs *owa = castNode(ObjectWithArgs, object);

				if (!schema_does_not_exist_skipping(owa->objname, &msg, &name) &&
					!type_in_list_does_not_exist_skipping(owa->objargs, &msg, &name))
				{
					msg = gettext_noop("routine %s(%s) does not exist, skipping");
					name = NameListToString(owa->objname);
					args = TypeNameListToString(owa->objargs);
				}
				break;
			}
		case OBJECT_AGGREGATE:
			{
				ObjectWithArgs *owa = castNode(ObjectWithArgs, object);

				if (!schema_does_not_exist_skipping(owa->objname, &msg, &name) &&
					!type_in_list_does_not_exist_skipping(owa->objargs, &msg, &name))
				{
					msg = gettext_noop("aggregate %s(%s) does not exist, skipping");
					name = NameListToString(owa->objname);
					args = TypeNameListToString(owa->objargs);
				}
				break;
			}
		case OBJECT_OPERATOR:
			{
				ObjectWithArgs *owa = castNode(ObjectWithArgs, object);

				if (!schema_does_not_exist_skipping(owa->objname, &msg, &name) &&
					!type_in_list_does_not_exist_skipping(owa->objargs, &msg, &name))
				{
					msg = gettext_noop("operator %s does not exist, skipping");
					name = NameListToString(owa->objname);
				}
				break;
			}
		case OBJECT_LANGUAGE:
			msg = gettext_noop("language \"%s\" does not exist, skipping");
			name = strVal(object);
			break;
		case OBJECT_CAST:
			{
				if (!type_in_list_does_not_exist_skipping(list_make1(linitial(castNode(List, object))), &msg, &name) &&
					!type_in_list_does_not_exist_skipping(list_make1(lsecond(castNode(List, object))), &msg, &name))
				{
					/* XXX quote or no quote? */
					msg = gettext_noop("cast from type %s to type %s does not exist, skipping");
					name = TypeNameToString(linitial_node(TypeName, castNode(List, object)));
					args = TypeNameToString(lsecond_node(TypeName, castNode(List, object)));
				}
			}
			break;
		case OBJECT_TRANSFORM:
			if (!type_in_list_does_not_exist_skipping(list_make1(linitial(castNode(List, object))), &msg, &name))
			{
				msg = gettext_noop("transform for type %s language \"%s\" does not exist, skipping");
				name = TypeNameToString(linitial_node(TypeName, castNode(List, object)));
				args = strVal(lsecond(castNode(List, object)));
			}
			break;
		case OBJECT_TRIGGER:
			if (!owningrel_does_not_exist_skipping(castNode(List, object), &msg, &name))
			{
				msg = gettext_noop("trigger \"%s\" for relation \"%s\" does not exist, skipping");
				name = strVal(llast(castNode(List, object)));
				args = NameListToString(list_truncate(list_copy(castNode(List, object)),
													  list_length(castNode(List, object)) - 1));
			}
			break;
		case OBJECT_POLICY:
			if (!owningrel_does_not_exist_skipping(castNode(List, object), &msg, &name))
			{
				msg = gettext_noop("policy \"%s\" for relation \"%s\" does not exist, skipping");
				name = strVal(llast(castNode(List, object)));
				args = NameListToString(list_truncate(list_copy(castNode(List, object)),
													  list_length(castNode(List, object)) - 1));
			}
			break;
		case OBJECT_EVENT_TRIGGER:
			msg = gettext_noop("event trigger \"%s\" does not exist, skipping");
			name = strVal(object);
			break;
		case OBJECT_RULE:
			if (!owningrel_does_not_exist_skipping(castNode(List, object), &msg, &name))
			{
				msg = gettext_noop("rule \"%s\" for relation \"%s\" does not exist, skipping");
				name = strVal(llast(castNode(List, object)));
				args = NameListToString(list_truncate(list_copy(castNode(List, object)),
													  list_length(castNode(List, object)) - 1));
			}
			break;
		case OBJECT_FDW:
			msg = gettext_noop("foreign-data wrapper \"%s\" does not exist, skipping");
			name = strVal(object);
			break;
		case OBJECT_FOREIGN_SERVER:
			msg = gettext_noop("server \"%s\" does not exist, skipping");
			name = strVal(object);
			break;
		case OBJECT_OPCLASS:
			{
				/* first list element is the access method name */
				List	   *opcname = list_copy_tail(castNode(List, object), 1);

				if (!schema_does_not_exist_skipping(opcname, &msg, &name))
				{
					msg = gettext_noop("operator class \"%s\" does not exist for access method \"%s\", skipping");
					name = NameListToString(opcname);
					args = strVal(linitial(castNode(List, object)));
				}
			}
			break;
		case OBJECT_OPFAMILY:
			{
				/* first list element is the access method name */
				List	   *opfname = list_copy_tail(castNode(List, object), 1);

				if (!schema_does_not_exist_skipping(opfname, &msg, &name))
				{
					msg = gettext_noop("operator family \"%s\" does not exist for access method \"%s\", skipping");
					name = NameListToString(opfname);
					args = strVal(linitial(castNode(List, object)));
				}
			}
			break;
		case OBJECT_PUBLICATION:
			msg = gettext_noop("publication \"%s\" does not exist, skipping");
			name = strVal(object);
			break;
		default:
			elog(ERROR, "unrecognized object type: %d", (int) objtype);
			break;
	}

	if (!args)
		ereport(NOTICE, (errmsg(msg, name)));
	else
		ereport(NOTICE, (errmsg(msg, name, args)));
}
diff --git a/src/backend/commands/event_trigger.c b/src/backend/commands/event_trigger.c
new file mode 100644
index 0000000..356aac4
--- /dev/null
+++ b/src/backend/commands/event_trigger.c
@@ -0,0 +1,2182 @@
/*-------------------------------------------------------------------------
 *
 * event_trigger.c
 *	  PostgreSQL EVENT TRIGGER support code.
 *
 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/commands/event_trigger.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/htup_details.h"
#include "access/table.h"
#include "access/xact.h"
#include "catalog/catalog.h"
#include "catalog/dependency.h"
#include "catalog/indexing.h"
#include "catalog/objectaccess.h"
#include "catalog/pg_event_trigger.h"
#include "catalog/pg_namespace.h"
#include "catalog/pg_opclass.h"
#include "catalog/pg_opfamily.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_trigger.h"
#include "catalog/pg_ts_config.h"
#include "catalog/pg_type.h"
#include "commands/dbcommands.h"
#include "commands/event_trigger.h"
#include "commands/extension.h"
#include "commands/trigger.h"
#include "funcapi.h"
#include "lib/ilist.h"
#include "miscadmin.h"
#include "parser/parse_func.h"
#include "pgstat.h"
#include "tcop/deparse_utility.h"
#include "tcop/utility.h"
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/evtcache.h"
#include "utils/fmgroids.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/syscache.h"

/* Per-query state tracked while event triggers are active */
typedef struct EventTriggerQueryState
{
	/* memory context for this state's objects */
	MemoryContext cxt;

	/* sql_drop */
	slist_head	SQLDropList;
	bool		in_sql_drop;

	/* table_rewrite */
	Oid			table_rewrite_oid;
								/* InvalidOid, or set for table_rewrite
								 * event */
	int			table_rewrite_reason;	/* AT_REWRITE reason */

	/* Support for command collection */
	bool		commandCollectionInhibited;
	CollectedCommand *currentCommand;
	List	   *commandList;	/* list of CollectedCommand; see
								 * deparse_utility.h */
	struct EventTriggerQueryState *previous;
} EventTriggerQueryState;

static EventTriggerQueryState *currentEventTriggerState = NULL;

/* Support for dropped objects */
typedef struct SQLDropObject
{
	ObjectAddress address;
	const char *schemaname;
	const char *objname;
	const char *objidentity;
	const char *objecttype;
	List	   *addrnames;
	List	   *addrargs;
	bool		original;
	bool		normal;
	bool		istemp;
	slist_node	next;
} SQLDropObject;

static void AlterEventTriggerOwner_internal(Relation rel,
											HeapTuple tup,
											Oid newOwnerId);
static void error_duplicate_filter_variable(const char *defname);
static Datum filter_list_to_array(List *filterlist);
static Oid	insert_event_trigger_tuple(const char *trigname, const char *eventname,
									   Oid evtOwner, Oid funcoid, List *tags);
static void validate_ddl_tags(const char *filtervar, List *taglist);
static void validate_table_rewrite_tags(const char *filtervar, List *taglist);
static void EventTriggerInvoke(List *fn_oid_list, EventTriggerData *trigdata);
static const char *stringify_grant_objtype(ObjectType objtype);
static const char *stringify_adefprivs_objtype(ObjectType objtype);

/*
 * Create an event trigger.
 */
Oid
CreateEventTrigger(CreateEventTrigStmt *stmt)
{
	HeapTuple	tuple;
	Oid			funcoid;
	Oid			funcrettype;
	Oid			evtowner = GetUserId();
	ListCell   *lc;
	List	   *tags = NULL;	/* filter tags from the WHEN clause, if any */

	/*
	 * It would be nice to allow database owners or even regular users to do
	 * this, but there are obvious privilege escalation risks which would have
	 * to somehow be plugged first.
	 */
	if (!superuser())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied to create event trigger \"%s\"",
						stmt->trigname),
				 errhint("Must be superuser to create an event trigger.")));

	/* Validate event name. */
	if (strcmp(stmt->eventname, "ddl_command_start") != 0 &&
		strcmp(stmt->eventname, "ddl_command_end") != 0 &&
		strcmp(stmt->eventname, "sql_drop") != 0 &&
		strcmp(stmt->eventname, "table_rewrite") != 0)
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 errmsg("unrecognized event name \"%s\"",
						stmt->eventname)));

	/* Validate filter conditions; only "tag" is recognized. */
	foreach(lc, stmt->whenclause)
	{
		DefElem    *def = (DefElem *) lfirst(lc);

		if (strcmp(def->defname, "tag") == 0)
		{
			if (tags != NULL)
				error_duplicate_filter_variable(def->defname);
			tags = (List *) def->arg;
		}
		else
			ereport(ERROR,
					(errcode(ERRCODE_SYNTAX_ERROR),
					 errmsg("unrecognized filter variable \"%s\"", def->defname)));
	}

	/* Validate tag list, if any. */
	if ((strcmp(stmt->eventname, "ddl_command_start") == 0 ||
		 strcmp(stmt->eventname, "ddl_command_end") == 0 ||
		 strcmp(stmt->eventname, "sql_drop") == 0)
		&& tags != NULL)
		validate_ddl_tags("tag", tags);
	else if (strcmp(stmt->eventname, "table_rewrite") == 0
			 && tags != NULL)
		validate_table_rewrite_tags("tag", tags);

	/*
	 * Give user a nice error message if an event trigger of the same name
	 * already exists.
	 */
	tuple = SearchSysCache1(EVENTTRIGGERNAME, CStringGetDatum(stmt->trigname));
	if (HeapTupleIsValid(tuple))
		ereport(ERROR,
				(errcode(ERRCODE_DUPLICATE_OBJECT),
				 errmsg("event trigger \"%s\" already exists",
						stmt->trigname)));

	/* Find and validate the trigger function. */
	funcoid = LookupFuncName(stmt->funcname, 0, NULL, false);
	funcrettype = get_func_rettype(funcoid);
	if (funcrettype != EVENT_TRIGGEROID)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("function %s must return type %s",
						NameListToString(stmt->funcname), "event_trigger")));

	/* Insert catalog entries. */
	return insert_event_trigger_tuple(stmt->trigname, stmt->eventname,
									  evtowner, funcoid, tags);
}

/*
 * Validate DDL command tags.
 */
static void
validate_ddl_tags(const char *filtervar, List *taglist)
{
	ListCell   *lc;

	foreach(lc, taglist)
	{
		const char *tagstr = strVal(lfirst(lc));
		CommandTag	commandTag = GetCommandTagEnum(tagstr);

		if (commandTag == CMDTAG_UNKNOWN)
			ereport(ERROR,
					(errcode(ERRCODE_SYNTAX_ERROR),
					 errmsg("filter value \"%s\" not recognized for filter variable \"%s\"",
							tagstr, filtervar)));
		if (!command_tag_event_trigger_ok(commandTag))
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
			/* translator: %s represents an SQL statement name */
					 errmsg("event triggers are not supported for %s",
							tagstr)));
	}
}

/*
 * Validate DDL command tags for event table_rewrite.
 */
static void
validate_table_rewrite_tags(const char *filtervar, List *taglist)
{
	ListCell   *lc;

	foreach(lc, taglist)
	{
		const char *tagstr = strVal(lfirst(lc));
		CommandTag	commandTag = GetCommandTagEnum(tagstr);

		if (!command_tag_table_rewrite_ok(commandTag))
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
			/* translator: %s represents an SQL statement name */
					 errmsg("event triggers are not supported for %s",
							tagstr)));
	}
}

/*
 * Complain about a duplicate filter variable.
 */
static void
error_duplicate_filter_variable(const char *defname)
{
	ereport(ERROR,
			(errcode(ERRCODE_SYNTAX_ERROR),
			 errmsg("filter variable \"%s\" specified more than once",
					defname)));
}

/*
 * Insert the new pg_event_trigger row and record dependencies.
+ */ +static Oid +insert_event_trigger_tuple(const char *trigname, const char *eventname, Oid evtOwner, + Oid funcoid, List *taglist) +{ + Relation tgrel; + Oid trigoid; + HeapTuple tuple; + Datum values[Natts_pg_trigger]; + bool nulls[Natts_pg_trigger]; + NameData evtnamedata, + evteventdata; + ObjectAddress myself, + referenced; + + /* Open pg_event_trigger. */ + tgrel = table_open(EventTriggerRelationId, RowExclusiveLock); + + /* Build the new pg_trigger tuple. */ + trigoid = GetNewOidWithIndex(tgrel, EventTriggerOidIndexId, + Anum_pg_event_trigger_oid); + values[Anum_pg_event_trigger_oid - 1] = ObjectIdGetDatum(trigoid); + memset(nulls, false, sizeof(nulls)); + namestrcpy(&evtnamedata, trigname); + values[Anum_pg_event_trigger_evtname - 1] = NameGetDatum(&evtnamedata); + namestrcpy(&evteventdata, eventname); + values[Anum_pg_event_trigger_evtevent - 1] = NameGetDatum(&evteventdata); + values[Anum_pg_event_trigger_evtowner - 1] = ObjectIdGetDatum(evtOwner); + values[Anum_pg_event_trigger_evtfoid - 1] = ObjectIdGetDatum(funcoid); + values[Anum_pg_event_trigger_evtenabled - 1] = + CharGetDatum(TRIGGER_FIRES_ON_ORIGIN); + if (taglist == NIL) + nulls[Anum_pg_event_trigger_evttags - 1] = true; + else + values[Anum_pg_event_trigger_evttags - 1] = + filter_list_to_array(taglist); + + /* Insert heap tuple. */ + tuple = heap_form_tuple(tgrel->rd_att, values, nulls); + CatalogTupleInsert(tgrel, tuple); + heap_freetuple(tuple); + + /* Depend on owner. */ + recordDependencyOnOwner(EventTriggerRelationId, trigoid, evtOwner); + + /* Depend on event trigger function. */ + myself.classId = EventTriggerRelationId; + myself.objectId = trigoid; + myself.objectSubId = 0; + referenced.classId = ProcedureRelationId; + referenced.objectId = funcoid; + referenced.objectSubId = 0; + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + + /* Depend on extension, if any. 
*/ + recordDependencyOnCurrentExtension(&myself, false); + + /* Post creation hook for new event trigger */ + InvokeObjectPostCreateHook(EventTriggerRelationId, trigoid, 0); + + /* Close pg_event_trigger. */ + table_close(tgrel, RowExclusiveLock); + + return trigoid; +} + +/* + * In the parser, a clause like WHEN tag IN ('cmd1', 'cmd2') is represented + * by a DefElem whose value is a List of String nodes; in the catalog, we + * store the list of strings as a text array. This function transforms the + * former representation into the latter one. + * + * For cleanliness, we store command tags in the catalog as text. It's + * possible (although not currently anticipated) that we might have + * a case-sensitive filter variable in the future, in which case this would + * need some further adjustment. + */ +static Datum +filter_list_to_array(List *filterlist) +{ + ListCell *lc; + Datum *data; + int i = 0, + l = list_length(filterlist); + + data = (Datum *) palloc(l * sizeof(Datum)); + + foreach(lc, filterlist) + { + const char *value = strVal(lfirst(lc)); + char *result, + *p; + + result = pstrdup(value); + for (p = result; *p; p++) + *p = pg_ascii_toupper((unsigned char) *p); + data[i++] = PointerGetDatum(cstring_to_text(result)); + pfree(result); + } + + return PointerGetDatum(construct_array(data, l, TEXTOID, + -1, false, TYPALIGN_INT)); +} + +/* + * ALTER EVENT TRIGGER foo ENABLE|DISABLE|ENABLE ALWAYS|REPLICA + */ +Oid +AlterEventTrigger(AlterEventTrigStmt *stmt) +{ + Relation tgrel; + HeapTuple tup; + Oid trigoid; + Form_pg_event_trigger evtForm; + char tgenabled = stmt->tgenabled; + + tgrel = table_open(EventTriggerRelationId, RowExclusiveLock); + + tup = SearchSysCacheCopy1(EVENTTRIGGERNAME, + CStringGetDatum(stmt->trigname)); + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("event trigger \"%s\" does not exist", + stmt->trigname))); + + evtForm = (Form_pg_event_trigger) GETSTRUCT(tup); + trigoid = evtForm->oid; + + if 
(!pg_event_trigger_ownercheck(trigoid, GetUserId()))
		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_EVENT_TRIGGER,
					   stmt->trigname);

	/* tuple is a copy, so we can modify it below */
	evtForm->evtenabled = tgenabled;

	CatalogTupleUpdate(tgrel, &tup->t_self, tup);

	InvokeObjectPostAlterHook(EventTriggerRelationId,
							  trigoid, 0);

	/* clean up */
	heap_freetuple(tup);
	table_close(tgrel, RowExclusiveLock);

	return trigoid;
}

/*
 * Change event trigger's owner -- by name
 */
ObjectAddress
AlterEventTriggerOwner(const char *name, Oid newOwnerId)
{
	Oid			evtOid;
	HeapTuple	tup;
	Form_pg_event_trigger evtForm;
	Relation	rel;
	ObjectAddress address;

	rel = table_open(EventTriggerRelationId, RowExclusiveLock);

	tup = SearchSysCacheCopy1(EVENTTRIGGERNAME, CStringGetDatum(name));

	if (!HeapTupleIsValid(tup))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("event trigger \"%s\" does not exist", name)));

	evtForm = (Form_pg_event_trigger) GETSTRUCT(tup);
	evtOid = evtForm->oid;

	AlterEventTriggerOwner_internal(rel, tup, newOwnerId);

	ObjectAddressSet(address, EventTriggerRelationId, evtOid);

	heap_freetuple(tup);

	table_close(rel, RowExclusiveLock);

	return address;
}

/*
 * Change event trigger owner, by OID
 */
void
AlterEventTriggerOwner_oid(Oid trigOid, Oid newOwnerId)
{
	HeapTuple	tup;
	Relation	rel;

	rel = table_open(EventTriggerRelationId, RowExclusiveLock);

	tup = SearchSysCacheCopy1(EVENTTRIGGEROID, ObjectIdGetDatum(trigOid));

	if (!HeapTupleIsValid(tup))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("event trigger with OID %u does not exist", trigOid)));

	AlterEventTriggerOwner_internal(rel, tup, newOwnerId);

	heap_freetuple(tup);

	table_close(rel, RowExclusiveLock);
}

/*
 * Internal workhorse for changing an event trigger's owner
 */
static void
AlterEventTriggerOwner_internal(Relation rel, HeapTuple tup, Oid newOwnerId)
{
	Form_pg_event_trigger form;

	form = (Form_pg_event_trigger) GETSTRUCT(tup);

	/* Fast exit if no change is needed. */
	if (form->evtowner == newOwnerId)
		return;

	if (!pg_event_trigger_ownercheck(form->oid, GetUserId()))
		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_EVENT_TRIGGER,
					   NameStr(form->evtname));

	/* New owner must be a superuser */
	if (!superuser_arg(newOwnerId))
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied to change owner of event trigger \"%s\"",
						NameStr(form->evtname)),
				 errhint("The owner of an event trigger must be a superuser.")));

	form->evtowner = newOwnerId;
	CatalogTupleUpdate(rel, &tup->t_self, tup);

	/* Update owner dependency reference */
	changeDependencyOnOwner(EventTriggerRelationId,
							form->oid,
							newOwnerId);

	InvokeObjectPostAlterHook(EventTriggerRelationId,
							  form->oid, 0);
}

/*
 * get_event_trigger_oid - Look up an event trigger by name to find its OID.
 *
 * If missing_ok is false, throw an error if trigger not found.  If
 * true, just return InvalidOid.
 */
Oid
get_event_trigger_oid(const char *trigname, bool missing_ok)
{
	Oid			oid;

	oid = GetSysCacheOid1(EVENTTRIGGERNAME, Anum_pg_event_trigger_oid,
						  CStringGetDatum(trigname));
	if (!OidIsValid(oid) && !missing_ok)
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("event trigger \"%s\" does not exist", trigname)));
	return oid;
}

/*
 * Return true when we want to fire given Event Trigger and false otherwise,
 * filtering on the session replication role and the event trigger registered
 * tags matching.
 */
static bool
filter_event_trigger(CommandTag tag, EventTriggerCacheItem *item)
{
	/*
	 * Filter by session replication role, knowing that we never see disabled
	 * items down here.
	 */
	if (SessionReplicationRole == SESSION_REPLICATION_ROLE_REPLICA)
	{
		/* in replica mode, origin-only triggers are skipped */
		if (item->enabled == TRIGGER_FIRES_ON_ORIGIN)
			return false;
	}
	else
	{
		/* in origin mode, replica-only triggers are skipped */
		if (item->enabled == TRIGGER_FIRES_ON_REPLICA)
			return false;
	}

	/* Filter by tags, if any were specified. */
	if (!bms_is_empty(item->tagset) && !bms_is_member(tag, item->tagset))
		return false;

	/* if we reach that point, we're not filtering out this item */
	return true;
}

/*
 * Setup for running triggers for the given event. Return value is an OID list
 * of functions to run; if there are any, trigdata is filled with an
 * appropriate EventTriggerData for them to receive.
 */
static List *
EventTriggerCommonSetup(Node *parsetree,
						EventTriggerEvent event, const char *eventstr,
						EventTriggerData *trigdata)
{
	CommandTag	tag;
	List	   *cachelist;
	ListCell   *lc;
	List	   *runlist = NIL;

	/*
	 * We want the list of command tags for which this procedure is actually
	 * invoked to match up exactly with the list that CREATE EVENT TRIGGER
	 * accepts.  This debugging cross-check will throw an error if this
	 * function is invoked for a command tag that CREATE EVENT TRIGGER won't
	 * accept.  (Unfortunately, there doesn't seem to be any simple, automated
	 * way to verify that CREATE EVENT TRIGGER doesn't accept extra stuff that
	 * never reaches this control point.)
	 *
	 * If this cross-check fails for you, you probably need to either adjust
	 * standard_ProcessUtility() not to invoke event triggers for the command
	 * type in question, or you need to adjust event_trigger_ok to accept the
	 * relevant command tag.
	 */
#ifdef USE_ASSERT_CHECKING
	{
		CommandTag	dbgtag;

		dbgtag = CreateCommandTag(parsetree);
		if (event == EVT_DDLCommandStart ||
			event == EVT_DDLCommandEnd ||
			event == EVT_SQLDrop)
		{
			if (!command_tag_event_trigger_ok(dbgtag))
				elog(ERROR, "unexpected command tag \"%s\"", GetCommandTagName(dbgtag));
		}
		else if (event == EVT_TableRewrite)
		{
			if (!command_tag_table_rewrite_ok(dbgtag))
				elog(ERROR, "unexpected command tag \"%s\"", GetCommandTagName(dbgtag));
		}
	}
#endif

	/* Use cache to find triggers for this event; fast exit if none. */
	cachelist = EventCacheLookup(event);
	if (cachelist == NIL)
		return NIL;

	/* Get the command tag. */
	tag = CreateCommandTag(parsetree);

	/*
	 * Filter list of event triggers by command tag, and copy them into our
	 * memory context.  Once we start running the command triggers, or indeed
	 * once we do anything at all that touches the catalogs, an invalidation
	 * might leave cachelist pointing at garbage, so we must do this before we
	 * can do much else.
	 */
	foreach(lc, cachelist)
	{
		EventTriggerCacheItem *item = lfirst(lc);

		if (filter_event_trigger(tag, item))
		{
			/* We must plan to fire this trigger. */
			runlist = lappend_oid(runlist, item->fnoid);
		}
	}

	/* don't spend any more time on this if no functions to run */
	if (runlist == NIL)
		return NIL;

	trigdata->type = T_EventTriggerData;
	trigdata->event = eventstr;
	trigdata->parsetree = parsetree;
	trigdata->tag = tag;

	return runlist;
}

/*
 * Fire ddl_command_start triggers.
 */
void
EventTriggerDDLCommandStart(Node *parsetree)
{
	List	   *runlist;
	EventTriggerData trigdata;

	/*
	 * Event Triggers are completely disabled in standalone mode.  There are
	 * (at least) two reasons for this:
	 *
	 * 1. A sufficiently broken event trigger might not only render the
	 * database unusable, but prevent disabling itself to fix the situation.
+ * In this scenario, restarting in standalone mode provides an escape + * hatch. + * + * 2. BuildEventTriggerCache relies on systable_beginscan_ordered, and + * therefore will malfunction if pg_event_trigger's indexes are damaged. + * To allow recovery from a damaged index, we need some operating mode + * wherein event triggers are disabled. (Or we could implement + * heapscan-and-sort logic for that case, but having disaster recovery + * scenarios depend on code that's otherwise untested isn't appetizing.) + */ + if (!IsUnderPostmaster) + return; + + runlist = EventTriggerCommonSetup(parsetree, + EVT_DDLCommandStart, + "ddl_command_start", + &trigdata); + if (runlist == NIL) + return; + + /* Run the triggers. */ + EventTriggerInvoke(runlist, &trigdata); + + /* Cleanup. */ + list_free(runlist); + + /* + * Make sure anything the event triggers did will be visible to the main + * command. + */ + CommandCounterIncrement(); +} + +/* + * Fire ddl_command_end triggers. + */ +void +EventTriggerDDLCommandEnd(Node *parsetree) +{ + List *runlist; + EventTriggerData trigdata; + + /* + * See EventTriggerDDLCommandStart for a discussion about why event + * triggers are disabled in single user mode. + */ + if (!IsUnderPostmaster) + return; + + /* + * Also do nothing if our state isn't set up, which it won't be if there + * weren't any relevant event triggers at the start of the current DDL + * command. This test might therefore seem optional, but it's important + * because EventTriggerCommonSetup might find triggers that didn't exist + * at the time the command started. Although this function itself + * wouldn't crash, the event trigger functions would presumably call + * pg_event_trigger_ddl_commands which would fail. Better to do nothing + * until the next command. 
+ */ + if (!currentEventTriggerState) + return; + + runlist = EventTriggerCommonSetup(parsetree, + EVT_DDLCommandEnd, "ddl_command_end", + &trigdata); + if (runlist == NIL) + return; + + /* + * Make sure anything the main command did will be visible to the event + * triggers. + */ + CommandCounterIncrement(); + + /* Run the triggers. */ + EventTriggerInvoke(runlist, &trigdata); + + /* Cleanup. */ + list_free(runlist); +} + +/* + * Fire sql_drop triggers. + */ +void +EventTriggerSQLDrop(Node *parsetree) +{ + List *runlist; + EventTriggerData trigdata; + + /* + * See EventTriggerDDLCommandStart for a discussion about why event + * triggers are disabled in single user mode. + */ + if (!IsUnderPostmaster) + return; + + /* + * Use current state to determine whether this event fires at all. If + * there are no triggers for the sql_drop event, then we don't have + * anything to do here. Note that dropped object collection is disabled + * if this is the case, so even if we were to try to run, the list would + * be empty. + */ + if (!currentEventTriggerState || + slist_is_empty(&currentEventTriggerState->SQLDropList)) + return; + + runlist = EventTriggerCommonSetup(parsetree, + EVT_SQLDrop, "sql_drop", + &trigdata); + + /* + * Nothing to do if run list is empty. Note this typically can't happen, + * because if there are no sql_drop events, then objects-to-drop wouldn't + * have been collected in the first place and we would have quit above. + * But it could occur if event triggers were dropped partway through. + */ + if (runlist == NIL) + return; + + /* + * Make sure anything the main command did will be visible to the event + * triggers. + */ + CommandCounterIncrement(); + + /* + * Make sure pg_event_trigger_dropped_objects only works when running + * these triggers. Use PG_TRY to ensure in_sql_drop is reset even when + * one trigger fails.
(This is perhaps not necessary, as the currentState + * variable will be removed shortly by our caller, but it seems better to + * play safe.) + */ + currentEventTriggerState->in_sql_drop = true; + + /* Run the triggers. */ + PG_TRY(); + { + EventTriggerInvoke(runlist, &trigdata); + } + PG_FINALLY(); + { + currentEventTriggerState->in_sql_drop = false; + } + PG_END_TRY(); + + /* Cleanup. */ + list_free(runlist); +} + + +/* + * Fire table_rewrite triggers. + */ +void +EventTriggerTableRewrite(Node *parsetree, Oid tableOid, int reason) +{ + List *runlist; + EventTriggerData trigdata; + + /* + * See EventTriggerDDLCommandStart for a discussion about why event + * triggers are disabled in single user mode. + */ + if (!IsUnderPostmaster) + return; + + /* + * Also do nothing if our state isn't set up, which it won't be if there + * weren't any relevant event triggers at the start of the current DDL + * command. This test might therefore seem optional, but it's + * *necessary*, because EventTriggerCommonSetup might find triggers that + * didn't exist at the time the command started. + */ + if (!currentEventTriggerState) + return; + + runlist = EventTriggerCommonSetup(parsetree, + EVT_TableRewrite, + "table_rewrite", + &trigdata); + if (runlist == NIL) + return; + + /* + * Make sure pg_event_trigger_table_rewrite_oid only works when running + * these triggers. Use PG_TRY to ensure table_rewrite_oid is reset even + * when one trigger fails. (This is perhaps not necessary, as the + * currentState variable will be removed shortly by our caller, but it + * seems better to play safe.) + */ + currentEventTriggerState->table_rewrite_oid = tableOid; + currentEventTriggerState->table_rewrite_reason = reason; + + /* Run the triggers. */ + PG_TRY(); + { + EventTriggerInvoke(runlist, &trigdata); + } + PG_FINALLY(); + { + currentEventTriggerState->table_rewrite_oid = InvalidOid; + currentEventTriggerState->table_rewrite_reason = 0; + } + PG_END_TRY(); + + /* Cleanup. 
*/ + list_free(runlist); + + /* + * Make sure anything the event triggers did will be visible to the main + * command. + */ + CommandCounterIncrement(); +} + +/* + * Invoke each event trigger in a list of event triggers. + */ +static void +EventTriggerInvoke(List *fn_oid_list, EventTriggerData *trigdata) +{ + MemoryContext context; + MemoryContext oldcontext; + ListCell *lc; + bool first = true; + + /* Guard against stack overflow due to recursive event trigger */ + check_stack_depth(); + + /* + * Let's evaluate event triggers in their own memory context, so that any + * leaks get cleaned up promptly. + */ + context = AllocSetContextCreate(CurrentMemoryContext, + "event trigger context", + ALLOCSET_DEFAULT_SIZES); + oldcontext = MemoryContextSwitchTo(context); + + /* Call each event trigger. */ + foreach(lc, fn_oid_list) + { + LOCAL_FCINFO(fcinfo, 0); + Oid fnoid = lfirst_oid(lc); + FmgrInfo flinfo; + PgStat_FunctionCallUsage fcusage; + + elog(DEBUG1, "EventTriggerInvoke %u", fnoid); + + /* + * We want each event trigger to be able to see the results of the + * previous event trigger's action. Caller is responsible for any + * command-counter increment that is needed between the event trigger + * and anything else in the transaction. + */ + if (first) + first = false; + else + CommandCounterIncrement(); + + /* Look up the function */ + fmgr_info(fnoid, &flinfo); + + /* Call the function, passing no arguments but setting a context. */ + InitFunctionCallInfoData(*fcinfo, &flinfo, 0, + InvalidOid, (Node *) trigdata, NULL); + pgstat_init_function_usage(fcinfo, &fcusage); + FunctionCallInvoke(fcinfo); + pgstat_end_function_usage(&fcusage, true); + + /* Reclaim memory. */ + MemoryContextReset(context); + } + + /* Restore old memory context and delete the temporary one. */ + MemoryContextSwitchTo(oldcontext); + MemoryContextDelete(context); +} + +/* + * Do event triggers support this object type? 
 */
bool
EventTriggerSupportsObjectType(ObjectType obtype)
{
	/*
	 * Only database-local object types are supported; global objects
	 * (databases, tablespaces, roles, parameter ACLs) and event triggers
	 * themselves are explicitly excluded below.
	 */
	switch (obtype)
	{
		case OBJECT_DATABASE:
		case OBJECT_TABLESPACE:
		case OBJECT_ROLE:
		case OBJECT_PARAMETER_ACL:
			/* no support for global objects */
			return false;
		case OBJECT_EVENT_TRIGGER:
			/* no support for event triggers on event triggers */
			return false;
		case OBJECT_ACCESS_METHOD:
		case OBJECT_AGGREGATE:
		case OBJECT_AMOP:
		case OBJECT_AMPROC:
		case OBJECT_ATTRIBUTE:
		case OBJECT_CAST:
		case OBJECT_COLUMN:
		case OBJECT_COLLATION:
		case OBJECT_CONVERSION:
		case OBJECT_DEFACL:
		case OBJECT_DEFAULT:
		case OBJECT_DOMAIN:
		case OBJECT_DOMCONSTRAINT:
		case OBJECT_EXTENSION:
		case OBJECT_FDW:
		case OBJECT_FOREIGN_SERVER:
		case OBJECT_FOREIGN_TABLE:
		case OBJECT_FUNCTION:
		case OBJECT_INDEX:
		case OBJECT_LANGUAGE:
		case OBJECT_LARGEOBJECT:
		case OBJECT_MATVIEW:
		case OBJECT_OPCLASS:
		case OBJECT_OPERATOR:
		case OBJECT_OPFAMILY:
		case OBJECT_POLICY:
		case OBJECT_PROCEDURE:
		case OBJECT_PUBLICATION:
		case OBJECT_PUBLICATION_NAMESPACE:
		case OBJECT_PUBLICATION_REL:
		case OBJECT_ROUTINE:
		case OBJECT_RULE:
		case OBJECT_SCHEMA:
		case OBJECT_SEQUENCE:
		case OBJECT_SUBSCRIPTION:
		case OBJECT_STATISTIC_EXT:
		case OBJECT_TABCONSTRAINT:
		case OBJECT_TABLE:
		case OBJECT_TRANSFORM:
		case OBJECT_TRIGGER:
		case OBJECT_TSCONFIGURATION:
		case OBJECT_TSDICTIONARY:
		case OBJECT_TSPARSER:
		case OBJECT_TSTEMPLATE:
		case OBJECT_TYPE:
		case OBJECT_USER_MAPPING:
		case OBJECT_VIEW:
			return true;

			/*
			 * There's intentionally no default: case here; we want the
			 * compiler to warn if a new ObjectType hasn't been handled above.
			 */
	}

	/* Shouldn't get here, but if we do, say "no support" */
	return false;
}

/*
 * Do event triggers support this object class?
 */
bool
EventTriggerSupportsObjectClass(ObjectClass objclass)
{
	/*
	 * Companion to EventTriggerSupportsObjectType, keyed by dependency-level
	 * object class instead of parse-level object type; the two must agree on
	 * which objects are excluded (global objects and event triggers).
	 */
	switch (objclass)
	{
		case OCLASS_DATABASE:
		case OCLASS_TBLSPACE:
		case OCLASS_ROLE:
		case OCLASS_PARAMETER_ACL:
			/* no support for global objects */
			return false;
		case OCLASS_EVENT_TRIGGER:
			/* no support for event triggers on event triggers */
			return false;
		case OCLASS_CLASS:
		case OCLASS_PROC:
		case OCLASS_TYPE:
		case OCLASS_CAST:
		case OCLASS_COLLATION:
		case OCLASS_CONSTRAINT:
		case OCLASS_CONVERSION:
		case OCLASS_DEFAULT:
		case OCLASS_LANGUAGE:
		case OCLASS_LARGEOBJECT:
		case OCLASS_OPERATOR:
		case OCLASS_OPCLASS:
		case OCLASS_OPFAMILY:
		case OCLASS_AM:
		case OCLASS_AMOP:
		case OCLASS_AMPROC:
		case OCLASS_REWRITE:
		case OCLASS_TRIGGER:
		case OCLASS_SCHEMA:
		case OCLASS_STATISTIC_EXT:
		case OCLASS_TSPARSER:
		case OCLASS_TSDICT:
		case OCLASS_TSTEMPLATE:
		case OCLASS_TSCONFIG:
		case OCLASS_FDW:
		case OCLASS_FOREIGN_SERVER:
		case OCLASS_USER_MAPPING:
		case OCLASS_DEFACL:
		case OCLASS_EXTENSION:
		case OCLASS_POLICY:
		case OCLASS_PUBLICATION:
		case OCLASS_PUBLICATION_NAMESPACE:
		case OCLASS_PUBLICATION_REL:
		case OCLASS_SUBSCRIPTION:
		case OCLASS_TRANSFORM:
			return true;

			/*
			 * There's intentionally no default: case here; we want the
			 * compiler to warn if a new OCLASS hasn't been handled above.
			 */
	}

	/* Shouldn't get here, but if we do, say "no support" */
	return false;
}

/*
 * Prepare event trigger state for a new complete query to run, if necessary;
 * returns whether this was done. If it was, EventTriggerEndCompleteQuery must
 * be called when the query is done, regardless of whether it succeeds or fails
 * -- so use of a PG_TRY block is mandatory.
+ */ +bool +EventTriggerBeginCompleteQuery(void) +{ + EventTriggerQueryState *state; + MemoryContext cxt; + + /* + * Currently, sql_drop, table_rewrite, ddl_command_end events are the only + * reason to have event trigger state at all; so if there are none, don't + * install one. + */ + if (!trackDroppedObjectsNeeded()) + return false; + + cxt = AllocSetContextCreate(TopMemoryContext, + "event trigger state", + ALLOCSET_DEFAULT_SIZES); + state = MemoryContextAlloc(cxt, sizeof(EventTriggerQueryState)); + state->cxt = cxt; + slist_init(&(state->SQLDropList)); + state->in_sql_drop = false; + state->table_rewrite_oid = InvalidOid; + + state->commandCollectionInhibited = currentEventTriggerState ? + currentEventTriggerState->commandCollectionInhibited : false; + state->currentCommand = NULL; + state->commandList = NIL; + state->previous = currentEventTriggerState; + currentEventTriggerState = state; + + return true; +} + +/* + * Query completed (or errored out) -- clean up local state, return to previous + * one. + * + * Note: it's an error to call this routine if EventTriggerBeginCompleteQuery + * returned false previously. + * + * Note: this might be called in the PG_CATCH block of a failing transaction, + * so be wary of running anything unnecessary. (In particular, it's probably + * unwise to try to allocate memory.) + */ +void +EventTriggerEndCompleteQuery(void) +{ + EventTriggerQueryState *prevstate; + + prevstate = currentEventTriggerState->previous; + + /* this avoids the need for retail pfree of SQLDropList items: */ + MemoryContextDelete(currentEventTriggerState->cxt); + + currentEventTriggerState = prevstate; +} + +/* + * Do we need to keep close track of objects being dropped? + * + * This is useful because there is a cost to running with them enabled. 
 */
bool
trackDroppedObjectsNeeded(void)
{
	/*
	 * true if any sql_drop, table_rewrite, ddl_command_end event trigger
	 * exists
	 */
	return list_length(EventCacheLookup(EVT_SQLDrop)) > 0 ||
		list_length(EventCacheLookup(EVT_TableRewrite)) > 0 ||
		list_length(EventCacheLookup(EVT_DDLCommandEnd)) > 0;
}

/*
 * Support for dropped objects information on event trigger functions.
 *
 * We keep the list of objects dropped by the current command in current
 * state's SQLDropList (comprising SQLDropObject items).  Each time a new
 * command is to start, a clean EventTriggerQueryState is created; commands
 * that drop objects do the dependency.c dance to drop objects, which
 * populates the current state's SQLDropList; when the event triggers are
 * invoked they can consume the list via pg_event_trigger_dropped_objects().
 * When the command finishes, the EventTriggerQueryState is cleared, and
 * the one from the previous command is restored (when no command is in
 * execution, the current state is NULL).
 *
 * All this lets us support the case that an event trigger function drops
 * objects "reentrantly".
 */

/*
 * Register one object as being dropped by the current command.
 *
 * original/normal indicate how the object came to be dropped (directly
 * targeted vs. reached via dependency); they are stored verbatim and later
 * exposed by pg_event_trigger_dropped_objects().
 */
void
EventTriggerSQLDropAddObject(const ObjectAddress *object, bool original, bool normal)
{
	SQLDropObject *obj;
	MemoryContext oldcxt;

	/* Collection only happens while event trigger state is active. */
	if (!currentEventTriggerState)
		return;

	Assert(EventTriggerSupportsObjectClass(getObjectClass(object)));

	/* don't report temp schemas except my own */
	if (object->classId == NamespaceRelationId &&
		(isAnyTempNamespace(object->objectId) &&
		 !isTempNamespace(object->objectId)))
		return;

	/* Allocate in the event-trigger context so the entry outlives this call. */
	oldcxt = MemoryContextSwitchTo(currentEventTriggerState->cxt);

	obj = palloc0(sizeof(SQLDropObject));
	obj->address = *object;
	obj->original = original;
	obj->normal = normal;

	/*
	 * Obtain schema names from the object's catalog tuple, if one exists;
	 * this lets us skip objects in temp schemas.  We trust that
	 * ObjectProperty contains all object classes that can be
	 * schema-qualified.
	 */
	if (is_objectclass_supported(object->classId))
	{
		Relation	catalog;
		HeapTuple	tuple;

		catalog = table_open(obj->address.classId, AccessShareLock);
		tuple = get_catalog_object_by_oid(catalog,
										  get_object_attnum_oid(object->classId),
										  obj->address.objectId);

		if (tuple)
		{
			AttrNumber	attnum;
			Datum		datum;
			bool		isnull;

			attnum = get_object_attnum_namespace(obj->address.classId);
			if (attnum != InvalidAttrNumber)
			{
				datum = heap_getattr(tuple, attnum,
									 RelationGetDescr(catalog), &isnull);
				if (!isnull)
				{
					Oid			namespaceId;

					namespaceId = DatumGetObjectId(datum);
					/* temp objects are only reported if they are my own */
					if (isTempNamespace(namespaceId))
					{
						obj->schemaname = "pg_temp";
						obj->istemp = true;
					}
					else if (isAnyTempNamespace(namespaceId))
					{
						/* another session's temp object: drop the entry entirely */
						pfree(obj);
						table_close(catalog, AccessShareLock);
						MemoryContextSwitchTo(oldcxt);
						return;
					}
					else
					{
						obj->schemaname = get_namespace_name(namespaceId);
						obj->istemp = false;
					}
				}
			}

			/* record the object name only when (schema, name) uniquely identifies it */
			if (get_object_namensp_unique(obj->address.classId) &&
				obj->address.objectSubId == 0)
			{
				attnum = get_object_attnum_name(obj->address.classId);
				if (attnum != InvalidAttrNumber)
				{
					datum = heap_getattr(tuple, attnum,
										 RelationGetDescr(catalog), &isnull);
					if (!isnull)
						obj->objname = pstrdup(NameStr(*DatumGetName(datum)));
				}
			}
		}

		table_close(catalog, AccessShareLock);
	}
	else
	{
		if (object->classId == NamespaceRelationId &&
			isTempNamespace(object->objectId))
			obj->istemp = true;
	}

	/* object identity, objname and objargs */
	obj->objidentity =
		getObjectIdentityParts(&obj->address, &obj->addrnames, &obj->addrargs,
							   false);

	/* object type */
	obj->objecttype = getObjectTypeDescription(&obj->address, false);

	slist_push_head(&(currentEventTriggerState->SQLDropList), &obj->next);

	MemoryContextSwitchTo(oldcxt);
}

/*
 *
pg_event_trigger_dropped_objects
 *
 * Make the list of dropped objects available to the user function run by the
 * Event Trigger.
 */
Datum
pg_event_trigger_dropped_objects(PG_FUNCTION_ARGS)
{
	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
	slist_iter	iter;

	/*
	 * Protect this function from being called out of context
	 */
	if (!currentEventTriggerState ||
		!currentEventTriggerState->in_sql_drop)
		ereport(ERROR,
				(errcode(ERRCODE_E_R_I_E_EVENT_TRIGGER_PROTOCOL_VIOLATED),
				 errmsg("%s can only be called in a sql_drop event trigger function",
						"pg_event_trigger_dropped_objects()")));

	/* Build tuplestore to hold the result rows */
	InitMaterializedSRF(fcinfo, 0);

	slist_foreach(iter, &(currentEventTriggerState->SQLDropList))
	{
		SQLDropObject *obj;
		int			i = 0;
		/*
		 * NOTE(review): the values[]/nulls[] fill order below defines the
		 * output columns; presumably it must match the function's declared
		 * result row type — verify against the catalog declaration.
		 */
		Datum		values[12];
		bool		nulls[12];

		obj = slist_container(SQLDropObject, next, iter.cur);

		MemSet(values, 0, sizeof(values));
		MemSet(nulls, 0, sizeof(nulls));

		/* classid */
		values[i++] = ObjectIdGetDatum(obj->address.classId);

		/* objid */
		values[i++] = ObjectIdGetDatum(obj->address.objectId);

		/* objsubid */
		values[i++] = Int32GetDatum(obj->address.objectSubId);

		/* original */
		values[i++] = BoolGetDatum(obj->original);

		/* normal */
		values[i++] = BoolGetDatum(obj->normal);

		/* is_temporary */
		values[i++] = BoolGetDatum(obj->istemp);

		/* object_type */
		values[i++] = CStringGetTextDatum(obj->objecttype);

		/* schema_name */
		if (obj->schemaname)
			values[i++] = CStringGetTextDatum(obj->schemaname);
		else
			nulls[i++] = true;

		/* object_name */
		if (obj->objname)
			values[i++] = CStringGetTextDatum(obj->objname);
		else
			nulls[i++] = true;

		/* object_identity */
		if (obj->objidentity)
			values[i++] = CStringGetTextDatum(obj->objidentity);
		else
			nulls[i++] = true;

		/* address_names and address_args */
		if (obj->addrnames)
		{
			values[i++] = PointerGetDatum(strlist_to_textarray(obj->addrnames));

			if (obj->addrargs)
				values[i++] = PointerGetDatum(strlist_to_textarray(obj->addrargs));
			else
				values[i++] = PointerGetDatum(construct_empty_array(TEXTOID));
		}
		else
		{
			nulls[i++] = true;
			nulls[i++] = true;
		}

		tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
							 values, nulls);
	}

	return (Datum) 0;
}

/*
 * pg_event_trigger_table_rewrite_oid
 *
 * Make the Oid of the table going to be rewritten available to the user
 * function run by the Event Trigger.
 */
Datum
pg_event_trigger_table_rewrite_oid(PG_FUNCTION_ARGS)
{
	/*
	 * Protect this function from being called out of context
	 */
	if (!currentEventTriggerState ||
		currentEventTriggerState->table_rewrite_oid == InvalidOid)
		ereport(ERROR,
				(errcode(ERRCODE_E_R_I_E_EVENT_TRIGGER_PROTOCOL_VIOLATED),
				 errmsg("%s can only be called in a table_rewrite event trigger function",
						"pg_event_trigger_table_rewrite_oid()")));

	PG_RETURN_OID(currentEventTriggerState->table_rewrite_oid);
}

/*
 * pg_event_trigger_table_rewrite_reason
 *
 * Make the rewrite reason available to the user.
 */
Datum
pg_event_trigger_table_rewrite_reason(PG_FUNCTION_ARGS)
{
	/*
	 * Protect this function from being called out of context
	 */
	if (!currentEventTriggerState ||
		currentEventTriggerState->table_rewrite_reason == 0)
		ereport(ERROR,
				(errcode(ERRCODE_E_R_I_E_EVENT_TRIGGER_PROTOCOL_VIOLATED),
				 errmsg("%s can only be called in a table_rewrite event trigger function",
						"pg_event_trigger_table_rewrite_reason()")));

	PG_RETURN_INT32(currentEventTriggerState->table_rewrite_reason);
}

/*-------------------------------------------------------------------------
 * Support for DDL command deparsing
 *
 * The routines below enable an event trigger function to obtain a list of
 * DDL commands as they are executed.
There are three main pieces to this + * feature: + * + * 1) Within ProcessUtilitySlow, or some sub-routine thereof, each DDL command + * adds a struct CollectedCommand representation of itself to the command list, + * using the routines below. + * + * 2) Some time after that, ddl_command_end fires and the command list is made + * available to the event trigger function via pg_event_trigger_ddl_commands(); + * the complete command details are exposed as a column of type pg_ddl_command. + * + * 3) An extension can install a function capable of taking a value of type + * pg_ddl_command and transform it into some external, user-visible and/or + * -modifiable representation. + *------------------------------------------------------------------------- + */ + +/* + * Inhibit DDL command collection. + */ +void +EventTriggerInhibitCommandCollection(void) +{ + if (!currentEventTriggerState) + return; + + currentEventTriggerState->commandCollectionInhibited = true; +} + +/* + * Re-establish DDL command collection. + */ +void +EventTriggerUndoInhibitCommandCollection(void) +{ + if (!currentEventTriggerState) + return; + + currentEventTriggerState->commandCollectionInhibited = false; +} + +/* + * EventTriggerCollectSimpleCommand + * Save data about a simple DDL command that was just executed + * + * address identifies the object being operated on. secondaryObject is an + * object address that was related in some way to the executed command; its + * meaning is command-specific. + * + * For instance, for an ALTER obj SET SCHEMA command, objtype is the type of + * object being moved, objectId is its OID, and secondaryOid is the OID of the + * old schema. (The destination schema OID can be obtained by catalog lookup + * of the object.) 
 */
void
EventTriggerCollectSimpleCommand(ObjectAddress address,
								 ObjectAddress secondaryObject,
								 Node *parsetree)
{
	MemoryContext oldcxt;
	CollectedCommand *command;

	/* ignore if event trigger context not set, or collection disabled */
	if (!currentEventTriggerState ||
		currentEventTriggerState->commandCollectionInhibited)
		return;

	/* allocate in the event-trigger context so the entry survives this call */
	oldcxt = MemoryContextSwitchTo(currentEventTriggerState->cxt);

	command = palloc(sizeof(CollectedCommand));

	command->type = SCT_Simple;
	command->in_extension = creating_extension;

	command->d.simple.address = address;
	command->d.simple.secondaryObject = secondaryObject;
	/* copy the parse tree: the original belongs to a shorter-lived context */
	command->parsetree = copyObject(parsetree);

	currentEventTriggerState->commandList = lappend(currentEventTriggerState->commandList,
													command);

	MemoryContextSwitchTo(oldcxt);
}

/*
 * EventTriggerAlterTableStart
 *		Prepare to receive data on an ALTER TABLE command about to be executed
 *
 * Note we don't collect the command immediately; instead we keep it in
 * currentCommand, and only when we're done processing the subcommands we will
 * add it to the command list.
 */
void
EventTriggerAlterTableStart(Node *parsetree)
{
	MemoryContext oldcxt;
	CollectedCommand *command;

	/* ignore if event trigger context not set, or collection disabled */
	if (!currentEventTriggerState ||
		currentEventTriggerState->commandCollectionInhibited)
		return;

	oldcxt = MemoryContextSwitchTo(currentEventTriggerState->cxt);

	command = palloc(sizeof(CollectedCommand));

	command->type = SCT_AlterTable;
	command->in_extension = creating_extension;

	command->d.alterTable.classId = RelationRelationId;
	/* the relation OID is not known yet; EventTriggerAlterTableRelid fills it in */
	command->d.alterTable.objectId = InvalidOid;
	command->d.alterTable.subcmds = NIL;
	command->parsetree = copyObject(parsetree);

	/* push onto the stack of in-progress commands (ALTER TABLE can nest) */
	command->parent = currentEventTriggerState->currentCommand;
	currentEventTriggerState->currentCommand = command;

	MemoryContextSwitchTo(oldcxt);
}

/*
 * Remember the OID of the object being affected by an ALTER TABLE.
 *
 * This is needed because in some cases we don't know the OID until later.
 */
void
EventTriggerAlterTableRelid(Oid objectId)
{
	if (!currentEventTriggerState ||
		currentEventTriggerState->commandCollectionInhibited)
		return;

	currentEventTriggerState->currentCommand->d.alterTable.objectId = objectId;
}

/*
 * EventTriggerCollectAlterTableSubcmd
 *		Save data about a single part of an ALTER TABLE.
 *
 * Several different commands go through this path, but apart from ALTER TABLE
 * itself, they are all concerned with AlterTableCmd nodes that are generated
 * internally, so that's all that this code needs to handle at the moment.
 */
void
EventTriggerCollectAlterTableSubcmd(Node *subcmd, ObjectAddress address)
{
	MemoryContext oldcxt;
	CollectedATSubcmd *newsub;

	/* ignore if event trigger context not set, or collection disabled */
	if (!currentEventTriggerState ||
		currentEventTriggerState->commandCollectionInhibited)
		return;

	/* EventTriggerAlterTableStart/Relid must have run before this point */
	Assert(IsA(subcmd, AlterTableCmd));
	Assert(currentEventTriggerState->currentCommand != NULL);
	Assert(OidIsValid(currentEventTriggerState->currentCommand->d.alterTable.objectId));

	oldcxt = MemoryContextSwitchTo(currentEventTriggerState->cxt);

	newsub = palloc(sizeof(CollectedATSubcmd));
	newsub->address = address;
	newsub->parsetree = copyObject(subcmd);

	currentEventTriggerState->currentCommand->d.alterTable.subcmds =
		lappend(currentEventTriggerState->currentCommand->d.alterTable.subcmds, newsub);

	MemoryContextSwitchTo(oldcxt);
}

/*
 * EventTriggerAlterTableEnd
 *		Finish up saving an ALTER TABLE command, and add it to command list.
 *
 * FIXME this API isn't considering the possibility that an xact/subxact is
 * aborted partway through.  Probably it's best to add an
 * AtEOSubXact_EventTriggers() to fix this.
 */
void
EventTriggerAlterTableEnd(void)
{
	CollectedCommand *parent;

	/* ignore if event trigger context not set, or collection disabled */
	if (!currentEventTriggerState ||
		currentEventTriggerState->commandCollectionInhibited)
		return;

	parent = currentEventTriggerState->currentCommand->parent;

	/* If no subcommands, don't collect */
	if (list_length(currentEventTriggerState->currentCommand->d.alterTable.subcmds) != 0)
	{
		MemoryContext oldcxt;

		oldcxt = MemoryContextSwitchTo(currentEventTriggerState->cxt);

		currentEventTriggerState->commandList =
			lappend(currentEventTriggerState->commandList,
					currentEventTriggerState->currentCommand);

		MemoryContextSwitchTo(oldcxt);
	}
	else
		pfree(currentEventTriggerState->currentCommand);

	/* pop the stack pushed by EventTriggerAlterTableStart */
	currentEventTriggerState->currentCommand = parent;
}

/*
 * EventTriggerCollectGrant
 *		Save data about a GRANT/REVOKE command being executed
 *
 * This function creates a copy of the InternalGrant, as the original might
 * not have the right lifetime.
 */
void
EventTriggerCollectGrant(InternalGrant *istmt)
{
	MemoryContext oldcxt;
	CollectedCommand *command;
	InternalGrant *icopy;
	ListCell   *cell;

	/* ignore if event trigger context not set, or collection disabled */
	if (!currentEventTriggerState ||
		currentEventTriggerState->commandCollectionInhibited)
		return;

	oldcxt = MemoryContextSwitchTo(currentEventTriggerState->cxt);

	/*
	 * This is tedious, but necessary.
	 * (Deep-copy the InternalGrant: the struct itself, then each list member;
	 * col_privs elements are nodes and need copyObject.)
	 */
	icopy = palloc(sizeof(InternalGrant));
	memcpy(icopy, istmt, sizeof(InternalGrant));
	icopy->objects = list_copy(istmt->objects);
	icopy->grantees = list_copy(istmt->grantees);
	icopy->col_privs = NIL;
	foreach(cell, istmt->col_privs)
		icopy->col_privs = lappend(icopy->col_privs, copyObject(lfirst(cell)));

	/* Now collect it, using the copied InternalGrant */
	command = palloc(sizeof(CollectedCommand));
	command->type = SCT_Grant;
	command->in_extension = creating_extension;
	command->d.grant.istmt = icopy;
	command->parsetree = NULL;

	currentEventTriggerState->commandList =
		lappend(currentEventTriggerState->commandList, command);

	MemoryContextSwitchTo(oldcxt);
}

/*
 * EventTriggerCollectAlterOpFam
 *		Save data about an ALTER OPERATOR FAMILY ADD/DROP command being
 *		executed
 */
void
EventTriggerCollectAlterOpFam(AlterOpFamilyStmt *stmt, Oid opfamoid,
							  List *operators, List *procedures)
{
	MemoryContext oldcxt;
	CollectedCommand *command;

	/* ignore if event trigger context not set, or collection disabled */
	if (!currentEventTriggerState ||
		currentEventTriggerState->commandCollectionInhibited)
		return;

	oldcxt = MemoryContextSwitchTo(currentEventTriggerState->cxt);

	command = palloc(sizeof(CollectedCommand));
	command->type = SCT_AlterOpFamily;
	command->in_extension = creating_extension;
	ObjectAddressSet(command->d.opfam.address,
					 OperatorFamilyRelationId, opfamoid);
	command->d.opfam.operators = operators;
	command->d.opfam.procedures = procedures;
	command->parsetree = (Node *) copyObject(stmt);

	currentEventTriggerState->commandList =
		lappend(currentEventTriggerState->commandList, command);

	MemoryContextSwitchTo(oldcxt);
}

/*
 * EventTriggerCollectCreateOpClass
 *		Save data about a CREATE OPERATOR CLASS command being executed
 */
void
EventTriggerCollectCreateOpClass(CreateOpClassStmt *stmt, Oid opcoid,
								 List *operators, List *procedures)
{
	MemoryContext oldcxt;
	CollectedCommand *command;

	/* ignore if event trigger context not set, or collection disabled */
	if (!currentEventTriggerState ||
		currentEventTriggerState->commandCollectionInhibited)
		return;

	oldcxt = MemoryContextSwitchTo(currentEventTriggerState->cxt);

	command = palloc0(sizeof(CollectedCommand));
	command->type = SCT_CreateOpClass;
	command->in_extension = creating_extension;
	ObjectAddressSet(command->d.createopc.address,
					 OperatorClassRelationId, opcoid);
	command->d.createopc.operators = operators;
	command->d.createopc.procedures = procedures;
	command->parsetree = (Node *) copyObject(stmt);

	currentEventTriggerState->commandList =
		lappend(currentEventTriggerState->commandList, command);

	MemoryContextSwitchTo(oldcxt);
}

/*
 * EventTriggerCollectAlterTSConfig
 *		Save data about an ALTER TEXT SEARCH CONFIGURATION command being
 *		executed
 */
void
EventTriggerCollectAlterTSConfig(AlterTSConfigurationStmt *stmt, Oid cfgId,
								 Oid *dictIds, int ndicts)
{
	MemoryContext oldcxt;
	CollectedCommand *command;

	/* ignore if event trigger context not set, or collection disabled */
	if (!currentEventTriggerState ||
		currentEventTriggerState->commandCollectionInhibited)
		return;

	oldcxt = MemoryContextSwitchTo(currentEventTriggerState->cxt);

	command = palloc0(sizeof(CollectedCommand));
	command->type = SCT_AlterTSConfig;
	command->in_extension = creating_extension;
	ObjectAddressSet(command->d.atscfg.address,
					 TSConfigRelationId, cfgId);
	/* copy the dictionary OID array; the caller's array may be transient */
	command->d.atscfg.dictIds = palloc(sizeof(Oid) * ndicts);
	memcpy(command->d.atscfg.dictIds, dictIds, sizeof(Oid) * ndicts);
	command->d.atscfg.ndicts = ndicts;
	command->parsetree = (Node *) copyObject(stmt);

	currentEventTriggerState->commandList =
		lappend(currentEventTriggerState->commandList, command);

	MemoryContextSwitchTo(oldcxt);
}

/*
 * EventTriggerCollectAlterDefPrivs
 *		Save data about an ALTER DEFAULT PRIVILEGES command being
 *		executed
 */
+void +EventTriggerCollectAlterDefPrivs(AlterDefaultPrivilegesStmt *stmt) +{ + MemoryContext oldcxt; + CollectedCommand *command; + + /* ignore if event trigger context not set, or collection disabled */ + if (!currentEventTriggerState || + currentEventTriggerState->commandCollectionInhibited) + return; + + oldcxt = MemoryContextSwitchTo(currentEventTriggerState->cxt); + + command = palloc0(sizeof(CollectedCommand)); + command->type = SCT_AlterDefaultPrivileges; + command->d.defprivs.objtype = stmt->action->objtype; + command->in_extension = creating_extension; + command->parsetree = (Node *) copyObject(stmt); + + currentEventTriggerState->commandList = + lappend(currentEventTriggerState->commandList, command); + MemoryContextSwitchTo(oldcxt); +} + +/* + * In a ddl_command_end event trigger, this function reports the DDL commands + * being run. + */ +Datum +pg_event_trigger_ddl_commands(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + ListCell *lc; + + /* + * Protect this function from being called out of context + */ + if (!currentEventTriggerState) + ereport(ERROR, + (errcode(ERRCODE_E_R_I_E_EVENT_TRIGGER_PROTOCOL_VIOLATED), + errmsg("%s can only be called in an event trigger function", + "pg_event_trigger_ddl_commands()"))); + + /* Build tuplestore to hold the result rows */ + InitMaterializedSRF(fcinfo, 0); + + foreach(lc, currentEventTriggerState->commandList) + { + CollectedCommand *cmd = lfirst(lc); + Datum values[9]; + bool nulls[9]; + ObjectAddress addr; + int i = 0; + + /* + * For IF NOT EXISTS commands that attempt to create an existing + * object, the returned OID is Invalid. Don't return anything. + * + * One might think that a viable alternative would be to look up the + * Oid of the existing object and run the deparse with that. But + * since the parse tree might be different from the one that created + * the object in the first place, we might not end up in a consistent + * state anyway. 
+ */ + if (cmd->type == SCT_Simple && + !OidIsValid(cmd->d.simple.address.objectId)) + continue; + + MemSet(nulls, 0, sizeof(nulls)); + + switch (cmd->type) + { + case SCT_Simple: + case SCT_AlterTable: + case SCT_AlterOpFamily: + case SCT_CreateOpClass: + case SCT_AlterTSConfig: + { + char *identity; + char *type; + char *schema = NULL; + + if (cmd->type == SCT_Simple) + addr = cmd->d.simple.address; + else if (cmd->type == SCT_AlterTable) + ObjectAddressSet(addr, + cmd->d.alterTable.classId, + cmd->d.alterTable.objectId); + else if (cmd->type == SCT_AlterOpFamily) + addr = cmd->d.opfam.address; + else if (cmd->type == SCT_CreateOpClass) + addr = cmd->d.createopc.address; + else if (cmd->type == SCT_AlterTSConfig) + addr = cmd->d.atscfg.address; + + /* + * If an object was dropped in the same command we may end + * up in a situation where we generated a message but can + * no longer look for the object information, so skip it + * rather than failing. This can happen for example with + * some subcommand combinations of ALTER TABLE. + */ + identity = getObjectIdentity(&addr, true); + if (identity == NULL) + continue; + + /* The type can never be NULL. */ + type = getObjectTypeDescription(&addr, true); + + /* + * Obtain schema name, if any ("pg_temp" if a temp + * object). If the object class is not in the supported + * list here, we assume it's a schema-less object type, + * and thus "schema" remains set to NULL. 
+ */ + if (is_objectclass_supported(addr.classId)) + { + AttrNumber nspAttnum; + + nspAttnum = get_object_attnum_namespace(addr.classId); + if (nspAttnum != InvalidAttrNumber) + { + Relation catalog; + HeapTuple objtup; + Oid schema_oid; + bool isnull; + + catalog = table_open(addr.classId, AccessShareLock); + objtup = get_catalog_object_by_oid(catalog, + get_object_attnum_oid(addr.classId), + addr.objectId); + if (!HeapTupleIsValid(objtup)) + elog(ERROR, "cache lookup failed for object %u/%u", + addr.classId, addr.objectId); + schema_oid = + heap_getattr(objtup, nspAttnum, + RelationGetDescr(catalog), &isnull); + if (isnull) + elog(ERROR, + "invalid null namespace in object %u/%u/%d", + addr.classId, addr.objectId, addr.objectSubId); + schema = get_namespace_name_or_temp(schema_oid); + + table_close(catalog, AccessShareLock); + } + } + + /* classid */ + values[i++] = ObjectIdGetDatum(addr.classId); + /* objid */ + values[i++] = ObjectIdGetDatum(addr.objectId); + /* objsubid */ + values[i++] = Int32GetDatum(addr.objectSubId); + /* command tag */ + values[i++] = CStringGetTextDatum(CreateCommandName(cmd->parsetree)); + /* object_type */ + values[i++] = CStringGetTextDatum(type); + /* schema */ + if (schema == NULL) + nulls[i++] = true; + else + values[i++] = CStringGetTextDatum(schema); + /* identity */ + values[i++] = CStringGetTextDatum(identity); + /* in_extension */ + values[i++] = BoolGetDatum(cmd->in_extension); + /* command */ + values[i++] = PointerGetDatum(cmd); + } + break; + + case SCT_AlterDefaultPrivileges: + /* classid */ + nulls[i++] = true; + /* objid */ + nulls[i++] = true; + /* objsubid */ + nulls[i++] = true; + /* command tag */ + values[i++] = CStringGetTextDatum(CreateCommandName(cmd->parsetree)); + /* object_type */ + values[i++] = CStringGetTextDatum(stringify_adefprivs_objtype(cmd->d.defprivs.objtype)); + /* schema */ + nulls[i++] = true; + /* identity */ + nulls[i++] = true; + /* in_extension */ + values[i++] = 
BoolGetDatum(cmd->in_extension); + /* command */ + values[i++] = PointerGetDatum(cmd); + break; + + case SCT_Grant: + /* classid */ + nulls[i++] = true; + /* objid */ + nulls[i++] = true; + /* objsubid */ + nulls[i++] = true; + /* command tag */ + values[i++] = CStringGetTextDatum(cmd->d.grant.istmt->is_grant ? + "GRANT" : "REVOKE"); + /* object_type */ + values[i++] = CStringGetTextDatum(stringify_grant_objtype(cmd->d.grant.istmt->objtype)); + /* schema */ + nulls[i++] = true; + /* identity */ + nulls[i++] = true; + /* in_extension */ + values[i++] = BoolGetDatum(cmd->in_extension); + /* command */ + values[i++] = PointerGetDatum(cmd); + break; + } + + tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, + values, nulls); + } + + PG_RETURN_VOID(); +} + +/* + * Return the ObjectType as a string, as it would appear in GRANT and + * REVOKE commands. + */ +static const char * +stringify_grant_objtype(ObjectType objtype) +{ + switch (objtype) + { + case OBJECT_COLUMN: + return "COLUMN"; + case OBJECT_TABLE: + return "TABLE"; + case OBJECT_SEQUENCE: + return "SEQUENCE"; + case OBJECT_DATABASE: + return "DATABASE"; + case OBJECT_DOMAIN: + return "DOMAIN"; + case OBJECT_FDW: + return "FOREIGN DATA WRAPPER"; + case OBJECT_FOREIGN_SERVER: + return "FOREIGN SERVER"; + case OBJECT_FUNCTION: + return "FUNCTION"; + case OBJECT_LANGUAGE: + return "LANGUAGE"; + case OBJECT_LARGEOBJECT: + return "LARGE OBJECT"; + case OBJECT_SCHEMA: + return "SCHEMA"; + case OBJECT_PARAMETER_ACL: + return "PARAMETER"; + case OBJECT_PROCEDURE: + return "PROCEDURE"; + case OBJECT_ROUTINE: + return "ROUTINE"; + case OBJECT_TABLESPACE: + return "TABLESPACE"; + case OBJECT_TYPE: + return "TYPE"; + /* these currently aren't used */ + case OBJECT_ACCESS_METHOD: + case OBJECT_AGGREGATE: + case OBJECT_AMOP: + case OBJECT_AMPROC: + case OBJECT_ATTRIBUTE: + case OBJECT_CAST: + case OBJECT_COLLATION: + case OBJECT_CONVERSION: + case OBJECT_DEFAULT: + case OBJECT_DEFACL: + case OBJECT_DOMCONSTRAINT: + case 
OBJECT_EVENT_TRIGGER: + case OBJECT_EXTENSION: + case OBJECT_FOREIGN_TABLE: + case OBJECT_INDEX: + case OBJECT_MATVIEW: + case OBJECT_OPCLASS: + case OBJECT_OPERATOR: + case OBJECT_OPFAMILY: + case OBJECT_POLICY: + case OBJECT_PUBLICATION: + case OBJECT_PUBLICATION_NAMESPACE: + case OBJECT_PUBLICATION_REL: + case OBJECT_ROLE: + case OBJECT_RULE: + case OBJECT_STATISTIC_EXT: + case OBJECT_SUBSCRIPTION: + case OBJECT_TABCONSTRAINT: + case OBJECT_TRANSFORM: + case OBJECT_TRIGGER: + case OBJECT_TSCONFIGURATION: + case OBJECT_TSDICTIONARY: + case OBJECT_TSPARSER: + case OBJECT_TSTEMPLATE: + case OBJECT_USER_MAPPING: + case OBJECT_VIEW: + elog(ERROR, "unsupported object type: %d", (int) objtype); + } + + return "???"; /* keep compiler quiet */ +} + +/* + * Return the ObjectType as a string; as above, but use the spelling + * in ALTER DEFAULT PRIVILEGES commands instead. Generally this is just + * the plural. + */ +static const char * +stringify_adefprivs_objtype(ObjectType objtype) +{ + switch (objtype) + { + case OBJECT_COLUMN: + return "COLUMNS"; + case OBJECT_TABLE: + return "TABLES"; + case OBJECT_SEQUENCE: + return "SEQUENCES"; + case OBJECT_DATABASE: + return "DATABASES"; + case OBJECT_DOMAIN: + return "DOMAINS"; + case OBJECT_FDW: + return "FOREIGN DATA WRAPPERS"; + case OBJECT_FOREIGN_SERVER: + return "FOREIGN SERVERS"; + case OBJECT_FUNCTION: + return "FUNCTIONS"; + case OBJECT_LANGUAGE: + return "LANGUAGES"; + case OBJECT_LARGEOBJECT: + return "LARGE OBJECTS"; + case OBJECT_SCHEMA: + return "SCHEMAS"; + case OBJECT_PROCEDURE: + return "PROCEDURES"; + case OBJECT_ROUTINE: + return "ROUTINES"; + case OBJECT_TABLESPACE: + return "TABLESPACES"; + case OBJECT_TYPE: + return "TYPES"; + /* these currently aren't used */ + case OBJECT_ACCESS_METHOD: + case OBJECT_AGGREGATE: + case OBJECT_AMOP: + case OBJECT_AMPROC: + case OBJECT_ATTRIBUTE: + case OBJECT_CAST: + case OBJECT_COLLATION: + case OBJECT_CONVERSION: + case OBJECT_DEFAULT: + case OBJECT_DEFACL: + case 
OBJECT_DOMCONSTRAINT: + case OBJECT_EVENT_TRIGGER: + case OBJECT_EXTENSION: + case OBJECT_FOREIGN_TABLE: + case OBJECT_INDEX: + case OBJECT_MATVIEW: + case OBJECT_OPCLASS: + case OBJECT_OPERATOR: + case OBJECT_OPFAMILY: + case OBJECT_PARAMETER_ACL: + case OBJECT_POLICY: + case OBJECT_PUBLICATION: + case OBJECT_PUBLICATION_NAMESPACE: + case OBJECT_PUBLICATION_REL: + case OBJECT_ROLE: + case OBJECT_RULE: + case OBJECT_STATISTIC_EXT: + case OBJECT_SUBSCRIPTION: + case OBJECT_TABCONSTRAINT: + case OBJECT_TRANSFORM: + case OBJECT_TRIGGER: + case OBJECT_TSCONFIGURATION: + case OBJECT_TSDICTIONARY: + case OBJECT_TSPARSER: + case OBJECT_TSTEMPLATE: + case OBJECT_USER_MAPPING: + case OBJECT_VIEW: + elog(ERROR, "unsupported object type: %d", (int) objtype); + } + + return "???"; /* keep compiler quiet */ +} diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c new file mode 100644 index 0000000..060c618 --- /dev/null +++ b/src/backend/commands/explain.c @@ -0,0 +1,5022 @@ +/*------------------------------------------------------------------------- + * + * explain.c + * Explain query execution plans + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994-5, Regents of the University of California + * + * IDENTIFICATION + * src/backend/commands/explain.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/xact.h" +#include "catalog/pg_type.h" +#include "commands/createas.h" +#include "commands/defrem.h" +#include "commands/prepare.h" +#include "executor/nodeHash.h" +#include "foreign/fdwapi.h" +#include "jit/jit.h" +#include "nodes/extensible.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "parser/analyze.h" +#include "parser/parsetree.h" +#include "rewrite/rewriteHandler.h" +#include "storage/bufmgr.h" +#include "tcop/tcopprot.h" +#include "utils/builtins.h" +#include "utils/guc_tables.h" +#include 
"utils/json.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" +#include "utils/ruleutils.h" +#include "utils/snapmgr.h" +#include "utils/tuplesort.h" +#include "utils/typcache.h" +#include "utils/xml.h" + + +/* Hook for plugins to get control in ExplainOneQuery() */ +ExplainOneQuery_hook_type ExplainOneQuery_hook = NULL; + +/* Hook for plugins to get control in explain_get_index_name() */ +explain_get_index_name_hook_type explain_get_index_name_hook = NULL; + + +/* OR-able flags for ExplainXMLTag() */ +#define X_OPENING 0 +#define X_CLOSING 1 +#define X_CLOSE_IMMEDIATE 2 +#define X_NOWHITESPACE 4 + +static void ExplainOneQuery(Query *query, int cursorOptions, + IntoClause *into, ExplainState *es, + const char *queryString, ParamListInfo params, + QueryEnvironment *queryEnv); +static void ExplainPrintJIT(ExplainState *es, int jit_flags, + JitInstrumentation *ji); +static void report_triggers(ResultRelInfo *rInfo, bool show_relname, + ExplainState *es); +static double elapsed_time(instr_time *starttime); +static bool ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used); +static void ExplainNode(PlanState *planstate, List *ancestors, + const char *relationship, const char *plan_name, + ExplainState *es); +static void show_plan_tlist(PlanState *planstate, List *ancestors, + ExplainState *es); +static void show_expression(Node *node, const char *qlabel, + PlanState *planstate, List *ancestors, + bool useprefix, ExplainState *es); +static void show_qual(List *qual, const char *qlabel, + PlanState *planstate, List *ancestors, + bool useprefix, ExplainState *es); +static void show_scan_qual(List *qual, const char *qlabel, + PlanState *planstate, List *ancestors, + ExplainState *es); +static void show_upper_qual(List *qual, const char *qlabel, + PlanState *planstate, List *ancestors, + ExplainState *es); +static void show_sort_keys(SortState *sortstate, List *ancestors, + ExplainState *es); +static void show_incremental_sort_keys(IncrementalSortState 
*incrsortstate, + List *ancestors, ExplainState *es); +static void show_merge_append_keys(MergeAppendState *mstate, List *ancestors, + ExplainState *es); +static void show_agg_keys(AggState *astate, List *ancestors, + ExplainState *es); +static void show_grouping_sets(PlanState *planstate, Agg *agg, + List *ancestors, ExplainState *es); +static void show_grouping_set_keys(PlanState *planstate, + Agg *aggnode, Sort *sortnode, + List *context, bool useprefix, + List *ancestors, ExplainState *es); +static void show_group_keys(GroupState *gstate, List *ancestors, + ExplainState *es); +static void show_sort_group_keys(PlanState *planstate, const char *qlabel, + int nkeys, int nPresortedKeys, AttrNumber *keycols, + Oid *sortOperators, Oid *collations, bool *nullsFirst, + List *ancestors, ExplainState *es); +static void show_sortorder_options(StringInfo buf, Node *sortexpr, + Oid sortOperator, Oid collation, bool nullsFirst); +static void show_tablesample(TableSampleClause *tsc, PlanState *planstate, + List *ancestors, ExplainState *es); +static void show_sort_info(SortState *sortstate, ExplainState *es); +static void show_incremental_sort_info(IncrementalSortState *incrsortstate, + ExplainState *es); +static void show_hash_info(HashState *hashstate, ExplainState *es); +static void show_memoize_info(MemoizeState *mstate, List *ancestors, + ExplainState *es); +static void show_hashagg_info(AggState *hashstate, ExplainState *es); +static void show_tidbitmap_info(BitmapHeapScanState *planstate, + ExplainState *es); +static void show_instrumentation_count(const char *qlabel, int which, + PlanState *planstate, ExplainState *es); +static void show_foreignscan_info(ForeignScanState *fsstate, ExplainState *es); +static void show_eval_params(Bitmapset *bms_params, ExplainState *es); +static const char *explain_get_index_name(Oid indexId); +static void show_buffer_usage(ExplainState *es, const BufferUsage *usage, + bool planning); +static void show_wal_usage(ExplainState *es, const 
WalUsage *usage); +static void ExplainIndexScanDetails(Oid indexid, ScanDirection indexorderdir, + ExplainState *es); +static void ExplainScanTarget(Scan *plan, ExplainState *es); +static void ExplainModifyTarget(ModifyTable *plan, ExplainState *es); +static void ExplainTargetRel(Plan *plan, Index rti, ExplainState *es); +static void show_modifytable_info(ModifyTableState *mtstate, List *ancestors, + ExplainState *es); +static void ExplainMemberNodes(PlanState **planstates, int nplans, + List *ancestors, ExplainState *es); +static void ExplainMissingMembers(int nplans, int nchildren, ExplainState *es); +static void ExplainSubPlans(List *plans, List *ancestors, + const char *relationship, ExplainState *es); +static void ExplainCustomChildren(CustomScanState *css, + List *ancestors, ExplainState *es); +static ExplainWorkersState *ExplainCreateWorkersState(int num_workers); +static void ExplainOpenWorker(int n, ExplainState *es); +static void ExplainCloseWorker(int n, ExplainState *es); +static void ExplainFlushWorkersState(ExplainState *es); +static void ExplainProperty(const char *qlabel, const char *unit, + const char *value, bool numeric, ExplainState *es); +static void ExplainOpenSetAsideGroup(const char *objtype, const char *labelname, + bool labeled, int depth, ExplainState *es); +static void ExplainSaveGroup(ExplainState *es, int depth, int *state_save); +static void ExplainRestoreGroup(ExplainState *es, int depth, int *state_save); +static void ExplainDummyGroup(const char *objtype, const char *labelname, + ExplainState *es); +static void ExplainXMLTag(const char *tagname, int flags, ExplainState *es); +static void ExplainIndentText(ExplainState *es); +static void ExplainJSONLineEnding(ExplainState *es); +static void ExplainYAMLLineStarting(ExplainState *es); +static void escape_yaml(StringInfo buf, const char *str); + + + +/* + * ExplainQuery - + * execute an EXPLAIN command + */ +void +ExplainQuery(ParseState *pstate, ExplainStmt *stmt, + ParamListInfo 
params, DestReceiver *dest) +{ + ExplainState *es = NewExplainState(); + TupOutputState *tstate; + JumbleState *jstate = NULL; + Query *query; + List *rewritten; + ListCell *lc; + bool timing_set = false; + bool summary_set = false; + + /* Parse options list. */ + foreach(lc, stmt->options) + { + DefElem *opt = (DefElem *) lfirst(lc); + + if (strcmp(opt->defname, "analyze") == 0) + es->analyze = defGetBoolean(opt); + else if (strcmp(opt->defname, "verbose") == 0) + es->verbose = defGetBoolean(opt); + else if (strcmp(opt->defname, "costs") == 0) + es->costs = defGetBoolean(opt); + else if (strcmp(opt->defname, "buffers") == 0) + es->buffers = defGetBoolean(opt); + else if (strcmp(opt->defname, "wal") == 0) + es->wal = defGetBoolean(opt); + else if (strcmp(opt->defname, "settings") == 0) + es->settings = defGetBoolean(opt); + else if (strcmp(opt->defname, "timing") == 0) + { + timing_set = true; + es->timing = defGetBoolean(opt); + } + else if (strcmp(opt->defname, "summary") == 0) + { + summary_set = true; + es->summary = defGetBoolean(opt); + } + else if (strcmp(opt->defname, "format") == 0) + { + char *p = defGetString(opt); + + if (strcmp(p, "text") == 0) + es->format = EXPLAIN_FORMAT_TEXT; + else if (strcmp(p, "xml") == 0) + es->format = EXPLAIN_FORMAT_XML; + else if (strcmp(p, "json") == 0) + es->format = EXPLAIN_FORMAT_JSON; + else if (strcmp(p, "yaml") == 0) + es->format = EXPLAIN_FORMAT_YAML; + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unrecognized value for EXPLAIN option \"%s\": \"%s\"", + opt->defname, p), + parser_errposition(pstate, opt->location))); + } + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unrecognized EXPLAIN option \"%s\"", + opt->defname), + parser_errposition(pstate, opt->location))); + } + + if (es->wal && !es->analyze) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("EXPLAIN option WAL requires ANALYZE"))); + + /* if the timing was not set explicitly, set 
default value */ + es->timing = (timing_set) ? es->timing : es->analyze; + + /* check that timing is used with EXPLAIN ANALYZE */ + if (es->timing && !es->analyze) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("EXPLAIN option TIMING requires ANALYZE"))); + + /* if the summary was not set explicitly, set default value */ + es->summary = (summary_set) ? es->summary : es->analyze; + + query = castNode(Query, stmt->query); + if (IsQueryIdEnabled()) + jstate = JumbleQuery(query, pstate->p_sourcetext); + + if (post_parse_analyze_hook) + (*post_parse_analyze_hook) (pstate, query, jstate); + + /* + * Parse analysis was done already, but we still have to run the rule + * rewriter. We do not do AcquireRewriteLocks: we assume the query either + * came straight from the parser, or suitable locks were acquired by + * plancache.c. + */ + rewritten = QueryRewrite(castNode(Query, stmt->query)); + + /* emit opening boilerplate */ + ExplainBeginOutput(es); + + if (rewritten == NIL) + { + /* + * In the case of an INSTEAD NOTHING, tell at least that. But in + * non-text format, the output is delimited, so this isn't necessary. 
+ */ + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfoString(es->str, "Query rewrites to nothing\n"); + } + else + { + ListCell *l; + + /* Explain every plan */ + foreach(l, rewritten) + { + ExplainOneQuery(lfirst_node(Query, l), + CURSOR_OPT_PARALLEL_OK, NULL, es, + pstate->p_sourcetext, params, pstate->p_queryEnv); + + /* Separate plans with an appropriate separator */ + if (lnext(rewritten, l) != NULL) + ExplainSeparatePlans(es); + } + } + + /* emit closing boilerplate */ + ExplainEndOutput(es); + Assert(es->indent == 0); + + /* output tuples */ + tstate = begin_tup_output_tupdesc(dest, ExplainResultDesc(stmt), + &TTSOpsVirtual); + if (es->format == EXPLAIN_FORMAT_TEXT) + do_text_output_multiline(tstate, es->str->data); + else + do_text_output_oneline(tstate, es->str->data); + end_tup_output(tstate); + + pfree(es->str->data); +} + +/* + * Create a new ExplainState struct initialized with default options. + */ +ExplainState * +NewExplainState(void) +{ + ExplainState *es = (ExplainState *) palloc0(sizeof(ExplainState)); + + /* Set default options (most fields can be left as zeroes). */ + es->costs = true; + /* Prepare output buffer. 
*/ + es->str = makeStringInfo(); + + return es; +} + +/* + * ExplainResultDesc - + * construct the result tupledesc for an EXPLAIN + */ +TupleDesc +ExplainResultDesc(ExplainStmt *stmt) +{ + TupleDesc tupdesc; + ListCell *lc; + Oid result_type = TEXTOID; + + /* Check for XML format option */ + foreach(lc, stmt->options) + { + DefElem *opt = (DefElem *) lfirst(lc); + + if (strcmp(opt->defname, "format") == 0) + { + char *p = defGetString(opt); + + if (strcmp(p, "xml") == 0) + result_type = XMLOID; + else if (strcmp(p, "json") == 0) + result_type = JSONOID; + else + result_type = TEXTOID; + /* don't "break", as ExplainQuery will use the last value */ + } + } + + /* Need a tuple descriptor representing a single TEXT or XML column */ + tupdesc = CreateTemplateTupleDesc(1); + TupleDescInitEntry(tupdesc, (AttrNumber) 1, "QUERY PLAN", + result_type, -1, 0); + return tupdesc; +} + +/* + * ExplainOneQuery - + * print out the execution plan for one Query + * + * "into" is NULL unless we are explaining the contents of a CreateTableAsStmt. 
+ */ +static void +ExplainOneQuery(Query *query, int cursorOptions, + IntoClause *into, ExplainState *es, + const char *queryString, ParamListInfo params, + QueryEnvironment *queryEnv) +{ + /* planner will not cope with utility statements */ + if (query->commandType == CMD_UTILITY) + { + ExplainOneUtility(query->utilityStmt, into, es, queryString, params, + queryEnv); + return; + } + + /* if an advisor plugin is present, let it manage things */ + if (ExplainOneQuery_hook) + (*ExplainOneQuery_hook) (query, cursorOptions, into, es, + queryString, params, queryEnv); + else + { + PlannedStmt *plan; + instr_time planstart, + planduration; + BufferUsage bufusage_start, + bufusage; + + if (es->buffers) + bufusage_start = pgBufferUsage; + INSTR_TIME_SET_CURRENT(planstart); + + /* plan the query */ + plan = pg_plan_query(query, queryString, cursorOptions, params); + + INSTR_TIME_SET_CURRENT(planduration); + INSTR_TIME_SUBTRACT(planduration, planstart); + + /* calc differences of buffer counters. */ + if (es->buffers) + { + memset(&bufusage, 0, sizeof(BufferUsage)); + BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start); + } + + /* run it (if needed) and produce output */ + ExplainOnePlan(plan, into, es, queryString, params, queryEnv, + &planduration, (es->buffers ? &bufusage : NULL)); + } +} + +/* + * ExplainOneUtility - + * print out the execution plan for one utility statement + * (In general, utility statements don't have plans, but there are some + * we treat as special cases) + * + * "into" is NULL unless we are explaining the contents of a CreateTableAsStmt. + * + * This is exported because it's called back from prepare.c in the + * EXPLAIN EXECUTE case. In that case, we'll be dealing with a statement + * that's in the plan cache, so we have to ensure we don't modify it. 
+ */ +void +ExplainOneUtility(Node *utilityStmt, IntoClause *into, ExplainState *es, + const char *queryString, ParamListInfo params, + QueryEnvironment *queryEnv) +{ + if (utilityStmt == NULL) + return; + + if (IsA(utilityStmt, CreateTableAsStmt)) + { + /* + * We have to rewrite the contained SELECT and then pass it back to + * ExplainOneQuery. Copy to be safe in the EXPLAIN EXECUTE case. + */ + CreateTableAsStmt *ctas = (CreateTableAsStmt *) utilityStmt; + List *rewritten; + + /* + * Check if the relation exists or not. This is done at this stage to + * avoid query planning or execution. + */ + if (CreateTableAsRelExists(ctas)) + { + if (ctas->objtype == OBJECT_TABLE) + ExplainDummyGroup("CREATE TABLE AS", NULL, es); + else if (ctas->objtype == OBJECT_MATVIEW) + ExplainDummyGroup("CREATE MATERIALIZED VIEW", NULL, es); + else + elog(ERROR, "unexpected object type: %d", + (int) ctas->objtype); + return; + } + + rewritten = QueryRewrite(castNode(Query, copyObject(ctas->query))); + Assert(list_length(rewritten) == 1); + ExplainOneQuery(linitial_node(Query, rewritten), + CURSOR_OPT_PARALLEL_OK, ctas->into, es, + queryString, params, queryEnv); + } + else if (IsA(utilityStmt, DeclareCursorStmt)) + { + /* + * Likewise for DECLARE CURSOR. + * + * Notice that if you say EXPLAIN ANALYZE DECLARE CURSOR then we'll + * actually run the query. This is different from pre-8.3 behavior + * but seems more useful than not running the query. No cursor will + * be created, however. 
+ */ + DeclareCursorStmt *dcs = (DeclareCursorStmt *) utilityStmt; + List *rewritten; + + rewritten = QueryRewrite(castNode(Query, copyObject(dcs->query))); + Assert(list_length(rewritten) == 1); + ExplainOneQuery(linitial_node(Query, rewritten), + dcs->options, NULL, es, + queryString, params, queryEnv); + } + else if (IsA(utilityStmt, ExecuteStmt)) + ExplainExecuteQuery((ExecuteStmt *) utilityStmt, into, es, + queryString, params, queryEnv); + else if (IsA(utilityStmt, NotifyStmt)) + { + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfoString(es->str, "NOTIFY\n"); + else + ExplainDummyGroup("Notify", NULL, es); + } + else + { + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfoString(es->str, + "Utility statements have no plan structure\n"); + else + ExplainDummyGroup("Utility Statement", NULL, es); + } +} + +/* + * ExplainOnePlan - + * given a planned query, execute it if needed, and then print + * EXPLAIN output + * + * "into" is NULL unless we are explaining the contents of a CreateTableAsStmt, + * in which case executing the query should result in creating that table. + * + * This is exported because it's called back from prepare.c in the + * EXPLAIN EXECUTE case, and because an index advisor plugin would need + * to call it. 
+ */ +void +ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es, + const char *queryString, ParamListInfo params, + QueryEnvironment *queryEnv, const instr_time *planduration, + const BufferUsage *bufusage) +{ + DestReceiver *dest; + QueryDesc *queryDesc; + instr_time starttime; + double totaltime = 0; + int eflags; + int instrument_option = 0; + + Assert(plannedstmt->commandType != CMD_UTILITY); + + if (es->analyze && es->timing) + instrument_option |= INSTRUMENT_TIMER; + else if (es->analyze) + instrument_option |= INSTRUMENT_ROWS; + + if (es->buffers) + instrument_option |= INSTRUMENT_BUFFERS; + if (es->wal) + instrument_option |= INSTRUMENT_WAL; + + /* + * We always collect timing for the entire statement, even when node-level + * timing is off, so we don't look at es->timing here. (We could skip + * this if !es->summary, but it's hardly worth the complication.) + */ + INSTR_TIME_SET_CURRENT(starttime); + + /* + * Use a snapshot with an updated command ID to ensure this query sees + * results of any previously executed queries. + */ + PushCopiedSnapshot(GetActiveSnapshot()); + UpdateActiveSnapshotCommandId(); + + /* + * Normally we discard the query's output, but if explaining CREATE TABLE + * AS, we'd better use the appropriate tuple receiver. 
+ */ + if (into) + dest = CreateIntoRelDestReceiver(into); + else + dest = None_Receiver; + + /* Create a QueryDesc for the query */ + queryDesc = CreateQueryDesc(plannedstmt, queryString, + GetActiveSnapshot(), InvalidSnapshot, + dest, params, queryEnv, instrument_option); + + /* Select execution options */ + if (es->analyze) + eflags = 0; /* default run-to-completion flags */ + else + eflags = EXEC_FLAG_EXPLAIN_ONLY; + if (into) + eflags |= GetIntoRelEFlags(into); + + /* call ExecutorStart to prepare the plan for execution */ + ExecutorStart(queryDesc, eflags); + + /* Execute the plan for statistics if asked for */ + if (es->analyze) + { + ScanDirection dir; + + /* EXPLAIN ANALYZE CREATE TABLE AS WITH NO DATA is weird */ + if (into && into->skipData) + dir = NoMovementScanDirection; + else + dir = ForwardScanDirection; + + /* run the plan */ + ExecutorRun(queryDesc, dir, 0L, true); + + /* run cleanup too */ + ExecutorFinish(queryDesc); + + /* We can't run ExecutorEnd 'till we're done printing the stats... */ + totaltime += elapsed_time(&starttime); + } + + ExplainOpenGroup("Query", NULL, true, es); + + /* Create textual dump of plan tree */ + ExplainPrintPlan(es, queryDesc); + + /* + * COMPUTE_QUERY_ID_REGRESS means COMPUTE_QUERY_ID_AUTO, but we don't show + * the queryid in any of the EXPLAIN plans to keep stable the results + * generated by regression test suites. + */ + if (es->verbose && plannedstmt->queryId != UINT64CONST(0) && + compute_query_id != COMPUTE_QUERY_ID_REGRESS) + { + /* + * Output the queryid as an int64 rather than a uint64 so we match + * what would be seen in the BIGINT pg_stat_statements.queryid column. 
+ */ + ExplainPropertyInteger("Query Identifier", NULL, (int64) + plannedstmt->queryId, es); + } + + /* Show buffer usage in planning */ + if (bufusage) + { + ExplainOpenGroup("Planning", "Planning", true, es); + show_buffer_usage(es, bufusage, true); + ExplainCloseGroup("Planning", "Planning", true, es); + } + + if (es->summary && planduration) + { + double plantime = INSTR_TIME_GET_DOUBLE(*planduration); + + ExplainPropertyFloat("Planning Time", "ms", 1000.0 * plantime, 3, es); + } + + /* Print info about runtime of triggers */ + if (es->analyze) + ExplainPrintTriggers(es, queryDesc); + + /* + * Print info about JITing. Tied to es->costs because we don't want to + * display this in regression tests, as it'd cause output differences + * depending on build options. Might want to separate that out from COSTS + * at a later stage. + */ + if (es->costs) + ExplainPrintJITSummary(es, queryDesc); + + /* + * Close down the query and free resources. Include time for this in the + * total execution time (although it should be pretty minimal). + */ + INSTR_TIME_SET_CURRENT(starttime); + + ExecutorEnd(queryDesc); + + FreeQueryDesc(queryDesc); + + PopActiveSnapshot(); + + /* We need a CCI just in case query expanded to multiple plans */ + if (es->analyze) + CommandCounterIncrement(); + + totaltime += elapsed_time(&starttime); + + /* + * We only report execution time if we actually ran the query (that is, + * the user specified ANALYZE), and if summary reporting is enabled (the + * user can set SUMMARY OFF to not have the timing information included in + * the output). By default, ANALYZE sets SUMMARY to true. + */ + if (es->summary && es->analyze) + ExplainPropertyFloat("Execution Time", "ms", 1000.0 * totaltime, 3, + es); + + ExplainCloseGroup("Query", NULL, true, es); +} + +/* + * ExplainPrintSettings - + * Print summary of modified settings affecting query planning. 
+ */ +static void +ExplainPrintSettings(ExplainState *es) +{ + int num; + struct config_generic **gucs; + + /* bail out if information about settings not requested */ + if (!es->settings) + return; + + /* request an array of relevant settings */ + gucs = get_explain_guc_options(&num); + + if (es->format != EXPLAIN_FORMAT_TEXT) + { + ExplainOpenGroup("Settings", "Settings", true, es); + + for (int i = 0; i < num; i++) + { + char *setting; + struct config_generic *conf = gucs[i]; + + setting = GetConfigOptionByName(conf->name, NULL, true); + + ExplainPropertyText(conf->name, setting, es); + } + + ExplainCloseGroup("Settings", "Settings", true, es); + } + else + { + StringInfoData str; + + /* In TEXT mode, print nothing if there are no options */ + if (num <= 0) + return; + + initStringInfo(&str); + + for (int i = 0; i < num; i++) + { + char *setting; + struct config_generic *conf = gucs[i]; + + if (i > 0) + appendStringInfoString(&str, ", "); + + setting = GetConfigOptionByName(conf->name, NULL, true); + + if (setting) + appendStringInfo(&str, "%s = '%s'", conf->name, setting); + else + appendStringInfo(&str, "%s = NULL", conf->name); + } + + ExplainPropertyText("Settings", str.data, es); + } +} + +/* + * ExplainPrintPlan - + * convert a QueryDesc's plan tree to text and append it to es->str + * + * The caller should have set up the options fields of *es, as well as + * initializing the output buffer es->str. Also, output formatting state + * such as the indent level is assumed valid. Plan-tree-specific fields + * in *es are initialized here. 
 *
 * NB: will not work on utility statements
 */
void
ExplainPrintPlan(ExplainState *es, QueryDesc *queryDesc)
{
	Bitmapset  *rels_used = NULL;
	PlanState  *ps;

	/* Set up ExplainState fields associated with this plan tree */
	Assert(queryDesc->plannedstmt != NULL);
	es->pstmt = queryDesc->plannedstmt;
	es->rtable = queryDesc->plannedstmt->rtable;
	/* Find which RTEs the plan actually references, for alias assignment */
	ExplainPreScanNode(queryDesc->planstate, &rels_used);
	es->rtable_names = select_rtable_names_for_explain(es->rtable, rels_used);
	/* Deparse context must be built after rtable_names is available */
	es->deparse_cxt = deparse_context_for_plan_tree(queryDesc->plannedstmt,
													es->rtable_names);
	es->printed_subplans = NULL;

	/*
	 * Sometimes we mark a Gather node as "invisible", which means that it's
	 * not to be displayed in EXPLAIN output.  The purpose of this is to allow
	 * running regression tests with force_parallel_mode=regress to get the
	 * same results as running the same tests with force_parallel_mode=off.
	 * Such marking is currently only supported on a Gather at the top of the
	 * plan.  We skip that node, and we must also hide per-worker detail data
	 * further down in the plan tree.
	 */
	ps = queryDesc->planstate;
	if (IsA(ps, GatherState) && ((Gather *) ps->plan)->invisible)
	{
		ps = outerPlanState(ps);
		es->hide_workers = true;	/* suppress per-worker detail too */
	}
	/* Recursively dump the plan tree starting at (possibly adjusted) root */
	ExplainNode(ps, NIL, NULL, NULL, es);

	/*
	 * If requested, include information about GUC parameters with values that
	 * don't match the built-in defaults.
	 */
	ExplainPrintSettings(es);
}

/*
 * ExplainPrintTriggers -
 *	  convert a QueryDesc's trigger statistics to text and append it to
 *	  es->str
 *
 * The caller should have set up the options fields of *es, as well as
 * initializing the output buffer es->str.  Other fields in *es are
 * initialized here.
+ */ +void +ExplainPrintTriggers(ExplainState *es, QueryDesc *queryDesc) +{ + ResultRelInfo *rInfo; + bool show_relname; + List *resultrels; + List *routerels; + List *targrels; + ListCell *l; + + resultrels = queryDesc->estate->es_opened_result_relations; + routerels = queryDesc->estate->es_tuple_routing_result_relations; + targrels = queryDesc->estate->es_trig_target_relations; + + ExplainOpenGroup("Triggers", "Triggers", false, es); + + show_relname = (list_length(resultrels) > 1 || + routerels != NIL || targrels != NIL); + foreach(l, resultrels) + { + rInfo = (ResultRelInfo *) lfirst(l); + report_triggers(rInfo, show_relname, es); + } + + foreach(l, routerels) + { + rInfo = (ResultRelInfo *) lfirst(l); + report_triggers(rInfo, show_relname, es); + } + + foreach(l, targrels) + { + rInfo = (ResultRelInfo *) lfirst(l); + report_triggers(rInfo, show_relname, es); + } + + ExplainCloseGroup("Triggers", "Triggers", false, es); +} + +/* + * ExplainPrintJITSummary - + * Print summarized JIT instrumentation from leader and workers + */ +void +ExplainPrintJITSummary(ExplainState *es, QueryDesc *queryDesc) +{ + JitInstrumentation ji = {0}; + + if (!(queryDesc->estate->es_jit_flags & PGJIT_PERFORM)) + return; + + /* + * Work with a copy instead of modifying the leader state, since this + * function may be called twice + */ + if (queryDesc->estate->es_jit) + InstrJitAgg(&ji, &queryDesc->estate->es_jit->instr); + + /* If this process has done JIT in parallel workers, merge stats */ + if (queryDesc->estate->es_jit_worker_instr) + InstrJitAgg(&ji, queryDesc->estate->es_jit_worker_instr); + + ExplainPrintJIT(es, queryDesc->estate->es_jit_flags, &ji); +} + +/* + * ExplainPrintJIT - + * Append information about JITing to es->str. 
 */
static void
ExplainPrintJIT(ExplainState *es, int jit_flags, JitInstrumentation *ji)
{
	instr_time	total_time;

	/* don't print information if no JITing happened */
	if (!ji || ji->created_functions == 0)
		return;

	/* calculate total time across all four JIT phases */
	INSTR_TIME_SET_ZERO(total_time);
	INSTR_TIME_ADD(total_time, ji->generation_counter);
	INSTR_TIME_ADD(total_time, ji->inlining_counter);
	INSTR_TIME_ADD(total_time, ji->optimization_counter);
	INSTR_TIME_ADD(total_time, ji->emission_counter);

	ExplainOpenGroup("JIT", "JIT", true, es);

	/* for higher density, open code the text output format */
	if (es->format == EXPLAIN_FORMAT_TEXT)
	{
		ExplainIndentText(es);
		appendStringInfoString(es->str, "JIT:\n");
		es->indent++;

		ExplainPropertyInteger("Functions", NULL, ji->created_functions, es);

		/* Options line reflects which JIT features were enabled via flags */
		ExplainIndentText(es);
		appendStringInfo(es->str, "Options: %s %s, %s %s, %s %s, %s %s\n",
						 "Inlining", jit_flags & PGJIT_INLINE ? "true" : "false",
						 "Optimization", jit_flags & PGJIT_OPT3 ? "true" : "false",
						 "Expressions", jit_flags & PGJIT_EXPR ? "true" : "false",
						 "Deforming", jit_flags & PGJIT_DEFORM ? "true" : "false");

		/* Timing detail only with EXPLAIN ANALYZE and TIMING enabled */
		if (es->analyze && es->timing)
		{
			ExplainIndentText(es);
			appendStringInfo(es->str,
							 "Timing: %s %.3f ms, %s %.3f ms, %s %.3f ms, %s %.3f ms, %s %.3f ms\n",
							 "Generation", 1000.0 * INSTR_TIME_GET_DOUBLE(ji->generation_counter),
							 "Inlining", 1000.0 * INSTR_TIME_GET_DOUBLE(ji->inlining_counter),
							 "Optimization", 1000.0 * INSTR_TIME_GET_DOUBLE(ji->optimization_counter),
							 "Emission", 1000.0 * INSTR_TIME_GET_DOUBLE(ji->emission_counter),
							 "Total", 1000.0 * INSTR_TIME_GET_DOUBLE(total_time));
		}

		es->indent--;
	}
	else
	{
		/* Structured formats: one property/group per item instead */
		ExplainPropertyInteger("Functions", NULL, ji->created_functions, es);

		ExplainOpenGroup("Options", "Options", true, es);
		ExplainPropertyBool("Inlining", jit_flags & PGJIT_INLINE, es);
		ExplainPropertyBool("Optimization", jit_flags & PGJIT_OPT3, es);
		ExplainPropertyBool("Expressions", jit_flags & PGJIT_EXPR, es);
		ExplainPropertyBool("Deforming", jit_flags & PGJIT_DEFORM, es);
		ExplainCloseGroup("Options", "Options", true, es);

		if (es->analyze && es->timing)
		{
			ExplainOpenGroup("Timing", "Timing", true, es);

			ExplainPropertyFloat("Generation", "ms",
								 1000.0 * INSTR_TIME_GET_DOUBLE(ji->generation_counter),
								 3, es);
			ExplainPropertyFloat("Inlining", "ms",
								 1000.0 * INSTR_TIME_GET_DOUBLE(ji->inlining_counter),
								 3, es);
			ExplainPropertyFloat("Optimization", "ms",
								 1000.0 * INSTR_TIME_GET_DOUBLE(ji->optimization_counter),
								 3, es);
			ExplainPropertyFloat("Emission", "ms",
								 1000.0 * INSTR_TIME_GET_DOUBLE(ji->emission_counter),
								 3, es);
			ExplainPropertyFloat("Total", "ms",
								 1000.0 * INSTR_TIME_GET_DOUBLE(total_time),
								 3, es);

			ExplainCloseGroup("Timing", "Timing", true, es);
		}
	}

	ExplainCloseGroup("JIT", "JIT", true, es);
}

/*
 * ExplainQueryText -
 *	  add a "Query Text" node that contains the actual text of the query
 *
 * The caller should have set up the options fields of *es, as well as
 * initializing the output buffer es->str.
 *
 */
void
ExplainQueryText(ExplainState *es, QueryDesc *queryDesc)
{
	/* Skip silently if no source text is available */
	if (queryDesc->sourceText)
		ExplainPropertyText("Query Text", queryDesc->sourceText, es);
}

/*
 * report_triggers -
 *		report execution stats for a single relation's triggers
 */
static void
report_triggers(ResultRelInfo *rInfo, bool show_relname, ExplainState *es)
{
	int			nt;

	/* Nothing to report without a trigger descriptor and instrumentation */
	if (!rInfo->ri_TrigDesc || !rInfo->ri_TrigInstrument)
		return;
	for (nt = 0; nt < rInfo->ri_TrigDesc->numtriggers; nt++)
	{
		Trigger    *trig = rInfo->ri_TrigDesc->triggers + nt;
		Instrumentation *instr = rInfo->ri_TrigInstrument + nt;
		char	   *relname;
		char	   *conname = NULL;

		/* Must clean up instrumentation state */
		InstrEndLoop(instr);

		/*
		 * We ignore triggers that were never invoked; they likely aren't
		 * relevant to the current query type.
		 */
		if (instr->ntuples == 0)
			continue;

		ExplainOpenGroup("Trigger", NULL, true, es);

		relname = RelationGetRelationName(rInfo->ri_RelationDesc);
		if (OidIsValid(trig->tgconstraint))
			conname = get_constraint_name(trig->tgconstraint);

		/*
		 * In text format, we avoid printing both the trigger name and the
		 * constraint name unless VERBOSE is specified.  In non-text formats
		 * we just print everything.
		 */
		if (es->format == EXPLAIN_FORMAT_TEXT)
		{
			if (es->verbose || conname == NULL)
				appendStringInfo(es->str, "Trigger %s", trig->tgname);
			else
				appendStringInfoString(es->str, "Trigger");
			if (conname)
				appendStringInfo(es->str, " for constraint %s", conname);
			if (show_relname)
				appendStringInfo(es->str, " on %s", relname);
			/* times are converted to ms for display */
			if (es->timing)
				appendStringInfo(es->str, ": time=%.3f calls=%.0f\n",
								 1000.0 * instr->total, instr->ntuples);
			else
				appendStringInfo(es->str, ": calls=%.0f\n", instr->ntuples);
		}
		else
		{
			ExplainPropertyText("Trigger Name", trig->tgname, es);
			if (conname)
				ExplainPropertyText("Constraint Name", conname, es);
			ExplainPropertyText("Relation", relname, es);
			if (es->timing)
				ExplainPropertyFloat("Time", "ms", 1000.0 * instr->total, 3,
									 es);
			ExplainPropertyFloat("Calls", NULL, instr->ntuples, 0, es);
		}

		/* get_constraint_name() result was palloc'd; release it */
		if (conname)
			pfree(conname);

		ExplainCloseGroup("Trigger", NULL, true, es);
	}
}

/* Compute elapsed time in seconds since given timestamp */
static double
elapsed_time(instr_time *starttime)
{
	instr_time	endtime;

	INSTR_TIME_SET_CURRENT(endtime);
	INSTR_TIME_SUBTRACT(endtime, *starttime);
	return INSTR_TIME_GET_DOUBLE(endtime);
}

/*
 * ExplainPreScanNode -
 *	  Prescan the planstate tree to identify which RTEs are referenced
 *
 * Adds the relid of each referenced RTE to *rels_used.  The result controls
 * which RTEs are assigned aliases by select_rtable_names_for_explain.
 * This ensures that we don't confusingly assign un-suffixed aliases to RTEs
 * that never appear in the EXPLAIN output (such as inheritance parents).
 */
static bool
ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used)
{
	Plan	   *plan = planstate->plan;

	switch (nodeTag(plan))
	{
			/* Simple scan nodes reference exactly one RTE, via scanrelid */
		case T_SeqScan:
		case T_SampleScan:
		case T_IndexScan:
		case T_IndexOnlyScan:
		case T_BitmapHeapScan:
		case T_TidScan:
		case T_TidRangeScan:
		case T_SubqueryScan:
		case T_FunctionScan:
		case T_TableFuncScan:
		case T_ValuesScan:
		case T_CteScan:
		case T_NamedTuplestoreScan:
		case T_WorkTableScan:
			*rels_used = bms_add_member(*rels_used,
										((Scan *) plan)->scanrelid);
			break;
			/* Foreign/custom scans may reference several RTEs (joins) */
		case T_ForeignScan:
			*rels_used = bms_add_members(*rels_used,
										 ((ForeignScan *) plan)->fs_relids);
			break;
		case T_CustomScan:
			*rels_used = bms_add_members(*rels_used,
										 ((CustomScan *) plan)->custom_relids);
			break;
		case T_ModifyTable:
			/* Count the nominal target rel, plus the EXCLUDED pseudo-rel */
			*rels_used = bms_add_member(*rels_used,
										((ModifyTable *) plan)->nominalRelation);
			if (((ModifyTable *) plan)->exclRelRTI)
				*rels_used = bms_add_member(*rels_used,
											((ModifyTable *) plan)->exclRelRTI);
			break;
		case T_Append:
			*rels_used = bms_add_members(*rels_used,
										 ((Append *) plan)->apprelids);
			break;
		case T_MergeAppend:
			*rels_used = bms_add_members(*rels_used,
										 ((MergeAppend *) plan)->apprelids);
			break;
		default:
			/* Other node types reference no RTEs directly */
			break;
	}

	/* Recurse to children; walker returns true only on abnormal exit */
	return planstate_tree_walker(planstate, ExplainPreScanNode, rels_used);
}

/*
 * ExplainNode -
 *	  Appends a description of a plan tree to es->str
 *
 * planstate points to the executor state node for the current plan node.
 * We need to work from a PlanState node, not just a Plan node, in order to
 * get at the instrumentation data (if any) as well as the list of subplans.
 *
 * ancestors is a list of parent Plan and SubPlan nodes, most-closely-nested
 * first.  These are needed in order to interpret PARAM_EXEC Params.
 *
 * relationship describes the relationship of this plan node to its parent
 * (eg, "Outer", "Inner"); it can be null at top level.  plan_name is an
 * optional name to be attached to the node.
+ * + * In text format, es->indent is controlled in this function since we only + * want it to change at plan-node boundaries (but a few subroutines will + * transiently increment it). In non-text formats, es->indent corresponds + * to the nesting depth of logical output groups, and therefore is controlled + * by ExplainOpenGroup/ExplainCloseGroup. + */ +static void +ExplainNode(PlanState *planstate, List *ancestors, + const char *relationship, const char *plan_name, + ExplainState *es) +{ + Plan *plan = planstate->plan; + const char *pname; /* node type name for text output */ + const char *sname; /* node type name for non-text output */ + const char *strategy = NULL; + const char *partialmode = NULL; + const char *operation = NULL; + const char *custom_name = NULL; + ExplainWorkersState *save_workers_state = es->workers_state; + int save_indent = es->indent; + bool haschildren; + + /* + * Prepare per-worker output buffers, if needed. We'll append the data in + * these to the main output string further down. 
+ */ + if (planstate->worker_instrument && es->analyze && !es->hide_workers) + es->workers_state = ExplainCreateWorkersState(planstate->worker_instrument->num_workers); + else + es->workers_state = NULL; + + /* Identify plan node type, and print generic details */ + switch (nodeTag(plan)) + { + case T_Result: + pname = sname = "Result"; + break; + case T_ProjectSet: + pname = sname = "ProjectSet"; + break; + case T_ModifyTable: + sname = "ModifyTable"; + switch (((ModifyTable *) plan)->operation) + { + case CMD_INSERT: + pname = operation = "Insert"; + break; + case CMD_UPDATE: + pname = operation = "Update"; + break; + case CMD_DELETE: + pname = operation = "Delete"; + break; + case CMD_MERGE: + pname = operation = "Merge"; + break; + default: + pname = "???"; + break; + } + break; + case T_Append: + pname = sname = "Append"; + break; + case T_MergeAppend: + pname = sname = "Merge Append"; + break; + case T_RecursiveUnion: + pname = sname = "Recursive Union"; + break; + case T_BitmapAnd: + pname = sname = "BitmapAnd"; + break; + case T_BitmapOr: + pname = sname = "BitmapOr"; + break; + case T_NestLoop: + pname = sname = "Nested Loop"; + break; + case T_MergeJoin: + pname = "Merge"; /* "Join" gets added by jointype switch */ + sname = "Merge Join"; + break; + case T_HashJoin: + pname = "Hash"; /* "Join" gets added by jointype switch */ + sname = "Hash Join"; + break; + case T_SeqScan: + pname = sname = "Seq Scan"; + break; + case T_SampleScan: + pname = sname = "Sample Scan"; + break; + case T_Gather: + pname = sname = "Gather"; + break; + case T_GatherMerge: + pname = sname = "Gather Merge"; + break; + case T_IndexScan: + pname = sname = "Index Scan"; + break; + case T_IndexOnlyScan: + pname = sname = "Index Only Scan"; + break; + case T_BitmapIndexScan: + pname = sname = "Bitmap Index Scan"; + break; + case T_BitmapHeapScan: + pname = sname = "Bitmap Heap Scan"; + break; + case T_TidScan: + pname = sname = "Tid Scan"; + break; + case T_TidRangeScan: + pname = 
sname = "Tid Range Scan"; + break; + case T_SubqueryScan: + pname = sname = "Subquery Scan"; + break; + case T_FunctionScan: + pname = sname = "Function Scan"; + break; + case T_TableFuncScan: + pname = sname = "Table Function Scan"; + break; + case T_ValuesScan: + pname = sname = "Values Scan"; + break; + case T_CteScan: + pname = sname = "CTE Scan"; + break; + case T_NamedTuplestoreScan: + pname = sname = "Named Tuplestore Scan"; + break; + case T_WorkTableScan: + pname = sname = "WorkTable Scan"; + break; + case T_ForeignScan: + sname = "Foreign Scan"; + switch (((ForeignScan *) plan)->operation) + { + case CMD_SELECT: + pname = "Foreign Scan"; + operation = "Select"; + break; + case CMD_INSERT: + pname = "Foreign Insert"; + operation = "Insert"; + break; + case CMD_UPDATE: + pname = "Foreign Update"; + operation = "Update"; + break; + case CMD_DELETE: + pname = "Foreign Delete"; + operation = "Delete"; + break; + default: + pname = "???"; + break; + } + break; + case T_CustomScan: + sname = "Custom Scan"; + custom_name = ((CustomScan *) plan)->methods->CustomName; + if (custom_name) + pname = psprintf("Custom Scan (%s)", custom_name); + else + pname = sname; + break; + case T_Material: + pname = sname = "Materialize"; + break; + case T_Memoize: + pname = sname = "Memoize"; + break; + case T_Sort: + pname = sname = "Sort"; + break; + case T_IncrementalSort: + pname = sname = "Incremental Sort"; + break; + case T_Group: + pname = sname = "Group"; + break; + case T_Agg: + { + Agg *agg = (Agg *) plan; + + sname = "Aggregate"; + switch (agg->aggstrategy) + { + case AGG_PLAIN: + pname = "Aggregate"; + strategy = "Plain"; + break; + case AGG_SORTED: + pname = "GroupAggregate"; + strategy = "Sorted"; + break; + case AGG_HASHED: + pname = "HashAggregate"; + strategy = "Hashed"; + break; + case AGG_MIXED: + pname = "MixedAggregate"; + strategy = "Mixed"; + break; + default: + pname = "Aggregate ???"; + strategy = "???"; + break; + } + + if 
(DO_AGGSPLIT_SKIPFINAL(agg->aggsplit)) + { + partialmode = "Partial"; + pname = psprintf("%s %s", partialmode, pname); + } + else if (DO_AGGSPLIT_COMBINE(agg->aggsplit)) + { + partialmode = "Finalize"; + pname = psprintf("%s %s", partialmode, pname); + } + else + partialmode = "Simple"; + } + break; + case T_WindowAgg: + pname = sname = "WindowAgg"; + break; + case T_Unique: + pname = sname = "Unique"; + break; + case T_SetOp: + sname = "SetOp"; + switch (((SetOp *) plan)->strategy) + { + case SETOP_SORTED: + pname = "SetOp"; + strategy = "Sorted"; + break; + case SETOP_HASHED: + pname = "HashSetOp"; + strategy = "Hashed"; + break; + default: + pname = "SetOp ???"; + strategy = "???"; + break; + } + break; + case T_LockRows: + pname = sname = "LockRows"; + break; + case T_Limit: + pname = sname = "Limit"; + break; + case T_Hash: + pname = sname = "Hash"; + break; + default: + pname = sname = "???"; + break; + } + + ExplainOpenGroup("Plan", + relationship ? NULL : "Plan", + true, es); + + if (es->format == EXPLAIN_FORMAT_TEXT) + { + if (plan_name) + { + ExplainIndentText(es); + appendStringInfo(es->str, "%s\n", plan_name); + es->indent++; + } + if (es->indent) + { + ExplainIndentText(es); + appendStringInfoString(es->str, "-> "); + es->indent += 2; + } + if (plan->parallel_aware) + appendStringInfoString(es->str, "Parallel "); + if (plan->async_capable) + appendStringInfoString(es->str, "Async "); + appendStringInfoString(es->str, pname); + es->indent++; + } + else + { + ExplainPropertyText("Node Type", sname, es); + if (strategy) + ExplainPropertyText("Strategy", strategy, es); + if (partialmode) + ExplainPropertyText("Partial Mode", partialmode, es); + if (operation) + ExplainPropertyText("Operation", operation, es); + if (relationship) + ExplainPropertyText("Parent Relationship", relationship, es); + if (plan_name) + ExplainPropertyText("Subplan Name", plan_name, es); + if (custom_name) + ExplainPropertyText("Custom Plan Provider", custom_name, es); + 
ExplainPropertyBool("Parallel Aware", plan->parallel_aware, es); + ExplainPropertyBool("Async Capable", plan->async_capable, es); + } + + switch (nodeTag(plan)) + { + case T_SeqScan: + case T_SampleScan: + case T_BitmapHeapScan: + case T_TidScan: + case T_TidRangeScan: + case T_SubqueryScan: + case T_FunctionScan: + case T_TableFuncScan: + case T_ValuesScan: + case T_CteScan: + case T_WorkTableScan: + ExplainScanTarget((Scan *) plan, es); + break; + case T_ForeignScan: + case T_CustomScan: + if (((Scan *) plan)->scanrelid > 0) + ExplainScanTarget((Scan *) plan, es); + break; + case T_IndexScan: + { + IndexScan *indexscan = (IndexScan *) plan; + + ExplainIndexScanDetails(indexscan->indexid, + indexscan->indexorderdir, + es); + ExplainScanTarget((Scan *) indexscan, es); + } + break; + case T_IndexOnlyScan: + { + IndexOnlyScan *indexonlyscan = (IndexOnlyScan *) plan; + + ExplainIndexScanDetails(indexonlyscan->indexid, + indexonlyscan->indexorderdir, + es); + ExplainScanTarget((Scan *) indexonlyscan, es); + } + break; + case T_BitmapIndexScan: + { + BitmapIndexScan *bitmapindexscan = (BitmapIndexScan *) plan; + const char *indexname = + explain_get_index_name(bitmapindexscan->indexid); + + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfo(es->str, " on %s", + quote_identifier(indexname)); + else + ExplainPropertyText("Index Name", indexname, es); + } + break; + case T_ModifyTable: + ExplainModifyTarget((ModifyTable *) plan, es); + break; + case T_NestLoop: + case T_MergeJoin: + case T_HashJoin: + { + const char *jointype; + + switch (((Join *) plan)->jointype) + { + case JOIN_INNER: + jointype = "Inner"; + break; + case JOIN_LEFT: + jointype = "Left"; + break; + case JOIN_FULL: + jointype = "Full"; + break; + case JOIN_RIGHT: + jointype = "Right"; + break; + case JOIN_SEMI: + jointype = "Semi"; + break; + case JOIN_ANTI: + jointype = "Anti"; + break; + default: + jointype = "???"; + break; + } + if (es->format == EXPLAIN_FORMAT_TEXT) + { + /* + * For historical 
reasons, the join type is interpolated + * into the node type name... + */ + if (((Join *) plan)->jointype != JOIN_INNER) + appendStringInfo(es->str, " %s Join", jointype); + else if (!IsA(plan, NestLoop)) + appendStringInfoString(es->str, " Join"); + } + else + ExplainPropertyText("Join Type", jointype, es); + } + break; + case T_SetOp: + { + const char *setopcmd; + + switch (((SetOp *) plan)->cmd) + { + case SETOPCMD_INTERSECT: + setopcmd = "Intersect"; + break; + case SETOPCMD_INTERSECT_ALL: + setopcmd = "Intersect All"; + break; + case SETOPCMD_EXCEPT: + setopcmd = "Except"; + break; + case SETOPCMD_EXCEPT_ALL: + setopcmd = "Except All"; + break; + default: + setopcmd = "???"; + break; + } + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfo(es->str, " %s", setopcmd); + else + ExplainPropertyText("Command", setopcmd, es); + } + break; + default: + break; + } + + if (es->costs) + { + if (es->format == EXPLAIN_FORMAT_TEXT) + { + appendStringInfo(es->str, " (cost=%.2f..%.2f rows=%.0f width=%d)", + plan->startup_cost, plan->total_cost, + plan->plan_rows, plan->plan_width); + } + else + { + ExplainPropertyFloat("Startup Cost", NULL, plan->startup_cost, + 2, es); + ExplainPropertyFloat("Total Cost", NULL, plan->total_cost, + 2, es); + ExplainPropertyFloat("Plan Rows", NULL, plan->plan_rows, + 0, es); + ExplainPropertyInteger("Plan Width", NULL, plan->plan_width, + es); + } + } + + /* + * We have to forcibly clean up the instrumentation state because we + * haven't done ExecutorEnd yet. This is pretty grotty ... + * + * Note: contrib/auto_explain could cause instrumentation to be set up + * even though we didn't ask for it here. Be careful not to print any + * instrumentation results the user didn't ask for. But we do the + * InstrEndLoop call anyway, if possible, to reduce the number of cases + * auto_explain has to contend with. 
+ */ + if (planstate->instrument) + InstrEndLoop(planstate->instrument); + + if (es->analyze && + planstate->instrument && planstate->instrument->nloops > 0) + { + double nloops = planstate->instrument->nloops; + double startup_ms = 1000.0 * planstate->instrument->startup / nloops; + double total_ms = 1000.0 * planstate->instrument->total / nloops; + double rows = planstate->instrument->ntuples / nloops; + + if (es->format == EXPLAIN_FORMAT_TEXT) + { + if (es->timing) + appendStringInfo(es->str, + " (actual time=%.3f..%.3f rows=%.0f loops=%.0f)", + startup_ms, total_ms, rows, nloops); + else + appendStringInfo(es->str, + " (actual rows=%.0f loops=%.0f)", + rows, nloops); + } + else + { + if (es->timing) + { + ExplainPropertyFloat("Actual Startup Time", "ms", startup_ms, + 3, es); + ExplainPropertyFloat("Actual Total Time", "ms", total_ms, + 3, es); + } + ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es); + ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es); + } + } + else if (es->analyze) + { + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfoString(es->str, " (never executed)"); + else + { + if (es->timing) + { + ExplainPropertyFloat("Actual Startup Time", "ms", 0.0, 3, es); + ExplainPropertyFloat("Actual Total Time", "ms", 0.0, 3, es); + } + ExplainPropertyFloat("Actual Rows", NULL, 0.0, 0, es); + ExplainPropertyFloat("Actual Loops", NULL, 0.0, 0, es); + } + } + + /* in text format, first line ends here */ + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfoChar(es->str, '\n'); + + /* prepare per-worker general execution details */ + if (es->workers_state && es->verbose) + { + WorkerInstrumentation *w = planstate->worker_instrument; + + for (int n = 0; n < w->num_workers; n++) + { + Instrumentation *instrument = &w->instrument[n]; + double nloops = instrument->nloops; + double startup_ms; + double total_ms; + double rows; + + if (nloops <= 0) + continue; + startup_ms = 1000.0 * instrument->startup / nloops; + total_ms = 1000.0 * 
instrument->total / nloops; + rows = instrument->ntuples / nloops; + + ExplainOpenWorker(n, es); + + if (es->format == EXPLAIN_FORMAT_TEXT) + { + ExplainIndentText(es); + if (es->timing) + appendStringInfo(es->str, + "actual time=%.3f..%.3f rows=%.0f loops=%.0f\n", + startup_ms, total_ms, rows, nloops); + else + appendStringInfo(es->str, + "actual rows=%.0f loops=%.0f\n", + rows, nloops); + } + else + { + if (es->timing) + { + ExplainPropertyFloat("Actual Startup Time", "ms", + startup_ms, 3, es); + ExplainPropertyFloat("Actual Total Time", "ms", + total_ms, 3, es); + } + ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es); + ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es); + } + + ExplainCloseWorker(n, es); + } + } + + /* target list */ + if (es->verbose) + show_plan_tlist(planstate, ancestors, es); + + /* unique join */ + switch (nodeTag(plan)) + { + case T_NestLoop: + case T_MergeJoin: + case T_HashJoin: + /* try not to be too chatty about this in text mode */ + if (es->format != EXPLAIN_FORMAT_TEXT || + (es->verbose && ((Join *) plan)->inner_unique)) + ExplainPropertyBool("Inner Unique", + ((Join *) plan)->inner_unique, + es); + break; + default: + break; + } + + /* quals, sort keys, etc */ + switch (nodeTag(plan)) + { + case T_IndexScan: + show_scan_qual(((IndexScan *) plan)->indexqualorig, + "Index Cond", planstate, ancestors, es); + if (((IndexScan *) plan)->indexqualorig) + show_instrumentation_count("Rows Removed by Index Recheck", 2, + planstate, es); + show_scan_qual(((IndexScan *) plan)->indexorderbyorig, + "Order By", planstate, ancestors, es); + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + break; + case T_IndexOnlyScan: + show_scan_qual(((IndexOnlyScan *) plan)->indexqual, + "Index Cond", planstate, ancestors, es); + if (((IndexOnlyScan *) plan)->recheckqual) + show_instrumentation_count("Rows Removed by Index Recheck", 2, + 
planstate, es); + show_scan_qual(((IndexOnlyScan *) plan)->indexorderby, + "Order By", planstate, ancestors, es); + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + if (es->analyze) + ExplainPropertyFloat("Heap Fetches", NULL, + planstate->instrument->ntuples2, 0, es); + break; + case T_BitmapIndexScan: + show_scan_qual(((BitmapIndexScan *) plan)->indexqualorig, + "Index Cond", planstate, ancestors, es); + break; + case T_BitmapHeapScan: + show_scan_qual(((BitmapHeapScan *) plan)->bitmapqualorig, + "Recheck Cond", planstate, ancestors, es); + if (((BitmapHeapScan *) plan)->bitmapqualorig) + show_instrumentation_count("Rows Removed by Index Recheck", 2, + planstate, es); + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + if (es->analyze) + show_tidbitmap_info((BitmapHeapScanState *) planstate, es); + break; + case T_SampleScan: + show_tablesample(((SampleScan *) plan)->tablesample, + planstate, ancestors, es); + /* fall through to print additional fields the same as SeqScan */ + /* FALLTHROUGH */ + case T_SeqScan: + case T_ValuesScan: + case T_CteScan: + case T_NamedTuplestoreScan: + case T_WorkTableScan: + case T_SubqueryScan: + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + break; + case T_Gather: + { + Gather *gather = (Gather *) plan; + + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + ExplainPropertyInteger("Workers Planned", NULL, + gather->num_workers, es); + + /* Show params evaluated at gather node */ + if (gather->initParam) + show_eval_params(gather->initParam, es); + + if (es->analyze) + { + int nworkers; + + 
nworkers = ((GatherState *) planstate)->nworkers_launched; + ExplainPropertyInteger("Workers Launched", NULL, + nworkers, es); + } + + if (gather->single_copy || es->format != EXPLAIN_FORMAT_TEXT) + ExplainPropertyBool("Single Copy", gather->single_copy, es); + } + break; + case T_GatherMerge: + { + GatherMerge *gm = (GatherMerge *) plan; + + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + ExplainPropertyInteger("Workers Planned", NULL, + gm->num_workers, es); + + /* Show params evaluated at gather-merge node */ + if (gm->initParam) + show_eval_params(gm->initParam, es); + + if (es->analyze) + { + int nworkers; + + nworkers = ((GatherMergeState *) planstate)->nworkers_launched; + ExplainPropertyInteger("Workers Launched", NULL, + nworkers, es); + } + } + break; + case T_FunctionScan: + if (es->verbose) + { + List *fexprs = NIL; + ListCell *lc; + + foreach(lc, ((FunctionScan *) plan)->functions) + { + RangeTblFunction *rtfunc = (RangeTblFunction *) lfirst(lc); + + fexprs = lappend(fexprs, rtfunc->funcexpr); + } + /* We rely on show_expression to insert commas as needed */ + show_expression((Node *) fexprs, + "Function Call", planstate, ancestors, + es->verbose, es); + } + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + break; + case T_TableFuncScan: + if (es->verbose) + { + TableFunc *tablefunc = ((TableFuncScan *) plan)->tablefunc; + + show_expression((Node *) tablefunc, + "Table Function Call", planstate, ancestors, + es->verbose, es); + } + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + break; + case T_TidScan: + { + /* + * The tidquals list has OR semantics, so be sure to show it + * as an OR condition. 
+ */ + List *tidquals = ((TidScan *) plan)->tidquals; + + if (list_length(tidquals) > 1) + tidquals = list_make1(make_orclause(tidquals)); + show_scan_qual(tidquals, "TID Cond", planstate, ancestors, es); + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + } + break; + case T_TidRangeScan: + { + /* + * The tidrangequals list has AND semantics, so be sure to + * show it as an AND condition. + */ + List *tidquals = ((TidRangeScan *) plan)->tidrangequals; + + if (list_length(tidquals) > 1) + tidquals = list_make1(make_andclause(tidquals)); + show_scan_qual(tidquals, "TID Cond", planstate, ancestors, es); + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + } + break; + case T_ForeignScan: + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + show_foreignscan_info((ForeignScanState *) planstate, es); + break; + case T_CustomScan: + { + CustomScanState *css = (CustomScanState *) planstate; + + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + if (css->methods->ExplainCustomScan) + css->methods->ExplainCustomScan(css, ancestors, es); + } + break; + case T_NestLoop: + show_upper_qual(((NestLoop *) plan)->join.joinqual, + "Join Filter", planstate, ancestors, es); + if (((NestLoop *) plan)->join.joinqual) + show_instrumentation_count("Rows Removed by Join Filter", 1, + planstate, es); + show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 2, + planstate, es); + break; + case T_MergeJoin: + show_upper_qual(((MergeJoin *) plan)->mergeclauses, + "Merge Cond", 
planstate, ancestors, es); + show_upper_qual(((MergeJoin *) plan)->join.joinqual, + "Join Filter", planstate, ancestors, es); + if (((MergeJoin *) plan)->join.joinqual) + show_instrumentation_count("Rows Removed by Join Filter", 1, + planstate, es); + show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 2, + planstate, es); + break; + case T_HashJoin: + show_upper_qual(((HashJoin *) plan)->hashclauses, + "Hash Cond", planstate, ancestors, es); + show_upper_qual(((HashJoin *) plan)->join.joinqual, + "Join Filter", planstate, ancestors, es); + if (((HashJoin *) plan)->join.joinqual) + show_instrumentation_count("Rows Removed by Join Filter", 1, + planstate, es); + show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 2, + planstate, es); + break; + case T_Agg: + show_agg_keys(castNode(AggState, planstate), ancestors, es); + show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); + show_hashagg_info((AggState *) planstate, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + break; + case T_WindowAgg: + show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + show_upper_qual(((WindowAgg *) plan)->runConditionOrig, + "Run Condition", planstate, ancestors, es); + break; + case T_Group: + show_group_keys(castNode(GroupState, planstate), ancestors, es); + show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + break; + case T_Sort: + show_sort_keys(castNode(SortState, planstate), ancestors, es); + show_sort_info(castNode(SortState, planstate), es); + break; + case T_IncrementalSort: + show_incremental_sort_keys(castNode(IncrementalSortState, 
planstate), + ancestors, es); + show_incremental_sort_info(castNode(IncrementalSortState, planstate), + es); + break; + case T_MergeAppend: + show_merge_append_keys(castNode(MergeAppendState, planstate), + ancestors, es); + break; + case T_Result: + show_upper_qual((List *) ((Result *) plan)->resconstantqual, + "One-Time Filter", planstate, ancestors, es); + show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + break; + case T_ModifyTable: + show_modifytable_info(castNode(ModifyTableState, planstate), ancestors, + es); + break; + case T_Hash: + show_hash_info(castNode(HashState, planstate), es); + break; + case T_Memoize: + show_memoize_info(castNode(MemoizeState, planstate), ancestors, + es); + break; + default: + break; + } + + /* + * Prepare per-worker JIT instrumentation. As with the overall JIT + * summary, this is printed only if printing costs is enabled. + */ + if (es->workers_state && es->costs && es->verbose) + { + SharedJitInstrumentation *w = planstate->worker_jit_instrument; + + if (w) + { + for (int n = 0; n < w->num_workers; n++) + { + ExplainOpenWorker(n, es); + ExplainPrintJIT(es, planstate->state->es_jit_flags, + &w->jit_instr[n]); + ExplainCloseWorker(n, es); + } + } + } + + /* Show buffer/WAL usage */ + if (es->buffers && planstate->instrument) + show_buffer_usage(es, &planstate->instrument->bufusage, false); + if (es->wal && planstate->instrument) + show_wal_usage(es, &planstate->instrument->walusage); + + /* Prepare per-worker buffer/WAL usage */ + if (es->workers_state && (es->buffers || es->wal) && es->verbose) + { + WorkerInstrumentation *w = planstate->worker_instrument; + + for (int n = 0; n < w->num_workers; n++) + { + Instrumentation *instrument = &w->instrument[n]; + double nloops = instrument->nloops; + + if (nloops <= 0) + continue; + + ExplainOpenWorker(n, es); + if (es->buffers) + show_buffer_usage(es, &instrument->bufusage, 
false); + if (es->wal) + show_wal_usage(es, &instrument->walusage); + ExplainCloseWorker(n, es); + } + } + + /* Show per-worker details for this plan node, then pop that stack */ + if (es->workers_state) + ExplainFlushWorkersState(es); + es->workers_state = save_workers_state; + + /* + * If partition pruning was done during executor initialization, the + * number of child plans we'll display below will be less than the number + * of subplans that was specified in the plan. To make this a bit less + * mysterious, emit an indication that this happened. Note that this + * field is emitted now because we want it to be a property of the parent + * node; it *cannot* be emitted within the Plans sub-node we'll open next. + */ + switch (nodeTag(plan)) + { + case T_Append: + ExplainMissingMembers(((AppendState *) planstate)->as_nplans, + list_length(((Append *) plan)->appendplans), + es); + break; + case T_MergeAppend: + ExplainMissingMembers(((MergeAppendState *) planstate)->ms_nplans, + list_length(((MergeAppend *) plan)->mergeplans), + es); + break; + default: + break; + } + + /* Get ready to display the child plans */ + haschildren = planstate->initPlan || + outerPlanState(planstate) || + innerPlanState(planstate) || + IsA(plan, Append) || + IsA(plan, MergeAppend) || + IsA(plan, BitmapAnd) || + IsA(plan, BitmapOr) || + IsA(plan, SubqueryScan) || + (IsA(planstate, CustomScanState) && + ((CustomScanState *) planstate)->custom_ps != NIL) || + planstate->subPlan; + if (haschildren) + { + ExplainOpenGroup("Plans", "Plans", false, es); + /* Pass current Plan as head of ancestors list for children */ + ancestors = lcons(plan, ancestors); + } + + /* initPlan-s */ + if (planstate->initPlan) + ExplainSubPlans(planstate->initPlan, ancestors, "InitPlan", es); + + /* lefttree */ + if (outerPlanState(planstate)) + ExplainNode(outerPlanState(planstate), ancestors, + "Outer", NULL, es); + + /* righttree */ + if (innerPlanState(planstate)) + ExplainNode(innerPlanState(planstate), 
ancestors, + "Inner", NULL, es); + + /* special child plans */ + switch (nodeTag(plan)) + { + case T_Append: + ExplainMemberNodes(((AppendState *) planstate)->appendplans, + ((AppendState *) planstate)->as_nplans, + ancestors, es); + break; + case T_MergeAppend: + ExplainMemberNodes(((MergeAppendState *) planstate)->mergeplans, + ((MergeAppendState *) planstate)->ms_nplans, + ancestors, es); + break; + case T_BitmapAnd: + ExplainMemberNodes(((BitmapAndState *) planstate)->bitmapplans, + ((BitmapAndState *) planstate)->nplans, + ancestors, es); + break; + case T_BitmapOr: + ExplainMemberNodes(((BitmapOrState *) planstate)->bitmapplans, + ((BitmapOrState *) planstate)->nplans, + ancestors, es); + break; + case T_SubqueryScan: + ExplainNode(((SubqueryScanState *) planstate)->subplan, ancestors, + "Subquery", NULL, es); + break; + case T_CustomScan: + ExplainCustomChildren((CustomScanState *) planstate, + ancestors, es); + break; + default: + break; + } + + /* subPlan-s */ + if (planstate->subPlan) + ExplainSubPlans(planstate->subPlan, ancestors, "SubPlan", es); + + /* end of child plans */ + if (haschildren) + { + ancestors = list_delete_first(ancestors); + ExplainCloseGroup("Plans", "Plans", false, es); + } + + /* in text format, undo whatever indentation we added */ + if (es->format == EXPLAIN_FORMAT_TEXT) + es->indent = save_indent; + + ExplainCloseGroup("Plan", + relationship ? 
					  NULL : "Plan",
					  true, es);
}

/*
 * Show the targetlist of a plan node
 *
 * Emits the node's output columns as an "Output" property, deparsing each
 * targetlist expression (resjunk columns included).
 */
static void
show_plan_tlist(PlanState *planstate, List *ancestors, ExplainState *es)
{
	Plan	   *plan = planstate->plan;
	List	   *context;
	List	   *result = NIL;
	bool		useprefix;
	ListCell   *lc;

	/* No work if empty tlist (this occurs eg in bitmap indexscans) */
	if (plan->targetlist == NIL)
		return;
	/* The tlist of an Append isn't real helpful, so suppress it */
	if (IsA(plan, Append))
		return;
	/* Likewise for MergeAppend and RecursiveUnion */
	if (IsA(plan, MergeAppend))
		return;
	if (IsA(plan, RecursiveUnion))
		return;

	/*
	 * Likewise for ForeignScan that executes a direct INSERT/UPDATE/DELETE
	 *
	 * Note: the tlist for a ForeignScan that executes a direct INSERT/UPDATE
	 * might contain subplan output expressions that are confusing in this
	 * context.  The tlist for a ForeignScan that executes a direct UPDATE/
	 * DELETE always contains "junk" target columns to identify the exact row
	 * to update or delete, which would be confusing in this context.  So, we
	 * suppress it in all the cases.
	 */
	if (IsA(plan, ForeignScan) &&
		((ForeignScan *) plan)->operation != CMD_SELECT)
		return;

	/* Set up deparsing context */
	context = set_deparse_context_plan(es->deparse_cxt,
									   plan,
									   ancestors);
	/* qualify Vars with relation names only when more than one RTE exists */
	useprefix = list_length(es->rtable) > 1;

	/* Deparse each result column (we now include resjunk ones) */
	foreach(lc, plan->targetlist)
	{
		TargetEntry *tle = (TargetEntry *) lfirst(lc);

		result = lappend(result,
						 deparse_expression((Node *) tle->expr, context,
											useprefix, false));
	}

	/* Print results */
	ExplainPropertyList("Output", result, es);
}

/*
 * Show a generic expression
 *
 * Deparses "node" in the deparse context of the given plan node and emits it
 * as a text property labeled "qlabel".  "useprefix" forces relation-name
 * qualification of Vars in the deparsed text.
 */
static void
show_expression(Node *node, const char *qlabel,
				PlanState *planstate, List *ancestors,
				bool useprefix, ExplainState *es)
{
	List	   *context;
	char	   *exprstr;

	/* Set up deparsing context */
	context = set_deparse_context_plan(es->deparse_cxt,
									   planstate->plan,
									   ancestors);

	/* Deparse the expression */
	exprstr = deparse_expression(node, context, useprefix, false);

	/* And add to es->str */
	ExplainPropertyText(qlabel, exprstr, es);
}

/*
 * Show a qualifier expression (which is a List with implicit AND semantics)
 */
static void
show_qual(List *qual, const char *qlabel,
		  PlanState *planstate, List *ancestors,
		  bool useprefix, ExplainState *es)
{
	Node	   *node;

	/* No work if empty qual */
	if (qual == NIL)
		return;

	/* Convert AND list to explicit AND */
	node = (Node *) make_ands_explicit(qual);

	/* And show it */
	show_expression(node, qlabel, planstate, ancestors, useprefix, es);
}

/*
 * Show a qualifier expression for a scan plan node
 */
static void
show_scan_qual(List *qual, const char *qlabel,
			   PlanState *planstate, List *ancestors,
			   ExplainState *es)
{
	bool		useprefix;

	/* SubqueryScan quals may reference both inner and outer names */
	useprefix = (IsA(planstate->plan, SubqueryScan) || es->verbose);
	show_qual(qual, qlabel, planstate, ancestors, useprefix, es);
}

/*
 * Show a qualifier expression for an upper-level plan node
 */
static
void +show_upper_qual(List *qual, const char *qlabel, + PlanState *planstate, List *ancestors, + ExplainState *es) +{ + bool useprefix; + + useprefix = (list_length(es->rtable) > 1 || es->verbose); + show_qual(qual, qlabel, planstate, ancestors, useprefix, es); +} + +/* + * Show the sort keys for a Sort node. + */ +static void +show_sort_keys(SortState *sortstate, List *ancestors, ExplainState *es) +{ + Sort *plan = (Sort *) sortstate->ss.ps.plan; + + show_sort_group_keys((PlanState *) sortstate, "Sort Key", + plan->numCols, 0, plan->sortColIdx, + plan->sortOperators, plan->collations, + plan->nullsFirst, + ancestors, es); +} + +/* + * Show the sort keys for a IncrementalSort node. + */ +static void +show_incremental_sort_keys(IncrementalSortState *incrsortstate, + List *ancestors, ExplainState *es) +{ + IncrementalSort *plan = (IncrementalSort *) incrsortstate->ss.ps.plan; + + show_sort_group_keys((PlanState *) incrsortstate, "Sort Key", + plan->sort.numCols, plan->nPresortedCols, + plan->sort.sortColIdx, + plan->sort.sortOperators, plan->sort.collations, + plan->sort.nullsFirst, + ancestors, es); +} + +/* + * Likewise, for a MergeAppend node. + */ +static void +show_merge_append_keys(MergeAppendState *mstate, List *ancestors, + ExplainState *es) +{ + MergeAppend *plan = (MergeAppend *) mstate->ps.plan; + + show_sort_group_keys((PlanState *) mstate, "Sort Key", + plan->numCols, 0, plan->sortColIdx, + plan->sortOperators, plan->collations, + plan->nullsFirst, + ancestors, es); +} + +/* + * Show the grouping keys for an Agg node. 
 */
static void
show_agg_keys(AggState *astate, List *ancestors,
			  ExplainState *es)
{
	Agg		   *plan = (Agg *) astate->ss.ps.plan;

	if (plan->numCols > 0 || plan->groupingSets)
	{
		/* The key columns refer to the tlist of the child plan */
		ancestors = lcons(plan, ancestors);

		if (plan->groupingSets)
			show_grouping_sets(outerPlanState(astate), plan, ancestors, es);
		else
			show_sort_group_keys(outerPlanState(astate), "Group Key",
								 plan->numCols, 0, plan->grpColIdx,
								 NULL, NULL, NULL,
								 ancestors, es);

		ancestors = list_delete_first(ancestors);
	}
}

/*
 * Show the grouping sets of an Agg node that implements GROUPING SETS:
 * first the keys of the Agg node itself, then those of each additional
 * aggregation phase in agg->chain (each possibly fed by its own Sort).
 */
static void
show_grouping_sets(PlanState *planstate, Agg *agg,
				   List *ancestors, ExplainState *es)
{
	List	   *context;
	bool		useprefix;
	ListCell   *lc;

	/* Set up deparsing context */
	context = set_deparse_context_plan(es->deparse_cxt,
									   planstate->plan,
									   ancestors);
	useprefix = (list_length(es->rtable) > 1 || es->verbose);

	ExplainOpenGroup("Grouping Sets", "Grouping Sets", false, es);

	/* keys of the Agg node itself; it is not fed by a chained Sort */
	show_grouping_set_keys(planstate, agg, NULL,
						   context, useprefix, ancestors, es);

	foreach(lc, agg->chain)
	{
		Agg		   *aggnode = lfirst(lc);
		Sort	   *sortnode = (Sort *) aggnode->plan.lefttree;

		show_grouping_set_keys(planstate, aggnode, sortnode,
							   context, useprefix, ancestors, es);
	}

	ExplainCloseGroup("Grouping Sets", "Grouping Sets", false, es);
}

/*
 * Show the keys of one grouping-set phase.  "sortnode", if not NULL, is the
 * Sort node feeding this phase; its sort keys are printed before the
 * phase's grouping keys.
 */
static void
show_grouping_set_keys(PlanState *planstate,
					   Agg *aggnode, Sort *sortnode,
					   List *context, bool useprefix,
					   List *ancestors, ExplainState *es)
{
	Plan	   *plan = planstate->plan;
	char	   *exprstr;
	ListCell   *lc;
	List	   *gsets = aggnode->groupingSets;
	AttrNumber *keycols = aggnode->grpColIdx;
	const char *keyname;
	const char *keysetname;

	/* hashed phases are labeled differently from sorted/plain ones */
	if (aggnode->aggstrategy == AGG_HASHED || aggnode->aggstrategy == AGG_MIXED)
	{
		keyname = "Hash Key";
		keysetname = "Hash Keys";
	}
	else
	{
		keyname = "Group Key";
		keysetname = "Group Keys";
	}

	ExplainOpenGroup("Grouping Set", NULL, true, es);

	if (sortnode)
	{
		show_sort_group_keys(planstate, "Sort Key",
							 sortnode->numCols, 0, sortnode->sortColIdx,
							 sortnode->sortOperators, sortnode->collations,
							 sortnode->nullsFirst,
							 ancestors, es);
		/* in text mode, indent the keys under the "Sort Key" line */
		if (es->format == EXPLAIN_FORMAT_TEXT)
			es->indent++;
	}

	ExplainOpenGroup(keysetname, keysetname, false, es);

	foreach(lc, gsets)
	{
		List	   *result = NIL;
		ListCell   *lc2;

		foreach(lc2, (List *) lfirst(lc))
		{
			Index		i = lfirst_int(lc2);
			AttrNumber	keyresno = keycols[i];
			TargetEntry *target = get_tle_by_resno(plan->targetlist,
												   keyresno);

			if (!target)
				elog(ERROR, "no tlist entry for key %d", keyresno);
			/* Deparse the expression, showing any top-level cast */
			exprstr = deparse_expression((Node *) target->expr, context,
										 useprefix, true);

			result = lappend(result, exprstr);
		}

		/* an empty grouping set is rendered as "()" in text format */
		if (!result && es->format == EXPLAIN_FORMAT_TEXT)
			ExplainPropertyText(keyname, "()", es);
		else
			ExplainPropertyListNested(keyname, result, es);
	}

	ExplainCloseGroup(keysetname, keysetname, false, es);

	if (sortnode && es->format == EXPLAIN_FORMAT_TEXT)
		es->indent--;

	ExplainCloseGroup("Grouping Set", NULL, true, es);
}

/*
 * Show the grouping keys for a Group node.
 */
static void
show_group_keys(GroupState *gstate, List *ancestors,
				ExplainState *es)
{
	Group	   *plan = (Group *) gstate->ss.ps.plan;

	/* The key columns refer to the tlist of the child plan */
	ancestors = lcons(plan, ancestors);
	show_sort_group_keys(outerPlanState(gstate), "Group Key",
						 plan->numCols, 0, plan->grpColIdx,
						 NULL, NULL, NULL,
						 ancestors, es);
	ancestors = list_delete_first(ancestors);
}

/*
 * Common code to show sort/group keys, which are represented in plan nodes
 * as arrays of targetlist indexes.  If it's a sort key rather than a group
 * key, also pass sort operators/collations/nullsFirst arrays.
 */
static void
show_sort_group_keys(PlanState *planstate, const char *qlabel,
					 int nkeys, int nPresortedKeys, AttrNumber *keycols,
					 Oid *sortOperators, Oid *collations, bool *nullsFirst,
					 List *ancestors, ExplainState *es)
{
	Plan	   *plan = planstate->plan;
	List	   *context;
	List	   *result = NIL;
	List	   *resultPresorted = NIL;
	StringInfoData sortkeybuf;
	bool		useprefix;
	int			keyno;

	/* nothing to print for an empty key list */
	if (nkeys <= 0)
		return;

	initStringInfo(&sortkeybuf);

	/* Set up deparsing context */
	context = set_deparse_context_plan(es->deparse_cxt,
									   plan,
									   ancestors);
	useprefix = (list_length(es->rtable) > 1 || es->verbose);

	for (keyno = 0; keyno < nkeys; keyno++)
	{
		/* find key expression in tlist */
		AttrNumber	keyresno = keycols[keyno];
		TargetEntry *target = get_tle_by_resno(plan->targetlist,
											   keyresno);
		char	   *exprstr;

		if (!target)
			elog(ERROR, "no tlist entry for key %d", keyresno);
		/* Deparse the expression, showing any top-level cast */
		exprstr = deparse_expression((Node *) target->expr, context,
									 useprefix, true);
		resetStringInfo(&sortkeybuf);
		appendStringInfoString(&sortkeybuf, exprstr);
		/* Append sort order information, if relevant (NULL for group keys) */
		if (sortOperators != NULL)
			show_sortorder_options(&sortkeybuf,
								   (Node *) target->expr,
								   sortOperators[keyno],
								   collations[keyno],
								   nullsFirst[keyno]);
		/* Emit one property-list item per sort key */
		result = lappend(result, pstrdup(sortkeybuf.data));
		/* the first nPresortedKeys keys also go in the "Presorted Key" list */
		if (keyno < nPresortedKeys)
			resultPresorted = lappend(resultPresorted, exprstr);
	}

	ExplainPropertyList(qlabel, result, es);
	if (nPresortedKeys > 0)
		ExplainPropertyList("Presorted Key", resultPresorted, es);
}

/*
 * Append nondefault characteristics of the sort ordering of a column to buf
 * (collation, direction, NULLS FIRST/LAST)
 */
static void
show_sortorder_options(StringInfo buf, Node *sortexpr,
					   Oid sortOperator, Oid collation, bool nullsFirst)
{
	Oid			sortcoltype = exprType(sortexpr);
	bool		reverse = false;

	TypeCacheEntry *typentry;

	/* need the type's default "<" and ">" operators to classify direction */
	typentry = lookup_type_cache(sortcoltype,
								 TYPECACHE_LT_OPR | TYPECACHE_GT_OPR);

	/*
	 * Print COLLATE if it's not default for the column's type.  There are
	 * some cases where this is redundant, eg if expression is a column whose
	 * declared collation is that collation, but it's hard to distinguish that
	 * here (and arguably, printing COLLATE explicitly is a good idea anyway
	 * in such cases).
	 */
	if (OidIsValid(collation) && collation != get_typcollation(sortcoltype))
	{
		char	   *collname = get_collation_name(collation);

		if (collname == NULL)
			elog(ERROR, "cache lookup failed for collation %u", collation);
		appendStringInfo(buf, " COLLATE %s", quote_identifier(collname));
	}

	/* Print direction if not ASC, or USING if non-default sort operator */
	if (sortOperator == typentry->gt_opr)
	{
		appendStringInfoString(buf, " DESC");
		reverse = true;
	}
	else if (sortOperator != typentry->lt_opr)
	{
		char	   *opname = get_opname(sortOperator);

		if (opname == NULL)
			elog(ERROR, "cache lookup failed for operator %u", sortOperator);
		appendStringInfo(buf, " USING %s", opname);
		/* Determine whether operator would be considered ASC or DESC */
		(void) get_equality_op_for_ordering_op(sortOperator, &reverse);
	}

	/* Add NULLS FIRST/LAST only if it wouldn't be default */
	if (nullsFirst && !reverse)
	{
		appendStringInfoString(buf, " NULLS FIRST");
	}
	else if (!nullsFirst && reverse)
	{
		appendStringInfoString(buf, " NULLS LAST");
	}
}

/*
 * Show TABLESAMPLE properties
 */
static void
show_tablesample(TableSampleClause *tsc, PlanState *planstate,
				 List *ancestors, ExplainState *es)
{
	List	   *context;
	bool		useprefix;
	char	   *method_name;
	List	   *params = NIL;
	char	   *repeatable;
	ListCell   *lc;

	/* Set up deparsing context */
	context = set_deparse_context_plan(es->deparse_cxt,
									   planstate->plan,
									   ancestors);
	useprefix = list_length(es->rtable) > 1;

	/* Get the tablesample method name
 */
	method_name = get_func_name(tsc->tsmhandler);

	/* Deparse parameter expressions */
	foreach(lc, tsc->args)
	{
		Node	   *arg = (Node *) lfirst(lc);

		params = lappend(params,
						 deparse_expression(arg, context,
											useprefix, false));
	}
	/* REPEATABLE seed expression is optional */
	if (tsc->repeatable)
		repeatable = deparse_expression((Node *) tsc->repeatable, context,
										useprefix, false);
	else
		repeatable = NULL;

	/* Print results */
	if (es->format == EXPLAIN_FORMAT_TEXT)
	{
		bool		first = true;

		ExplainIndentText(es);
		appendStringInfo(es->str, "Sampling: %s (", method_name);
		foreach(lc, params)
		{
			if (!first)
				appendStringInfoString(es->str, ", ");
			appendStringInfoString(es->str, (const char *) lfirst(lc));
			first = false;
		}
		appendStringInfoChar(es->str, ')');
		if (repeatable)
			appendStringInfo(es->str, " REPEATABLE (%s)", repeatable);
		appendStringInfoChar(es->str, '\n');
	}
	else
	{
		ExplainPropertyText("Sampling Method", method_name, es);
		ExplainPropertyList("Sampling Parameters", params, es);
		if (repeatable)
			ExplainPropertyText("Repeatable Seed", repeatable, es);
	}
}

/*
 * If it's EXPLAIN ANALYZE, show tuplesort stats for a sort node
 */
static void
show_sort_info(SortState *sortstate, ExplainState *es)
{
	if (!es->analyze)
		return;

	/* leader's own sort, if it ran to completion */
	if (sortstate->sort_Done && sortstate->tuplesortstate != NULL)
	{
		Tuplesortstate *state = (Tuplesortstate *) sortstate->tuplesortstate;
		TuplesortInstrumentation stats;
		const char *sortMethod;
		const char *spaceType;
		int64		spaceUsed;

		tuplesort_get_stats(state, &stats);
		sortMethod = tuplesort_method_name(stats.sortMethod);
		spaceType = tuplesort_space_type_name(stats.spaceType);
		spaceUsed = stats.spaceUsed;

		if (es->format == EXPLAIN_FORMAT_TEXT)
		{
			ExplainIndentText(es);
			appendStringInfo(es->str, "Sort Method: %s %s: " INT64_FORMAT "kB\n",
							 sortMethod, spaceType, spaceUsed);
		}
		else
		{
			ExplainPropertyText("Sort Method", sortMethod, es);
			ExplainPropertyInteger("Sort Space Used", "kB", spaceUsed, es);
			ExplainPropertyText("Sort Space Type", spaceType, es);
		}
	}

	/*
	 * You might think we should just skip this stanza entirely when
	 * es->hide_workers is true, but then we'd get no sort-method output at
	 * all.  We have to make it look like worker 0's data is top-level data.
	 * This is easily done by just skipping the OpenWorker/CloseWorker calls.
	 * Currently, we don't worry about the possibility that there are multiple
	 * workers in such a case; if there are, duplicate output fields will be
	 * emitted.
	 */
	if (sortstate->shared_info != NULL)
	{
		int			n;

		for (n = 0; n < sortstate->shared_info->num_workers; n++)
		{
			TuplesortInstrumentation *sinstrument;
			const char *sortMethod;
			const char *spaceType;
			int64		spaceUsed;

			sinstrument = &sortstate->shared_info->sinstrument[n];
			if (sinstrument->sortMethod == SORT_TYPE_STILL_IN_PROGRESS)
				continue;		/* ignore any unfilled slots */
			sortMethod = tuplesort_method_name(sinstrument->sortMethod);
			spaceType = tuplesort_space_type_name(sinstrument->spaceType);
			spaceUsed = sinstrument->spaceUsed;

			if (es->workers_state)
				ExplainOpenWorker(n, es);

			if (es->format == EXPLAIN_FORMAT_TEXT)
			{
				ExplainIndentText(es);
				appendStringInfo(es->str,
								 "Sort Method: %s %s: " INT64_FORMAT "kB\n",
								 sortMethod, spaceType, spaceUsed);
			}
			else
			{
				ExplainPropertyText("Sort Method", sortMethod, es);
				ExplainPropertyInteger("Sort Space Used", "kB", spaceUsed, es);
				ExplainPropertyText("Sort Space Type", spaceType, es);
			}

			if (es->workers_state)
				ExplainCloseWorker(n, es);
		}
	}
}

/*
 * Incremental sort nodes sort in (a potentially very large number of) batches,
 * so EXPLAIN ANALYZE needs to roll up the tuplesort stats from each batch into
 * an intelligible summary.
 *
 * This function is used for both a non-parallel node and each worker in a
 * parallel incremental sort node.
 */
static void
show_incremental_sort_group_info(IncrementalSortGroupInfo *groupInfo,
								 const char *groupLabel, bool indent, ExplainState *es)
{
	ListCell   *methodCell;
	List	   *methodNames = NIL;

	/* Generate a list of sort methods used across all groups. */
	for (int bit = 0; bit < NUM_TUPLESORTMETHODS; bit++)
	{
		/* sortMethods is a bitmask; test each method bit in turn */
		TuplesortMethod sortMethod = (1 << bit);

		if (groupInfo->sortMethods & sortMethod)
		{
			const char *methodName = tuplesort_method_name(sortMethod);

			methodNames = lappend(methodNames, unconstify(char *, methodName));
		}
	}

	if (es->format == EXPLAIN_FORMAT_TEXT)
	{
		if (indent)
			appendStringInfoSpaces(es->str, es->indent * 2);
		appendStringInfo(es->str, "%s Groups: " INT64_FORMAT " Sort Method", groupLabel,
						 groupInfo->groupCount);
		/* plural/singular based on methodNames size */
		if (list_length(methodNames) > 1)
			appendStringInfoString(es->str, "s: ");
		else
			appendStringInfoString(es->str, ": ");
		foreach(methodCell, methodNames)
		{
			appendStringInfoString(es->str, (char *) methodCell->ptr_value);
			/* comma-separate all but the last method name */
			if (foreach_current_index(methodCell) < list_length(methodNames) - 1)
				appendStringInfoString(es->str, ", ");
		}

		if (groupInfo->maxMemorySpaceUsed > 0)
		{
			int64		avgSpace = groupInfo->totalMemorySpaceUsed / groupInfo->groupCount;
			const char *spaceTypeName;

			spaceTypeName = tuplesort_space_type_name(SORT_SPACE_TYPE_MEMORY);
			appendStringInfo(es->str, " Average %s: " INT64_FORMAT "kB Peak %s: " INT64_FORMAT "kB",
							 spaceTypeName, avgSpace,
							 spaceTypeName, groupInfo->maxMemorySpaceUsed);
		}

		if (groupInfo->maxDiskSpaceUsed > 0)
		{
			int64		avgSpace = groupInfo->totalDiskSpaceUsed / groupInfo->groupCount;

			const char *spaceTypeName;

			spaceTypeName = tuplesort_space_type_name(SORT_SPACE_TYPE_DISK);
			appendStringInfo(es->str, " Average %s: " INT64_FORMAT "kB Peak %s: " INT64_FORMAT "kB",
							 spaceTypeName, avgSpace,
							 spaceTypeName, groupInfo->maxDiskSpaceUsed);
		}
	}
	else
	{
		StringInfoData
					groupName;

		initStringInfo(&groupName);
		appendStringInfo(&groupName, "%s Groups", groupLabel);
		ExplainOpenGroup("Incremental Sort Groups", groupName.data, true, es);
		ExplainPropertyInteger("Group Count", NULL, groupInfo->groupCount, es);

		ExplainPropertyList("Sort Methods Used", methodNames, es);

		if (groupInfo->maxMemorySpaceUsed > 0)
		{
			int64		avgSpace = groupInfo->totalMemorySpaceUsed / groupInfo->groupCount;
			const char *spaceTypeName;
			StringInfoData memoryName;

			spaceTypeName = tuplesort_space_type_name(SORT_SPACE_TYPE_MEMORY);
			initStringInfo(&memoryName);
			appendStringInfo(&memoryName, "Sort Space %s", spaceTypeName);
			ExplainOpenGroup("Sort Space", memoryName.data, true, es);

			ExplainPropertyInteger("Average Sort Space Used", "kB", avgSpace, es);
			ExplainPropertyInteger("Peak Sort Space Used", "kB",
								   groupInfo->maxMemorySpaceUsed, es);

			ExplainCloseGroup("Sort Space", memoryName.data, true, es);
		}
		if (groupInfo->maxDiskSpaceUsed > 0)
		{
			int64		avgSpace = groupInfo->totalDiskSpaceUsed / groupInfo->groupCount;
			const char *spaceTypeName;
			StringInfoData diskName;

			spaceTypeName = tuplesort_space_type_name(SORT_SPACE_TYPE_DISK);
			initStringInfo(&diskName);
			appendStringInfo(&diskName, "Sort Space %s", spaceTypeName);
			ExplainOpenGroup("Sort Space", diskName.data, true, es);

			ExplainPropertyInteger("Average Sort Space Used", "kB", avgSpace, es);
			ExplainPropertyInteger("Peak Sort Space Used", "kB",
								   groupInfo->maxDiskSpaceUsed, es);

			ExplainCloseGroup("Sort Space", diskName.data, true, es);
		}

		ExplainCloseGroup("Incremental Sort Groups", groupName.data, true, es);
	}
}

/*
 * If it's EXPLAIN ANALYZE, show tuplesort stats for an incremental sort node
 */
static void
show_incremental_sort_info(IncrementalSortState *incrsortstate,
						   ExplainState *es)
{
	IncrementalSortGroupInfo *fullsortGroupInfo;
	IncrementalSortGroupInfo *prefixsortGroupInfo;

	fullsortGroupInfo =
		&incrsortstate->incsort_info.fullsortGroupInfo;

	if (!es->analyze)
		return;

	/*
	 * Since we never have any prefix groups unless we've first sorted a full
	 * groups and transitioned modes (copying the tuples into a prefix group),
	 * we don't need to do anything if there were 0 full groups.
	 *
	 * We still have to continue after this block if there are no full groups,
	 * though, since it's possible that we have workers that did real work
	 * even if the leader didn't participate.
	 */
	if (fullsortGroupInfo->groupCount > 0)
	{
		show_incremental_sort_group_info(fullsortGroupInfo, "Full-sort", true, es);
		prefixsortGroupInfo = &incrsortstate->incsort_info.prefixsortGroupInfo;
		if (prefixsortGroupInfo->groupCount > 0)
		{
			if (es->format == EXPLAIN_FORMAT_TEXT)
				appendStringInfoChar(es->str, '\n');
			show_incremental_sort_group_info(prefixsortGroupInfo, "Pre-sorted", true, es);
		}
		if (es->format == EXPLAIN_FORMAT_TEXT)
			appendStringInfoChar(es->str, '\n');
	}

	/* per-worker stats, if this was a parallel incremental sort */
	if (incrsortstate->shared_info != NULL)
	{
		int			n;
		bool		indent_first_line;

		for (n = 0; n < incrsortstate->shared_info->num_workers; n++)
		{
			IncrementalSortInfo *incsort_info =
			&incrsortstate->shared_info->sinfo[n];

			/*
			 * If a worker hasn't processed any sort groups at all, then
			 * exclude it from output since it either didn't launch or didn't
			 * contribute anything meaningful.
			 */
			fullsortGroupInfo = &incsort_info->fullsortGroupInfo;

			/*
			 * Since we never have any prefix groups unless we've first sorted
			 * a full groups and transitioned modes (copying the tuples into a
			 * prefix group), we don't need to do anything if there were 0
			 * full groups.
			 */
			if (fullsortGroupInfo->groupCount == 0)
				continue;

			if (es->workers_state)
				ExplainOpenWorker(n, es);

			/* worker 0's data may be shown as top-level when workers are hidden */
			indent_first_line = es->workers_state == NULL || es->verbose;
			show_incremental_sort_group_info(fullsortGroupInfo, "Full-sort",
											 indent_first_line, es);
			prefixsortGroupInfo = &incsort_info->prefixsortGroupInfo;
			if (prefixsortGroupInfo->groupCount > 0)
			{
				if (es->format == EXPLAIN_FORMAT_TEXT)
					appendStringInfoChar(es->str, '\n');
				show_incremental_sort_group_info(prefixsortGroupInfo, "Pre-sorted", true, es);
			}
			if (es->format == EXPLAIN_FORMAT_TEXT)
				appendStringInfoChar(es->str, '\n');

			if (es->workers_state)
				ExplainCloseWorker(n, es);
		}
	}
}

/*
 * Show information on hash buckets/batches.
 */
static void
show_hash_info(HashState *hashstate, ExplainState *es)
{
	HashInstrumentation hinstrument = {0};

	/*
	 * Collect stats from the local process, even when it's a parallel query.
	 * In a parallel query, the leader process may or may not have run the
	 * hash join, and even if it did it may not have built a hash table due to
	 * timing (if it started late it might have seen no tuples in the outer
	 * relation and skipped building the hash table).  Therefore we have to be
	 * prepared to get instrumentation data from all participants.
	 */
	if (hashstate->hinstrument)
		memcpy(&hinstrument, hashstate->hinstrument,
			   sizeof(HashInstrumentation));

	/*
	 * Merge results from workers.  In the parallel-oblivious case, the
	 * results from all participants should be identical, except where
	 * participants didn't run the join at all so have no data.  In the
	 * parallel-aware case, we need to consider all the results.  Each worker
	 * may have seen a different subset of batches and we want to report the
	 * highest memory usage across all batches.  We take the maxima of other
	 * values too, for the same reasons as in ExecHashAccumInstrumentation.
	 */
	if (hashstate->shared_info)
	{
		SharedHashInfo *shared_info = hashstate->shared_info;
		int			i;

		for (i = 0; i < shared_info->num_workers; ++i)
		{
			HashInstrumentation *worker_hi = &shared_info->hinstrument[i];

			hinstrument.nbuckets = Max(hinstrument.nbuckets,
									   worker_hi->nbuckets);
			hinstrument.nbuckets_original = Max(hinstrument.nbuckets_original,
												worker_hi->nbuckets_original);
			hinstrument.nbatch = Max(hinstrument.nbatch,
									 worker_hi->nbatch);
			hinstrument.nbatch_original = Max(hinstrument.nbatch_original,
											  worker_hi->nbatch_original);
			hinstrument.space_peak = Max(hinstrument.space_peak,
										 worker_hi->space_peak);
		}
	}

	/* nbatch > 0 means some participant actually built a hash table */
	if (hinstrument.nbatch > 0)
	{
		/* round bytes up to whole kilobytes */
		long		spacePeakKb = (hinstrument.space_peak + 1023) / 1024;

		if (es->format != EXPLAIN_FORMAT_TEXT)
		{
			ExplainPropertyInteger("Hash Buckets", NULL,
								   hinstrument.nbuckets, es);
			ExplainPropertyInteger("Original Hash Buckets", NULL,
								   hinstrument.nbuckets_original, es);
			ExplainPropertyInteger("Hash Batches", NULL,
								   hinstrument.nbatch, es);
			ExplainPropertyInteger("Original Hash Batches", NULL,
								   hinstrument.nbatch_original, es);
			ExplainPropertyInteger("Peak Memory Usage", "kB",
								   spacePeakKb, es);
		}
		else if (hinstrument.nbatch_original != hinstrument.nbatch ||
				 hinstrument.nbuckets_original != hinstrument.nbuckets)
		{
			/* text format: mention original values only when they changed */
			ExplainIndentText(es);
			appendStringInfo(es->str,
							 "Buckets: %d (originally %d) Batches: %d (originally %d) Memory Usage: %ldkB\n",
							 hinstrument.nbuckets,
							 hinstrument.nbuckets_original,
							 hinstrument.nbatch,
							 hinstrument.nbatch_original,
							 spacePeakKb);
		}
		else
		{
			ExplainIndentText(es);
			appendStringInfo(es->str,
							 "Buckets: %d Batches: %d Memory Usage: %ldkB\n",
							 hinstrument.nbuckets, hinstrument.nbatch,
							 spacePeakKb);
		}
	}
}

/*
 * Show information on memoize hits/misses/evictions and memory usage.
 */
static void
show_memoize_info(MemoizeState *mstate, List *ancestors, ExplainState *es)
{
	Plan	   *plan = ((PlanState *) mstate)->plan;
	ListCell   *lc;
	List	   *context;
	StringInfoData keystr;
	char	   *separator = "";
	bool		useprefix;
	int64		memPeakKb;		/* kB, rounded up */

	initStringInfo(&keystr);

	/*
	 * It's hard to imagine having a memoize node with fewer than 2 RTEs, but
	 * let's just keep the same useprefix logic as elsewhere in this file.
	 */
	useprefix = list_length(es->rtable) > 1 || es->verbose;

	/* Set up deparsing context */
	context = set_deparse_context_plan(es->deparse_cxt,
									   plan,
									   ancestors);

	/* Build a comma-separated deparsed list of the cache key expressions */
	foreach(lc, ((Memoize *) plan)->param_exprs)
	{
		Node	   *expr = (Node *) lfirst(lc);

		appendStringInfoString(&keystr, separator);

		appendStringInfoString(&keystr, deparse_expression(expr, context,
														   useprefix, false));
		separator = ", ";
	}

	if (es->format != EXPLAIN_FORMAT_TEXT)
	{
		ExplainPropertyText("Cache Key", keystr.data, es);
		ExplainPropertyText("Cache Mode", mstate->binary_mode ? "binary" : "logical", es);
	}
	else
	{
		ExplainIndentText(es);
		appendStringInfo(es->str, "Cache Key: %s\n", keystr.data);
		ExplainIndentText(es);
		appendStringInfo(es->str, "Cache Mode: %s\n", mstate->binary_mode ? "binary" : "logical");
	}

	pfree(keystr.data);

	/* Runtime statistics only exist under EXPLAIN ANALYZE */
	if (!es->analyze)
		return;

	if (mstate->stats.cache_misses > 0)
	{
		/*
		 * mem_peak is only set when we freed memory, so we must use mem_used
		 * when mem_peak is 0.
		 */
		if (mstate->stats.mem_peak > 0)
			memPeakKb = (mstate->stats.mem_peak + 1023) / 1024;
		else
			memPeakKb = (mstate->mem_used + 1023) / 1024;

		if (es->format != EXPLAIN_FORMAT_TEXT)
		{
			ExplainPropertyInteger("Cache Hits", NULL, mstate->stats.cache_hits, es);
			ExplainPropertyInteger("Cache Misses", NULL, mstate->stats.cache_misses, es);
			ExplainPropertyInteger("Cache Evictions", NULL, mstate->stats.cache_evictions, es);
			ExplainPropertyInteger("Cache Overflows", NULL, mstate->stats.cache_overflows, es);
			ExplainPropertyInteger("Peak Memory Usage", "kB", memPeakKb, es);
		}
		else
		{
			ExplainIndentText(es);
			appendStringInfo(es->str,
							 "Hits: " UINT64_FORMAT " Misses: " UINT64_FORMAT " Evictions: " UINT64_FORMAT " Overflows: " UINT64_FORMAT " Memory Usage: " INT64_FORMAT "kB\n",
							 mstate->stats.cache_hits,
							 mstate->stats.cache_misses,
							 mstate->stats.cache_evictions,
							 mstate->stats.cache_overflows,
							 memPeakKb);
		}
	}

	if (mstate->shared_info == NULL)
		return;

	/* Show details from parallel workers */
	for (int n = 0; n < mstate->shared_info->num_workers; n++)
	{
		MemoizeInstrumentation *si;

		si = &mstate->shared_info->sinstrument[n];

		/*
		 * Skip workers that didn't do any work.  We needn't bother checking
		 * for cache hits as a miss will always occur before a cache hit.
		 */
		if (si->cache_misses == 0)
			continue;

		if (es->workers_state)
			ExplainOpenWorker(n, es);

		/*
		 * Since the worker's MemoizeState.mem_used field is unavailable to
		 * us, ExecEndMemoize will have set the
		 * MemoizeInstrumentation.mem_peak field for us.  No need to do the
		 * zero checks like we did for the serial case above.
		 */
		memPeakKb = (si->mem_peak + 1023) / 1024;

		if (es->format == EXPLAIN_FORMAT_TEXT)
		{
			ExplainIndentText(es);
			appendStringInfo(es->str,
							 "Hits: " UINT64_FORMAT " Misses: " UINT64_FORMAT " Evictions: " UINT64_FORMAT " Overflows: " UINT64_FORMAT " Memory Usage: " INT64_FORMAT "kB\n",
							 si->cache_hits, si->cache_misses,
							 si->cache_evictions, si->cache_overflows,
							 memPeakKb);
		}
		else
		{
			ExplainPropertyInteger("Cache Hits", NULL,
								   si->cache_hits, es);
			ExplainPropertyInteger("Cache Misses", NULL,
								   si->cache_misses, es);
			ExplainPropertyInteger("Cache Evictions", NULL,
								   si->cache_evictions, es);
			ExplainPropertyInteger("Cache Overflows", NULL,
								   si->cache_overflows, es);
			ExplainPropertyInteger("Peak Memory Usage", "kB", memPeakKb,
								   es);
		}

		if (es->workers_state)
			ExplainCloseWorker(n, es);
	}
}

/*
 * Show information on hash aggregate memory usage and batches.
 */
static void
show_hashagg_info(AggState *aggstate, ExplainState *es)
{
	Agg		   *agg = (Agg *) aggstate->ss.ps.plan;
	int64		memPeakKb = (aggstate->hash_mem_peak + 1023) / 1024;

	/* Only hash-based (or mixed) aggregation has anything to report */
	if (agg->aggstrategy != AGG_HASHED &&
		agg->aggstrategy != AGG_MIXED)
		return;

	if (es->format != EXPLAIN_FORMAT_TEXT)
	{

		if (es->costs)
			ExplainPropertyInteger("Planned Partitions", NULL,
								   aggstate->hash_planned_partitions, es);

		/*
		 * During parallel query the leader may have not helped out.  We
		 * detect this by checking how much memory it used.  If we find it
		 * didn't do any work then we don't show its properties.
		 */
		if (es->analyze && aggstate->hash_mem_peak > 0)
		{
			ExplainPropertyInteger("HashAgg Batches", NULL,
								   aggstate->hash_batches_used, es);
			ExplainPropertyInteger("Peak Memory Usage", "kB", memPeakKb, es);
			ExplainPropertyInteger("Disk Usage", "kB",
								   aggstate->hash_disk_used, es);
		}
	}
	else
	{
		bool		gotone = false;

		if (es->costs && aggstate->hash_planned_partitions > 0)
		{
			ExplainIndentText(es);
			appendStringInfo(es->str, "Planned Partitions: %d",
							 aggstate->hash_planned_partitions);
			gotone = true;
		}

		/*
		 * During parallel query the leader may have not helped out.  We
		 * detect this by checking how much memory it used.  If we find it
		 * didn't do any work then we don't show its properties.
		 */
		if (es->analyze && aggstate->hash_mem_peak > 0)
		{
			if (!gotone)
				ExplainIndentText(es);
			else
				appendStringInfoString(es->str, " ");

			appendStringInfo(es->str, "Batches: %d Memory Usage: " INT64_FORMAT "kB",
							 aggstate->hash_batches_used, memPeakKb);
			gotone = true;

			/* Only display disk usage if we spilled to disk */
			if (aggstate->hash_batches_used > 1)
			{
				appendStringInfo(es->str, " Disk Usage: " UINT64_FORMAT "kB",
								 aggstate->hash_disk_used);
			}
		}

		if (gotone)
			appendStringInfoChar(es->str, '\n');
	}

	/* Display stats for each parallel worker */
	if (es->analyze && aggstate->shared_info != NULL)
	{
		for (int n = 0; n < aggstate->shared_info->num_workers; n++)
		{
			AggregateInstrumentation *sinstrument;
			uint64		hash_disk_used;
			int			hash_batches_used;

			sinstrument = &aggstate->shared_info->sinstrument[n];
			/* Skip workers that didn't do anything */
			if (sinstrument->hash_mem_peak == 0)
				continue;
			hash_disk_used = sinstrument->hash_disk_used;
			hash_batches_used = sinstrument->hash_batches_used;
			memPeakKb = (sinstrument->hash_mem_peak + 1023) / 1024;

			if (es->workers_state)
				ExplainOpenWorker(n, es);

			if (es->format == EXPLAIN_FORMAT_TEXT)
			{
				ExplainIndentText(es);

				appendStringInfo(es->str, "Batches: %d Memory Usage: " INT64_FORMAT "kB",
								 hash_batches_used, memPeakKb);

				/* Only display disk usage if we spilled to disk */
				if (hash_batches_used > 1)
					appendStringInfo(es->str, " Disk Usage: " UINT64_FORMAT "kB",
									 hash_disk_used);
				appendStringInfoChar(es->str, '\n');
			}
			else
			{
				ExplainPropertyInteger("HashAgg Batches", NULL,
									   hash_batches_used, es);
				ExplainPropertyInteger("Peak Memory Usage", "kB", memPeakKb,
									   es);
				ExplainPropertyInteger("Disk Usage", "kB", hash_disk_used, es);
			}

			if (es->workers_state)
				ExplainCloseWorker(n, es);
		}
	}
}

/*
 * If it's EXPLAIN ANALYZE, show exact/lossy pages for a BitmapHeapScan node
 */
static void
show_tidbitmap_info(BitmapHeapScanState *planstate, ExplainState *es)
{
	if (es->format != EXPLAIN_FORMAT_TEXT)
	{
		ExplainPropertyInteger("Exact Heap Blocks", NULL,
							   planstate->exact_pages, es);
		ExplainPropertyInteger("Lossy Heap Blocks", NULL,
							   planstate->lossy_pages, es);
	}
	else
	{
		/* Text mode: omit the line entirely when both counters are zero */
		if (planstate->exact_pages > 0 || planstate->lossy_pages > 0)
		{
			ExplainIndentText(es);
			appendStringInfoString(es->str, "Heap Blocks:");
			if (planstate->exact_pages > 0)
				appendStringInfo(es->str, " exact=%ld", planstate->exact_pages);
			if (planstate->lossy_pages > 0)
				appendStringInfo(es->str, " lossy=%ld", planstate->lossy_pages);
			appendStringInfoChar(es->str, '\n');
		}
	}
}

/*
 * If it's EXPLAIN ANALYZE, show instrumentation information for a plan node
 *
 * "which" identifies which instrumentation counter to print: 2 selects
 * nfiltered2, anything else selects nfiltered1.
 */
static void
show_instrumentation_count(const char *qlabel, int which,
						   PlanState *planstate, ExplainState *es)
{
	double		nfiltered;
	double		nloops;

	if (!es->analyze || !planstate->instrument)
		return;

	if (which == 2)
		nfiltered = planstate->instrument->nfiltered2;
	else
		nfiltered = planstate->instrument->nfiltered1;
	nloops = planstate->instrument->nloops;

	/* In text mode, suppress zero counts; they're not interesting enough */
	if (nfiltered > 0 || es->format != EXPLAIN_FORMAT_TEXT)
	{
		/* Report the average per loop, guarding against nloops == 0 */
		if (nloops > 0)
			ExplainPropertyFloat(qlabel, NULL, nfiltered / nloops, 0, es);
		else
			ExplainPropertyFloat(qlabel, NULL, 0.0, 0, es);
	}
}

/*
 * Show extra information for a ForeignScan node.
 */
static void
show_foreignscan_info(ForeignScanState *fsstate, ExplainState *es)
{
	FdwRoutine *fdwroutine = fsstate->fdwroutine;

	/* Let the FDW emit whatever fields it wants */
	if (((ForeignScan *) fsstate->ss.ps.plan)->operation != CMD_SELECT)
	{
		if (fdwroutine->ExplainDirectModify != NULL)
			fdwroutine->ExplainDirectModify(fsstate, es);
	}
	else
	{
		if (fdwroutine->ExplainForeignScan != NULL)
			fdwroutine->ExplainForeignScan(fsstate, es);
	}
}

/*
 * Show initplan params evaluated at Gather or Gather Merge node.
 */
static void
show_eval_params(Bitmapset *bms_params, ExplainState *es)
{
	int			paramid = -1;
	List	   *params = NIL;

	Assert(bms_params);

	/* Render each member of the bitmapset as "$<paramid>" */
	while ((paramid = bms_next_member(bms_params, paramid)) >= 0)
	{
		char		param[32];

		snprintf(param, sizeof(param), "$%d", paramid);
		params = lappend(params, pstrdup(param));
	}

	if (params)
		ExplainPropertyList("Params Evaluated", params, es);
}

/*
 * Fetch the name of an index in an EXPLAIN
 *
 * We allow plugins to get control here so that plans involving hypothetical
 * indexes can be explained.
 *
 * Note: names returned by this function should be "raw"; the caller will
 * apply quoting if needed.  Formerly the convention was to do quoting here,
 * but we don't want that in non-text output formats.
 */
static const char *
explain_get_index_name(Oid indexId)
{
	const char *result;

	/* Give the hook first crack; it may recognize hypothetical indexes */
	if (explain_get_index_name_hook)
		result = (*explain_get_index_name_hook) (indexId);
	else
		result = NULL;
	if (result == NULL)
	{
		/* default behavior: look it up in the catalogs */
		result = get_rel_name(indexId);
		if (result == NULL)
			elog(ERROR, "cache lookup failed for index %u", indexId);
	}
	return result;
}

/*
 * Show buffer usage details.
 *
 * "planning" is true when reporting buffers consumed during planning, in
 * which case text output is wrapped in a "Planning:" group.
 */
static void
show_buffer_usage(ExplainState *es, const BufferUsage *usage, bool planning)
{
	if (es->format == EXPLAIN_FORMAT_TEXT)
	{
		bool		has_shared = (usage->shared_blks_hit > 0 ||
								  usage->shared_blks_read > 0 ||
								  usage->shared_blks_dirtied > 0 ||
								  usage->shared_blks_written > 0);
		bool		has_local = (usage->local_blks_hit > 0 ||
								 usage->local_blks_read > 0 ||
								 usage->local_blks_dirtied > 0 ||
								 usage->local_blks_written > 0);
		bool		has_temp = (usage->temp_blks_read > 0 ||
								usage->temp_blks_written > 0);
		bool		has_timing = (!INSTR_TIME_IS_ZERO(usage->blk_read_time) ||
								  !INSTR_TIME_IS_ZERO(usage->blk_write_time));
		bool		has_temp_timing = (!INSTR_TIME_IS_ZERO(usage->temp_blk_read_time) ||
									   !INSTR_TIME_IS_ZERO(usage->temp_blk_write_time));
		bool		show_planning = (planning && (has_shared ||
												  has_local || has_temp || has_timing ||
												  has_temp_timing));

		if (show_planning)
		{
			ExplainIndentText(es);
			appendStringInfoString(es->str, "Planning:\n");
			es->indent++;
		}

		/* Show only positive counter values. */
		if (has_shared || has_local || has_temp)
		{
			ExplainIndentText(es);
			appendStringInfoString(es->str, "Buffers:");

			if (has_shared)
			{
				appendStringInfoString(es->str, " shared");
				if (usage->shared_blks_hit > 0)
					appendStringInfo(es->str, " hit=%lld",
									 (long long) usage->shared_blks_hit);
				if (usage->shared_blks_read > 0)
					appendStringInfo(es->str, " read=%lld",
									 (long long) usage->shared_blks_read);
				if (usage->shared_blks_dirtied > 0)
					appendStringInfo(es->str, " dirtied=%lld",
									 (long long) usage->shared_blks_dirtied);
				if (usage->shared_blks_written > 0)
					appendStringInfo(es->str, " written=%lld",
									 (long long) usage->shared_blks_written);
				if (has_local || has_temp)
					appendStringInfoChar(es->str, ',');
			}
			if (has_local)
			{
				appendStringInfoString(es->str, " local");
				if (usage->local_blks_hit > 0)
					appendStringInfo(es->str, " hit=%lld",
									 (long long) usage->local_blks_hit);
				if (usage->local_blks_read > 0)
					appendStringInfo(es->str, " read=%lld",
									 (long long) usage->local_blks_read);
				if (usage->local_blks_dirtied > 0)
					appendStringInfo(es->str, " dirtied=%lld",
									 (long long) usage->local_blks_dirtied);
				if (usage->local_blks_written > 0)
					appendStringInfo(es->str, " written=%lld",
									 (long long) usage->local_blks_written);
				if (has_temp)
					appendStringInfoChar(es->str, ',');
			}
			if (has_temp)
			{
				appendStringInfoString(es->str, " temp");
				if (usage->temp_blks_read > 0)
					appendStringInfo(es->str, " read=%lld",
									 (long long) usage->temp_blks_read);
				if (usage->temp_blks_written > 0)
					appendStringInfo(es->str, " written=%lld",
									 (long long) usage->temp_blks_written);
			}
			appendStringInfoChar(es->str, '\n');
		}

		/* As above, show only positive counter values. */
		if (has_timing || has_temp_timing)
		{
			ExplainIndentText(es);
			appendStringInfoString(es->str, "I/O Timings:");

			if (has_timing)
			{
				appendStringInfoString(es->str, " shared/local");
				if (!INSTR_TIME_IS_ZERO(usage->blk_read_time))
					appendStringInfo(es->str, " read=%0.3f",
									 INSTR_TIME_GET_MILLISEC(usage->blk_read_time));
				if (!INSTR_TIME_IS_ZERO(usage->blk_write_time))
					appendStringInfo(es->str, " write=%0.3f",
									 INSTR_TIME_GET_MILLISEC(usage->blk_write_time));
				if (has_temp_timing)
					appendStringInfoChar(es->str, ',');
			}
			if (has_temp_timing)
			{
				appendStringInfoString(es->str, " temp");
				if (!INSTR_TIME_IS_ZERO(usage->temp_blk_read_time))
					appendStringInfo(es->str, " read=%0.3f",
									 INSTR_TIME_GET_MILLISEC(usage->temp_blk_read_time));
				if (!INSTR_TIME_IS_ZERO(usage->temp_blk_write_time))
					appendStringInfo(es->str, " write=%0.3f",
									 INSTR_TIME_GET_MILLISEC(usage->temp_blk_write_time));
			}
			appendStringInfoChar(es->str, '\n');
		}

		if (show_planning)
			es->indent--;
	}
	else
	{
		/* Structured formats emit every field unconditionally */
		ExplainPropertyInteger("Shared Hit Blocks", NULL,
							   usage->shared_blks_hit, es);
		ExplainPropertyInteger("Shared Read Blocks", NULL,
							   usage->shared_blks_read, es);
		ExplainPropertyInteger("Shared Dirtied Blocks", NULL,
							   usage->shared_blks_dirtied, es);
		ExplainPropertyInteger("Shared Written Blocks", NULL,
							   usage->shared_blks_written, es);
		ExplainPropertyInteger("Local Hit Blocks", NULL,
							   usage->local_blks_hit, es);
		ExplainPropertyInteger("Local Read Blocks", NULL,
							   usage->local_blks_read, es);
		ExplainPropertyInteger("Local Dirtied Blocks", NULL,
							   usage->local_blks_dirtied, es);
		ExplainPropertyInteger("Local Written Blocks", NULL,
							   usage->local_blks_written, es);
		ExplainPropertyInteger("Temp Read Blocks", NULL,
							   usage->temp_blks_read, es);
		ExplainPropertyInteger("Temp Written Blocks", NULL,
							   usage->temp_blks_written, es);
		if (track_io_timing)
		{
			ExplainPropertyFloat("I/O Read Time", "ms",
								 INSTR_TIME_GET_MILLISEC(usage->blk_read_time),
								 3, es);
			ExplainPropertyFloat("I/O Write Time", "ms",
								 INSTR_TIME_GET_MILLISEC(usage->blk_write_time),
								 3, es);
			ExplainPropertyFloat("Temp I/O Read Time", "ms",
								 INSTR_TIME_GET_MILLISEC(usage->temp_blk_read_time),
								 3, es);
			ExplainPropertyFloat("Temp I/O Write Time", "ms",
								 INSTR_TIME_GET_MILLISEC(usage->temp_blk_write_time),
								 3, es);
		}
	}
}

/*
 * Show WAL usage details.
 */
static void
show_wal_usage(ExplainState *es, const WalUsage *usage)
{
	if (es->format == EXPLAIN_FORMAT_TEXT)
	{
		/* Show only positive counter values. */
		if ((usage->wal_records > 0) || (usage->wal_fpi > 0) ||
			(usage->wal_bytes > 0))
		{
			ExplainIndentText(es);
			appendStringInfoString(es->str, "WAL:");

			if (usage->wal_records > 0)
				appendStringInfo(es->str, " records=%lld",
								 (long long) usage->wal_records);
			if (usage->wal_fpi > 0)
				appendStringInfo(es->str, " fpi=%lld",
								 (long long) usage->wal_fpi);
			if (usage->wal_bytes > 0)
				appendStringInfo(es->str, " bytes=" UINT64_FORMAT,
								 usage->wal_bytes);
			appendStringInfoChar(es->str, '\n');
		}
	}
	else
	{
		ExplainPropertyInteger("WAL Records", NULL,
							   usage->wal_records, es);
		ExplainPropertyInteger("WAL FPI", NULL,
							   usage->wal_fpi, es);
		ExplainPropertyUInteger("WAL Bytes", NULL,
								usage->wal_bytes, es);
	}
}

/*
 * Add some additional details about an IndexScan or IndexOnlyScan
 */
static void
ExplainIndexScanDetails(Oid indexid, ScanDirection indexorderdir,
						ExplainState *es)
{
	const char *indexname = explain_get_index_name(indexid);

	if (es->format == EXPLAIN_FORMAT_TEXT)
	{
		if (ScanDirectionIsBackward(indexorderdir))
			appendStringInfoString(es->str, " Backward");
		appendStringInfo(es->str, " using %s", quote_identifier(indexname));
	}
	else
	{
		const char *scandir;

		switch (indexorderdir)
		{
			case BackwardScanDirection:
				scandir = "Backward";
				break;
			case NoMovementScanDirection:
				scandir = "NoMovement";
				break;
			case ForwardScanDirection:
				scandir = "Forward";
				break;
			default:
				scandir = "???";
				break;
		}
		ExplainPropertyText("Scan Direction", scandir, es);
		ExplainPropertyText("Index Name", indexname, es);
	}
}

/*
 * Show the target of a Scan node
 */
static void
ExplainScanTarget(Scan *plan, ExplainState *es)
{
	ExplainTargetRel((Plan *) plan, plan->scanrelid, es);
}

/*
 * Show the target of a ModifyTable node
 *
 * Here we show the nominal target (ie, the relation that was named in the
 * original query).  If the actual target(s) is/are different, we'll show them
 * in show_modifytable_info().
 */
static void
ExplainModifyTarget(ModifyTable *plan, ExplainState *es)
{
	ExplainTargetRel((Plan *) plan, plan->nominalRelation, es);
}

/*
 * Show the target relation of a scan or modify node
 */
static void
ExplainTargetRel(Plan *plan, Index rti, ExplainState *es)
{
	char	   *objectname = NULL;
	char	   *namespace = NULL;
	const char *objecttag = NULL;
	RangeTblEntry *rte;
	char	   *refname;

	rte = rt_fetch(rti, es->rtable);
	refname = (char *) list_nth(es->rtable_names, rti - 1);
	if (refname == NULL)
		refname = rte->eref->aliasname;

	switch (nodeTag(plan))
	{
		case T_SeqScan:
		case T_SampleScan:
		case T_IndexScan:
		case T_IndexOnlyScan:
		case T_BitmapHeapScan:
		case T_TidScan:
		case T_TidRangeScan:
		case T_ForeignScan:
		case T_CustomScan:
		case T_ModifyTable:
			/* Assert it's on a real relation */
			Assert(rte->rtekind == RTE_RELATION);
			objectname = get_rel_name(rte->relid);
			if (es->verbose)
				namespace = get_namespace_name_or_temp(get_rel_namespace(rte->relid));
			objecttag = "Relation Name";
			break;
		case T_FunctionScan:
			{
				FunctionScan *fscan = (FunctionScan *) plan;

				/* Assert it's on a RangeFunction */
				Assert(rte->rtekind == RTE_FUNCTION);

				/*
				 * If the expression is still a function call of a single
				 * function, we can get the real name of the function.
				 * Otherwise, punt.  (Even if it was a single function call
				 * originally, the optimizer could have simplified it away.)
				 */
				if (list_length(fscan->functions) == 1)
				{
					RangeTblFunction *rtfunc = (RangeTblFunction *) linitial(fscan->functions);

					if (IsA(rtfunc->funcexpr, FuncExpr))
					{
						FuncExpr   *funcexpr = (FuncExpr *) rtfunc->funcexpr;
						Oid			funcid = funcexpr->funcid;

						objectname = get_func_name(funcid);
						if (es->verbose)
							namespace = get_namespace_name_or_temp(get_func_namespace(funcid));
					}
				}
				objecttag = "Function Name";
			}
			break;
		case T_TableFuncScan:
			Assert(rte->rtekind == RTE_TABLEFUNC);
			objectname = "xmltable";
			objecttag = "Table Function Name";
			break;
		case T_ValuesScan:
			Assert(rte->rtekind == RTE_VALUES);
			break;
		case T_CteScan:
			/* Assert it's on a non-self-reference CTE */
			Assert(rte->rtekind == RTE_CTE);
			Assert(!rte->self_reference);
			objectname = rte->ctename;
			objecttag = "CTE Name";
			break;
		case T_NamedTuplestoreScan:
			Assert(rte->rtekind == RTE_NAMEDTUPLESTORE);
			objectname = rte->enrname;
			objecttag = "Tuplestore Name";
			break;
		case T_WorkTableScan:
			/* Assert it's on a self-reference CTE */
			Assert(rte->rtekind == RTE_CTE);
			Assert(rte->self_reference);
			objectname = rte->ctename;
			objecttag = "CTE Name";
			break;
		default:
			break;
	}

	if (es->format == EXPLAIN_FORMAT_TEXT)
	{
		appendStringInfoString(es->str, " on");
		if (namespace != NULL)
			appendStringInfo(es->str, " %s.%s", quote_identifier(namespace),
							 quote_identifier(objectname));
		else if (objectname != NULL)
			appendStringInfo(es->str, " %s", quote_identifier(objectname));
		/* Add the alias when it differs from (or there is no) object name */
		if (objectname == NULL || strcmp(refname, objectname) != 0)
			appendStringInfo(es->str, " %s", quote_identifier(refname));
	}
	else
	{
		if (objecttag != NULL && objectname != NULL)
			ExplainPropertyText(objecttag, objectname, es);
		if (namespace != NULL)
			ExplainPropertyText("Schema", namespace, es);
		ExplainPropertyText("Alias", refname, es);
	}
}

/*
 *
 Show extra information for a ModifyTable node
 *
 * We have three objectives here.  First, if there's more than one target
 * table or it's different from the nominal target, identify the actual
 * target(s).  Second, give FDWs a chance to display extra info about foreign
 * targets.  Third, show information about ON CONFLICT.
 */
static void
show_modifytable_info(ModifyTableState *mtstate, List *ancestors,
					  ExplainState *es)
{
	ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
	const char *operation;
	const char *foperation;
	bool		labeltargets;
	int			j;
	List	   *idxNames = NIL;
	ListCell   *lst;

	switch (node->operation)
	{
		case CMD_INSERT:
			operation = "Insert";
			foperation = "Foreign Insert";
			break;
		case CMD_UPDATE:
			operation = "Update";
			foperation = "Foreign Update";
			break;
		case CMD_DELETE:
			operation = "Delete";
			foperation = "Foreign Delete";
			break;
		case CMD_MERGE:
			operation = "Merge";
			/* XXX unsupported for now, but avoid compiler noise */
			foperation = "Foreign Merge";
			break;
		default:
			operation = "???";
			foperation = "Foreign ???";
			break;
	}

	/* Should we explicitly label target relations? */
	labeltargets = (mtstate->mt_nrels > 1 ||
					(mtstate->mt_nrels == 1 &&
					 mtstate->resultRelInfo[0].ri_RangeTableIndex != node->nominalRelation));

	if (labeltargets)
		ExplainOpenGroup("Target Tables", "Target Tables", false, es);

	for (j = 0; j < mtstate->mt_nrels; j++)
	{
		ResultRelInfo *resultRelInfo = mtstate->resultRelInfo + j;
		FdwRoutine *fdwroutine = resultRelInfo->ri_FdwRoutine;

		if (labeltargets)
		{
			/* Open a group for this target */
			ExplainOpenGroup("Target Table", NULL, true, es);

			/*
			 * In text mode, decorate each target with operation type, so that
			 * ExplainTargetRel's output of " on foo" will read nicely.
			 */
			if (es->format == EXPLAIN_FORMAT_TEXT)
			{
				ExplainIndentText(es);
				appendStringInfoString(es->str,
									   fdwroutine ? foperation : operation);
			}

			/* Identify target */
			ExplainTargetRel((Plan *) node,
							 resultRelInfo->ri_RangeTableIndex,
							 es);

			if (es->format == EXPLAIN_FORMAT_TEXT)
			{
				appendStringInfoChar(es->str, '\n');
				es->indent++;
			}
		}

		/* Give FDW a chance if needed */
		if (!resultRelInfo->ri_usesFdwDirectModify &&
			fdwroutine != NULL &&
			fdwroutine->ExplainForeignModify != NULL)
		{
			List	   *fdw_private = (List *) list_nth(node->fdwPrivLists, j);

			fdwroutine->ExplainForeignModify(mtstate,
											 resultRelInfo,
											 fdw_private,
											 j,
											 es);
		}

		if (labeltargets)
		{
			/* Undo the indentation we added in text format */
			if (es->format == EXPLAIN_FORMAT_TEXT)
				es->indent--;

			/* Close the group */
			ExplainCloseGroup("Target Table", NULL, true, es);
		}
	}

	/* Gather names of ON CONFLICT arbiter indexes */
	foreach(lst, node->arbiterIndexes)
	{
		char	   *indexname = get_rel_name(lfirst_oid(lst));

		idxNames = lappend(idxNames, indexname);
	}

	if (node->onConflictAction != ONCONFLICT_NONE)
	{
		ExplainPropertyText("Conflict Resolution",
							node->onConflictAction == ONCONFLICT_NOTHING ?
							"NOTHING" : "UPDATE",
							es);

		/*
		 * Don't display arbiter indexes at all when DO NOTHING variant
		 * implicitly ignores all conflicts
		 */
		if (idxNames)
			ExplainPropertyList("Conflict Arbiter Indexes", idxNames, es);

		/* ON CONFLICT DO UPDATE WHERE qual is specially displayed */
		if (node->onConflictWhere)
		{
			show_upper_qual((List *) node->onConflictWhere, "Conflict Filter",
							&mtstate->ps, ancestors, es);
			show_instrumentation_count("Rows Removed by Conflict Filter", 1, &mtstate->ps, es);
		}

		/* EXPLAIN ANALYZE display of actual outcome for each tuple proposed */
		if (es->analyze && mtstate->ps.instrument)
		{
			double		total;
			double		insert_path;
			double		other_path;

			InstrEndLoop(outerPlanState(mtstate)->instrument);

			/* count the number of source rows */
			total = outerPlanState(mtstate)->instrument->ntuples;
			other_path = mtstate->ps.instrument->ntuples2;
			/* inserted rows = source rows minus conflicting rows */
			insert_path = total - other_path;

			ExplainPropertyFloat("Tuples Inserted", NULL,
								 insert_path, 0, es);
			ExplainPropertyFloat("Conflicting Tuples", NULL,
								 other_path, 0, es);
		}
	}
	else if (node->operation == CMD_MERGE)
	{
		/* EXPLAIN ANALYZE display of tuples processed */
		if (es->analyze && mtstate->ps.instrument)
		{
			double		total;
			double		insert_path;
			double		update_path;
			double		delete_path;
			double		skipped_path;

			InstrEndLoop(outerPlanState(mtstate)->instrument);

			/* count the number of source rows */
			total = outerPlanState(mtstate)->instrument->ntuples;
			insert_path = mtstate->mt_merge_inserted;
			update_path = mtstate->mt_merge_updated;
			delete_path = mtstate->mt_merge_deleted;
			/* skipped = rows matched by no acted-upon WHEN clause */
			skipped_path = total - insert_path - update_path - delete_path;
			Assert(skipped_path >= 0);

			if (es->format == EXPLAIN_FORMAT_TEXT)
			{
				if (total > 0)
				{
					ExplainIndentText(es);
					appendStringInfoString(es->str, "Tuples:");
					if (insert_path > 0)
						appendStringInfo(es->str, " inserted=%.0f", insert_path);
					if (update_path > 0)
						appendStringInfo(es->str, " updated=%.0f", update_path);
					if (delete_path > 0)
						appendStringInfo(es->str, " deleted=%.0f", delete_path);
					if (skipped_path > 0)
						appendStringInfo(es->str, " skipped=%.0f", skipped_path);
					appendStringInfoChar(es->str, '\n');
				}
			}
			else
			{
				ExplainPropertyFloat("Tuples Inserted", NULL, insert_path, 0, es);
				ExplainPropertyFloat("Tuples Updated", NULL, update_path, 0, es);
				ExplainPropertyFloat("Tuples Deleted", NULL, delete_path, 0, es);
				ExplainPropertyFloat("Tuples Skipped", NULL, skipped_path, 0, es);
			}
		}
	}

	if (labeltargets)
		ExplainCloseGroup("Target Tables", "Target Tables", false, es);
}

/*
 * Explain the constituent plans of an Append, MergeAppend,
 * BitmapAnd, or BitmapOr node.
 *
 * The ancestors list should already contain the immediate parent of these
 * plans.
 */
static void
ExplainMemberNodes(PlanState **planstates, int nplans,
				   List *ancestors, ExplainState *es)
{
	int			j;

	for (j = 0; j < nplans; j++)
		ExplainNode(planstates[j], ancestors,
					"Member", NULL, es);
}

/*
 * Report about any pruned subnodes of an Append or MergeAppend node.
 *
 * nplans indicates the number of live subplans.
 * nchildren indicates the original number of subnodes in the Plan;
 * some of these may have been pruned by the run-time pruning code.
 */
static void
ExplainMissingMembers(int nplans, int nchildren, ExplainState *es)
{
	/* Structured formats always emit the field, even when zero */
	if (nplans < nchildren || es->format != EXPLAIN_FORMAT_TEXT)
		ExplainPropertyInteger("Subplans Removed", NULL,
							   nchildren - nplans, es);
}

/*
 * Explain a list of SubPlans (or initPlans, which also use SubPlan nodes).
 *
 * The ancestors list should already contain the immediate parent of these
 * SubPlans.
+ */ +static void +ExplainSubPlans(List *plans, List *ancestors, + const char *relationship, ExplainState *es) +{ + ListCell *lst; + + foreach(lst, plans) + { + SubPlanState *sps = (SubPlanState *) lfirst(lst); + SubPlan *sp = sps->subplan; + + /* + * There can be multiple SubPlan nodes referencing the same physical + * subplan (same plan_id, which is its index in PlannedStmt.subplans). + * We should print a subplan only once, so track which ones we already + * printed. This state must be global across the plan tree, since the + * duplicate nodes could be in different plan nodes, eg both a bitmap + * indexscan's indexqual and its parent heapscan's recheck qual. (We + * do not worry too much about which plan node we show the subplan as + * attached to in such cases.) + */ + if (bms_is_member(sp->plan_id, es->printed_subplans)) + continue; + es->printed_subplans = bms_add_member(es->printed_subplans, + sp->plan_id); + + /* + * Treat the SubPlan node as an ancestor of the plan node(s) within + * it, so that ruleutils.c can find the referents of subplan + * parameters. + */ + ancestors = lcons(sp, ancestors); + + ExplainNode(sps->planstate, ancestors, + relationship, sp->plan_name, es); + + ancestors = list_delete_first(ancestors); + } +} + +/* + * Explain a list of children of a CustomScan. + */ +static void +ExplainCustomChildren(CustomScanState *css, List *ancestors, ExplainState *es) +{ + ListCell *cell; + const char *label = + (list_length(css->custom_ps) != 1 ? "children" : "child"); + + foreach(cell, css->custom_ps) + ExplainNode((PlanState *) lfirst(cell), ancestors, label, NULL, es); +} + +/* + * Create a per-plan-node workspace for collecting per-worker data. + * + * Output related to each worker will be temporarily "set aside" into a + * separate buffer, which we'll merge into the main output stream once + * we've processed all data for the plan node. 
 This makes it feasible to
 * generate a coherent sub-group of fields for each worker, even though the
 * code that produces the fields is in several different places in this file.
 * Formatting of such a set-aside field group is managed by
 * ExplainOpenSetAsideGroup and ExplainSaveGroup/ExplainRestoreGroup.
 */
static ExplainWorkersState *
ExplainCreateWorkersState(int num_workers)
{
	ExplainWorkersState *wstate;

	wstate = (ExplainWorkersState *) palloc(sizeof(ExplainWorkersState));
	wstate->num_workers = num_workers;
	/* per-worker "have we started output?" flags, all false initially */
	wstate->worker_inited = (bool *) palloc0(num_workers * sizeof(bool));
	wstate->worker_str = (StringInfoData *)
		palloc0(num_workers * sizeof(StringInfoData));
	wstate->worker_state_save = (int *) palloc(num_workers * sizeof(int));
	return wstate;
}

/*
 * Begin or resume output into the set-aside group for worker N.
 */
static void
ExplainOpenWorker(int n, ExplainState *es)
{
	ExplainWorkersState *wstate = es->workers_state;

	Assert(wstate);
	Assert(n >= 0 && n < wstate->num_workers);

	/* Save prior output buffer pointer */
	wstate->prev_str = es->str;

	if (!wstate->worker_inited[n])
	{
		/* First time through, so create the buffer for this worker */
		initStringInfo(&wstate->worker_str[n]);
		es->str = &wstate->worker_str[n];

		/*
		 * Push suitable initial formatting state for this worker's field
		 * group.  We allow one extra logical nesting level, since this group
		 * will eventually be wrapped in an outer "Workers" group.
		 */
		ExplainOpenSetAsideGroup("Worker", NULL, true, 2, es);

		/*
		 * In non-TEXT formats we always emit a "Worker Number" field, even if
		 * there's no other data for this worker.
		 */
		if (es->format != EXPLAIN_FORMAT_TEXT)
			ExplainPropertyInteger("Worker Number", NULL, n, es);

		wstate->worker_inited[n] = true;
	}
	else
	{
		/* Resuming output for a worker we've already emitted some data for */
		es->str = &wstate->worker_str[n];

		/* Restore formatting state saved by last ExplainCloseWorker() */
		ExplainRestoreGroup(es, 2, &wstate->worker_state_save[n]);
	}

	/*
	 * In TEXT format, prefix the first output line for this worker with
	 * "Worker N:".  Then, any additional lines should be indented one more
	 * stop than the "Worker N" line is.
	 */
	if (es->format == EXPLAIN_FORMAT_TEXT)
	{
		if (es->str->len == 0)
		{
			ExplainIndentText(es);
			appendStringInfo(es->str, "Worker %d: ", n);
		}

		es->indent++;
	}
}

/*
 * End output for worker N --- must pair with previous ExplainOpenWorker call
 */
static void
ExplainCloseWorker(int n, ExplainState *es)
{
	ExplainWorkersState *wstate = es->workers_state;

	Assert(wstate);
	Assert(n >= 0 && n < wstate->num_workers);
	Assert(wstate->worker_inited[n]);

	/*
	 * Save formatting state in case we do another ExplainOpenWorker(), then
	 * pop the formatting stack.
	 */
	ExplainSaveGroup(es, 2, &wstate->worker_state_save[n]);

	/*
	 * In TEXT format, if we didn't actually produce any output line(s) then
	 * truncate off the partial line emitted by ExplainOpenWorker.  (This is
	 * to avoid bogus output if, say, show_buffer_usage chooses not to print
	 * anything for the worker.)  Also fix up the indent level.
	 */
	if (es->format == EXPLAIN_FORMAT_TEXT)
	{
		/* strip back to (and including) the last newline, if any */
		while (es->str->len > 0 && es->str->data[es->str->len - 1] != '\n')
			es->str->data[--(es->str->len)] = '\0';

		es->indent--;
	}

	/* Restore prior output buffer pointer */
	es->str = wstate->prev_str;
}

/*
 * Print per-worker info for current node, then free the ExplainWorkersState.
+ */ +static void +ExplainFlushWorkersState(ExplainState *es) +{ + ExplainWorkersState *wstate = es->workers_state; + + ExplainOpenGroup("Workers", "Workers", false, es); + for (int i = 0; i < wstate->num_workers; i++) + { + if (wstate->worker_inited[i]) + { + /* This must match previous ExplainOpenSetAsideGroup call */ + ExplainOpenGroup("Worker", NULL, true, es); + appendStringInfoString(es->str, wstate->worker_str[i].data); + ExplainCloseGroup("Worker", NULL, true, es); + + pfree(wstate->worker_str[i].data); + } + } + ExplainCloseGroup("Workers", "Workers", false, es); + + pfree(wstate->worker_inited); + pfree(wstate->worker_str); + pfree(wstate->worker_state_save); + pfree(wstate); +} + +/* + * Explain a property, such as sort keys or targets, that takes the form of + * a list of unlabeled items. "data" is a list of C strings. + */ +void +ExplainPropertyList(const char *qlabel, List *data, ExplainState *es) +{ + ListCell *lc; + bool first = true; + + switch (es->format) + { + case EXPLAIN_FORMAT_TEXT: + ExplainIndentText(es); + appendStringInfo(es->str, "%s: ", qlabel); + foreach(lc, data) + { + if (!first) + appendStringInfoString(es->str, ", "); + appendStringInfoString(es->str, (const char *) lfirst(lc)); + first = false; + } + appendStringInfoChar(es->str, '\n'); + break; + + case EXPLAIN_FORMAT_XML: + ExplainXMLTag(qlabel, X_OPENING, es); + foreach(lc, data) + { + char *str; + + appendStringInfoSpaces(es->str, es->indent * 2 + 2); + appendStringInfoString(es->str, ""); + str = escape_xml((const char *) lfirst(lc)); + appendStringInfoString(es->str, str); + pfree(str); + appendStringInfoString(es->str, "\n"); + } + ExplainXMLTag(qlabel, X_CLOSING, es); + break; + + case EXPLAIN_FORMAT_JSON: + ExplainJSONLineEnding(es); + appendStringInfoSpaces(es->str, es->indent * 2); + escape_json(es->str, qlabel); + appendStringInfoString(es->str, ": ["); + foreach(lc, data) + { + if (!first) + appendStringInfoString(es->str, ", "); + escape_json(es->str, (const char *) 
lfirst(lc)); + first = false; + } + appendStringInfoChar(es->str, ']'); + break; + + case EXPLAIN_FORMAT_YAML: + ExplainYAMLLineStarting(es); + appendStringInfo(es->str, "%s: ", qlabel); + foreach(lc, data) + { + appendStringInfoChar(es->str, '\n'); + appendStringInfoSpaces(es->str, es->indent * 2 + 2); + appendStringInfoString(es->str, "- "); + escape_yaml(es->str, (const char *) lfirst(lc)); + } + break; + } +} + +/* + * Explain a property that takes the form of a list of unlabeled items within + * another list. "data" is a list of C strings. + */ +void +ExplainPropertyListNested(const char *qlabel, List *data, ExplainState *es) +{ + ListCell *lc; + bool first = true; + + switch (es->format) + { + case EXPLAIN_FORMAT_TEXT: + case EXPLAIN_FORMAT_XML: + ExplainPropertyList(qlabel, data, es); + return; + + case EXPLAIN_FORMAT_JSON: + ExplainJSONLineEnding(es); + appendStringInfoSpaces(es->str, es->indent * 2); + appendStringInfoChar(es->str, '['); + foreach(lc, data) + { + if (!first) + appendStringInfoString(es->str, ", "); + escape_json(es->str, (const char *) lfirst(lc)); + first = false; + } + appendStringInfoChar(es->str, ']'); + break; + + case EXPLAIN_FORMAT_YAML: + ExplainYAMLLineStarting(es); + appendStringInfoString(es->str, "- ["); + foreach(lc, data) + { + if (!first) + appendStringInfoString(es->str, ", "); + escape_yaml(es->str, (const char *) lfirst(lc)); + first = false; + } + appendStringInfoChar(es->str, ']'); + break; + } +} + +/* + * Explain a simple property. + * + * If "numeric" is true, the value is a number (or other value that + * doesn't need quoting in JSON). + * + * If unit is non-NULL the text format will display it after the value. + * + * This usually should not be invoked directly, but via one of the datatype + * specific routines ExplainPropertyText, ExplainPropertyInteger, etc. 
+ */ +static void +ExplainProperty(const char *qlabel, const char *unit, const char *value, + bool numeric, ExplainState *es) +{ + switch (es->format) + { + case EXPLAIN_FORMAT_TEXT: + ExplainIndentText(es); + if (unit) + appendStringInfo(es->str, "%s: %s %s\n", qlabel, value, unit); + else + appendStringInfo(es->str, "%s: %s\n", qlabel, value); + break; + + case EXPLAIN_FORMAT_XML: + { + char *str; + + appendStringInfoSpaces(es->str, es->indent * 2); + ExplainXMLTag(qlabel, X_OPENING | X_NOWHITESPACE, es); + str = escape_xml(value); + appendStringInfoString(es->str, str); + pfree(str); + ExplainXMLTag(qlabel, X_CLOSING | X_NOWHITESPACE, es); + appendStringInfoChar(es->str, '\n'); + } + break; + + case EXPLAIN_FORMAT_JSON: + ExplainJSONLineEnding(es); + appendStringInfoSpaces(es->str, es->indent * 2); + escape_json(es->str, qlabel); + appendStringInfoString(es->str, ": "); + if (numeric) + appendStringInfoString(es->str, value); + else + escape_json(es->str, value); + break; + + case EXPLAIN_FORMAT_YAML: + ExplainYAMLLineStarting(es); + appendStringInfo(es->str, "%s: ", qlabel); + if (numeric) + appendStringInfoString(es->str, value); + else + escape_yaml(es->str, value); + break; + } +} + +/* + * Explain a string-valued property. + */ +void +ExplainPropertyText(const char *qlabel, const char *value, ExplainState *es) +{ + ExplainProperty(qlabel, NULL, value, false, es); +} + +/* + * Explain an integer-valued property. + */ +void +ExplainPropertyInteger(const char *qlabel, const char *unit, int64 value, + ExplainState *es) +{ + char buf[32]; + + snprintf(buf, sizeof(buf), INT64_FORMAT, value); + ExplainProperty(qlabel, unit, buf, true, es); +} + +/* + * Explain an unsigned integer-valued property. 
 */
void
ExplainPropertyUInteger(const char *qlabel, const char *unit, uint64 value,
						ExplainState *es)
{
	char		buf[32];

	snprintf(buf, sizeof(buf), UINT64_FORMAT, value);
	ExplainProperty(qlabel, unit, buf, true, es);
}

/*
 * Explain a float-valued property, using the specified number of
 * fractional digits.
 */
void
ExplainPropertyFloat(const char *qlabel, const char *unit, double value,
					 int ndigits, ExplainState *es)
{
	char	   *buf;

	buf = psprintf("%.*f", ndigits, value);
	ExplainProperty(qlabel, unit, buf, true, es);
	pfree(buf);
}

/*
 * Explain a bool-valued property.
 */
void
ExplainPropertyBool(const char *qlabel, bool value, ExplainState *es)
{
	/* "true"/"false" need no quoting in JSON, hence numeric = true */
	ExplainProperty(qlabel, NULL, value ? "true" : "false", true, es);
}

/*
 * Open a group of related objects.
 *
 * objtype is the type of the group object, labelname is its label within
 * a containing object (if any).
 *
 * If labeled is true, the group members will be labeled properties,
 * while if it's false, they'll be unlabeled objects.
 */
void
ExplainOpenGroup(const char *objtype, const char *labelname,
				 bool labeled, ExplainState *es)
{
	switch (es->format)
	{
		case EXPLAIN_FORMAT_TEXT:
			/* nothing to do */
			break;

		case EXPLAIN_FORMAT_XML:
			ExplainXMLTag(objtype, X_OPENING, es);
			es->indent++;
			break;

		case EXPLAIN_FORMAT_JSON:
			ExplainJSONLineEnding(es);
			appendStringInfoSpaces(es->str, 2 * es->indent);
			if (labelname)
			{
				escape_json(es->str, labelname);
				appendStringInfoString(es->str, ": ");
			}
			/* labeled groups are JSON objects, unlabeled ones are arrays */
			appendStringInfoChar(es->str, labeled ? '{' : '[');

			/*
			 * In JSON format, the grouping_stack is an integer list.  0 means
			 * we've emitted nothing at this grouping level, 1 means we've
			 * emitted something (and so the next item needs a comma).  See
			 * ExplainJSONLineEnding().
			 */
			es->grouping_stack = lcons_int(0, es->grouping_stack);
			es->indent++;
			break;

		case EXPLAIN_FORMAT_YAML:

			/*
			 * In YAML format, the grouping stack is an integer list.  0 means
			 * we've emitted nothing at this grouping level AND this grouping
			 * level is unlabeled and must be marked with "- ".  See
			 * ExplainYAMLLineStarting().
			 */
			ExplainYAMLLineStarting(es);
			if (labelname)
			{
				appendStringInfo(es->str, "%s: ", labelname);
				es->grouping_stack = lcons_int(1, es->grouping_stack);
			}
			else
			{
				appendStringInfoString(es->str, "- ");
				es->grouping_stack = lcons_int(0, es->grouping_stack);
			}
			es->indent++;
			break;
	}
}

/*
 * Close a group of related objects.
 * Parameters must match the corresponding ExplainOpenGroup call.
 */
void
ExplainCloseGroup(const char *objtype, const char *labelname,
				  bool labeled, ExplainState *es)
{
	switch (es->format)
	{
		case EXPLAIN_FORMAT_TEXT:
			/* nothing to do */
			break;

		case EXPLAIN_FORMAT_XML:
			es->indent--;
			ExplainXMLTag(objtype, X_CLOSING, es);
			break;

		case EXPLAIN_FORMAT_JSON:
			es->indent--;
			appendStringInfoChar(es->str, '\n');
			appendStringInfoSpaces(es->str, 2 * es->indent);
			appendStringInfoChar(es->str, labeled ? '}' : ']');
			es->grouping_stack = list_delete_first(es->grouping_stack);
			break;

		case EXPLAIN_FORMAT_YAML:
			es->indent--;
			es->grouping_stack = list_delete_first(es->grouping_stack);
			break;
	}
}

/*
 * Open a group of related objects, without emitting actual data.
 *
 * Prepare the formatting state as though we were beginning a group with
 * the identified properties, but don't actually emit anything.  Output
 * subsequent to this call can be redirected into a separate output buffer,
 * and then eventually appended to the main output buffer after doing a
 * regular ExplainOpenGroup call (with the same parameters).
 *
 * The extra "depth" parameter is the new group's depth compared to current.
+ * It could be more than one, in case the eventual output will be enclosed + * in additional nesting group levels. We assume we don't need to track + * formatting state for those levels while preparing this group's output. + * + * There is no ExplainCloseSetAsideGroup --- in current usage, we always + * pop this state with ExplainSaveGroup. + */ +static void +ExplainOpenSetAsideGroup(const char *objtype, const char *labelname, + bool labeled, int depth, ExplainState *es) +{ + switch (es->format) + { + case EXPLAIN_FORMAT_TEXT: + /* nothing to do */ + break; + + case EXPLAIN_FORMAT_XML: + es->indent += depth; + break; + + case EXPLAIN_FORMAT_JSON: + es->grouping_stack = lcons_int(0, es->grouping_stack); + es->indent += depth; + break; + + case EXPLAIN_FORMAT_YAML: + if (labelname) + es->grouping_stack = lcons_int(1, es->grouping_stack); + else + es->grouping_stack = lcons_int(0, es->grouping_stack); + es->indent += depth; + break; + } +} + +/* + * Pop one level of grouping state, allowing for a re-push later. + * + * This is typically used after ExplainOpenSetAsideGroup; pass the + * same "depth" used for that. + * + * This should not emit any output. If state needs to be saved, + * save it at *state_save. Currently, an integer save area is sufficient + * for all formats, but we might need to revisit that someday. 
+ */ +static void +ExplainSaveGroup(ExplainState *es, int depth, int *state_save) +{ + switch (es->format) + { + case EXPLAIN_FORMAT_TEXT: + /* nothing to do */ + break; + + case EXPLAIN_FORMAT_XML: + es->indent -= depth; + break; + + case EXPLAIN_FORMAT_JSON: + es->indent -= depth; + *state_save = linitial_int(es->grouping_stack); + es->grouping_stack = list_delete_first(es->grouping_stack); + break; + + case EXPLAIN_FORMAT_YAML: + es->indent -= depth; + *state_save = linitial_int(es->grouping_stack); + es->grouping_stack = list_delete_first(es->grouping_stack); + break; + } +} + +/* + * Re-push one level of grouping state, undoing the effects of ExplainSaveGroup. + */ +static void +ExplainRestoreGroup(ExplainState *es, int depth, int *state_save) +{ + switch (es->format) + { + case EXPLAIN_FORMAT_TEXT: + /* nothing to do */ + break; + + case EXPLAIN_FORMAT_XML: + es->indent += depth; + break; + + case EXPLAIN_FORMAT_JSON: + es->grouping_stack = lcons_int(*state_save, es->grouping_stack); + es->indent += depth; + break; + + case EXPLAIN_FORMAT_YAML: + es->grouping_stack = lcons_int(*state_save, es->grouping_stack); + es->indent += depth; + break; + } +} + +/* + * Emit a "dummy" group that never has any members. + * + * objtype is the type of the group object, labelname is its label within + * a containing object (if any). 
+ */ +static void +ExplainDummyGroup(const char *objtype, const char *labelname, ExplainState *es) +{ + switch (es->format) + { + case EXPLAIN_FORMAT_TEXT: + /* nothing to do */ + break; + + case EXPLAIN_FORMAT_XML: + ExplainXMLTag(objtype, X_CLOSE_IMMEDIATE, es); + break; + + case EXPLAIN_FORMAT_JSON: + ExplainJSONLineEnding(es); + appendStringInfoSpaces(es->str, 2 * es->indent); + if (labelname) + { + escape_json(es->str, labelname); + appendStringInfoString(es->str, ": "); + } + escape_json(es->str, objtype); + break; + + case EXPLAIN_FORMAT_YAML: + ExplainYAMLLineStarting(es); + if (labelname) + { + escape_yaml(es->str, labelname); + appendStringInfoString(es->str, ": "); + } + else + { + appendStringInfoString(es->str, "- "); + } + escape_yaml(es->str, objtype); + break; + } +} + +/* + * Emit the start-of-output boilerplate. + * + * This is just enough different from processing a subgroup that we need + * a separate pair of subroutines. + */ +void +ExplainBeginOutput(ExplainState *es) +{ + switch (es->format) + { + case EXPLAIN_FORMAT_TEXT: + /* nothing to do */ + break; + + case EXPLAIN_FORMAT_XML: + appendStringInfoString(es->str, + "\n"); + es->indent++; + break; + + case EXPLAIN_FORMAT_JSON: + /* top-level structure is an array of plans */ + appendStringInfoChar(es->str, '['); + es->grouping_stack = lcons_int(0, es->grouping_stack); + es->indent++; + break; + + case EXPLAIN_FORMAT_YAML: + es->grouping_stack = lcons_int(0, es->grouping_stack); + break; + } +} + +/* + * Emit the end-of-output boilerplate. 
+ */ +void +ExplainEndOutput(ExplainState *es) +{ + switch (es->format) + { + case EXPLAIN_FORMAT_TEXT: + /* nothing to do */ + break; + + case EXPLAIN_FORMAT_XML: + es->indent--; + appendStringInfoString(es->str, ""); + break; + + case EXPLAIN_FORMAT_JSON: + es->indent--; + appendStringInfoString(es->str, "\n]"); + es->grouping_stack = list_delete_first(es->grouping_stack); + break; + + case EXPLAIN_FORMAT_YAML: + es->grouping_stack = list_delete_first(es->grouping_stack); + break; + } +} + +/* + * Put an appropriate separator between multiple plans + */ +void +ExplainSeparatePlans(ExplainState *es) +{ + switch (es->format) + { + case EXPLAIN_FORMAT_TEXT: + /* add a blank line */ + appendStringInfoChar(es->str, '\n'); + break; + + case EXPLAIN_FORMAT_XML: + case EXPLAIN_FORMAT_JSON: + case EXPLAIN_FORMAT_YAML: + /* nothing to do */ + break; + } +} + +/* + * Emit opening or closing XML tag. + * + * "flags" must contain X_OPENING, X_CLOSING, or X_CLOSE_IMMEDIATE. + * Optionally, OR in X_NOWHITESPACE to suppress the whitespace we'd normally + * add. + * + * XML restricts tag names more than our other output formats, eg they can't + * contain white space or slashes. Replace invalid characters with dashes, + * so that for example "I/O Read Time" becomes "I-O-Read-Time". + */ +static void +ExplainXMLTag(const char *tagname, int flags, ExplainState *es) +{ + const char *s; + const char *valid = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_."; + + if ((flags & X_NOWHITESPACE) == 0) + appendStringInfoSpaces(es->str, 2 * es->indent); + appendStringInfoCharMacro(es->str, '<'); + if ((flags & X_CLOSING) != 0) + appendStringInfoCharMacro(es->str, '/'); + for (s = tagname; *s; s++) + appendStringInfoChar(es->str, strchr(valid, *s) ? 
*s : '-'); + if ((flags & X_CLOSE_IMMEDIATE) != 0) + appendStringInfoString(es->str, " /"); + appendStringInfoCharMacro(es->str, '>'); + if ((flags & X_NOWHITESPACE) == 0) + appendStringInfoCharMacro(es->str, '\n'); +} + +/* + * Indent a text-format line. + * + * We indent by two spaces per indentation level. However, when emitting + * data for a parallel worker there might already be data on the current line + * (cf. ExplainOpenWorker); in that case, don't indent any more. + */ +static void +ExplainIndentText(ExplainState *es) +{ + Assert(es->format == EXPLAIN_FORMAT_TEXT); + if (es->str->len == 0 || es->str->data[es->str->len - 1] == '\n') + appendStringInfoSpaces(es->str, es->indent * 2); +} + +/* + * Emit a JSON line ending. + * + * JSON requires a comma after each property but the last. To facilitate this, + * in JSON format, the text emitted for each property begins just prior to the + * preceding line-break (and comma, if applicable). + */ +static void +ExplainJSONLineEnding(ExplainState *es) +{ + Assert(es->format == EXPLAIN_FORMAT_JSON); + if (linitial_int(es->grouping_stack) != 0) + appendStringInfoChar(es->str, ','); + else + linitial_int(es->grouping_stack) = 1; + appendStringInfoChar(es->str, '\n'); +} + +/* + * Indent a YAML line. + * + * YAML lines are ordinarily indented by two spaces per indentation level. + * The text emitted for each property begins just prior to the preceding + * line-break, except for the first property in an unlabeled group, for which + * it begins immediately after the "- " that introduces the group. The first + * property of the group appears on the same line as the opening "- ". 
+ */ +static void +ExplainYAMLLineStarting(ExplainState *es) +{ + Assert(es->format == EXPLAIN_FORMAT_YAML); + if (linitial_int(es->grouping_stack) == 0) + { + linitial_int(es->grouping_stack) = 1; + } + else + { + appendStringInfoChar(es->str, '\n'); + appendStringInfoSpaces(es->str, es->indent * 2); + } +} + +/* + * YAML is a superset of JSON; unfortunately, the YAML quoting rules are + * ridiculously complicated -- as documented in sections 5.3 and 7.3.3 of + * http://yaml.org/spec/1.2/spec.html -- so we chose to just quote everything. + * Empty strings, strings with leading or trailing whitespace, and strings + * containing a variety of special characters must certainly be quoted or the + * output is invalid; and other seemingly harmless strings like "0xa" or + * "true" must be quoted, lest they be interpreted as a hexadecimal or Boolean + * constant rather than a string. + */ +static void +escape_yaml(StringInfo buf, const char *str) +{ + escape_json(buf, str); +} diff --git a/src/backend/commands/extension.c b/src/backend/commands/extension.c new file mode 100644 index 0000000..df6f021 --- /dev/null +++ b/src/backend/commands/extension.c @@ -0,0 +1,3417 @@ +/*------------------------------------------------------------------------- + * + * extension.c + * Commands to manipulate extensions + * + * Extensions in PostgreSQL allow management of collections of SQL objects. + * + * All we need internally to manage an extension is an OID so that the + * dependent objects can be associated with it. An extension is created by + * populating the pg_extension catalog from a "control" file. + * The extension control file is parsed with the same parser we use for + * postgresql.conf. An extension also has an installation script file, + * containing SQL commands to create the extension's objects. 
+ * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/extension.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include +#include +#include +#include +#include + +#include "access/genam.h" +#include "access/htup_details.h" +#include "access/relation.h" +#include "access/sysattr.h" +#include "access/table.h" +#include "access/xact.h" +#include "catalog/catalog.h" +#include "catalog/dependency.h" +#include "catalog/indexing.h" +#include "catalog/namespace.h" +#include "catalog/objectaccess.h" +#include "catalog/pg_authid.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_depend.h" +#include "catalog/pg_extension.h" +#include "catalog/pg_namespace.h" +#include "catalog/pg_type.h" +#include "commands/alter.h" +#include "commands/comment.h" +#include "commands/defrem.h" +#include "commands/extension.h" +#include "commands/schemacmds.h" +#include "funcapi.h" +#include "mb/pg_wchar.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "storage/fd.h" +#include "tcop/utility.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/rel.h" +#include "utils/snapmgr.h" +#include "utils/varlena.h" + + +/* Globally visible state variables */ +bool creating_extension = false; +Oid CurrentExtensionObject = InvalidOid; + +/* + * Internal data structure to hold the results of parsing a control file + */ +typedef struct ExtensionControlFile +{ + char *name; /* name of the extension */ + char *directory; /* directory for script files */ + char *default_version; /* default install target version, if any */ + char *module_pathname; /* string to substitute for + * MODULE_PATHNAME */ + char *comment; /* comment, if any */ + char *schema; /* target schema 
(allowed if !relocatable) */ + bool relocatable; /* is ALTER EXTENSION SET SCHEMA supported? */ + bool superuser; /* must be superuser to install? */ + bool trusted; /* allow becoming superuser on the fly? */ + int encoding; /* encoding of the script file, or -1 */ + List *requires; /* names of prerequisite extensions */ +} ExtensionControlFile; + +/* + * Internal data structure for update path information + */ +typedef struct ExtensionVersionInfo +{ + char *name; /* name of the starting version */ + List *reachable; /* List of ExtensionVersionInfo's */ + bool installable; /* does this version have an install script? */ + /* working state for Dijkstra's algorithm: */ + bool distance_known; /* is distance from start known yet? */ + int distance; /* current worst-case distance estimate */ + struct ExtensionVersionInfo *previous; /* current best predecessor */ +} ExtensionVersionInfo; + +/* Local functions */ +static List *find_update_path(List *evi_list, + ExtensionVersionInfo *evi_start, + ExtensionVersionInfo *evi_target, + bool reject_indirect, + bool reinitialize); +static Oid get_required_extension(char *reqExtensionName, + char *extensionName, + char *origSchemaName, + bool cascade, + List *parents, + bool is_create); +static void get_available_versions_for_extension(ExtensionControlFile *pcontrol, + Tuplestorestate *tupstore, + TupleDesc tupdesc); +static Datum convert_requires_to_datum(List *requires); +static void ApplyExtensionUpdates(Oid extensionOid, + ExtensionControlFile *pcontrol, + const char *initialVersion, + List *updateVersions, + char *origSchemaName, + bool cascade, + bool is_create); +static char *read_whole_file(const char *filename, int *length); + + +/* + * get_extension_oid - given an extension name, look up the OID + * + * If missing_ok is false, throw an error if extension name not found. If + * true, just return InvalidOid. 
+ */ +Oid +get_extension_oid(const char *extname, bool missing_ok) +{ + Oid result; + Relation rel; + SysScanDesc scandesc; + HeapTuple tuple; + ScanKeyData entry[1]; + + rel = table_open(ExtensionRelationId, AccessShareLock); + + ScanKeyInit(&entry[0], + Anum_pg_extension_extname, + BTEqualStrategyNumber, F_NAMEEQ, + CStringGetDatum(extname)); + + scandesc = systable_beginscan(rel, ExtensionNameIndexId, true, + NULL, 1, entry); + + tuple = systable_getnext(scandesc); + + /* We assume that there can be at most one matching tuple */ + if (HeapTupleIsValid(tuple)) + result = ((Form_pg_extension) GETSTRUCT(tuple))->oid; + else + result = InvalidOid; + + systable_endscan(scandesc); + + table_close(rel, AccessShareLock); + + if (!OidIsValid(result) && !missing_ok) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("extension \"%s\" does not exist", + extname))); + + return result; +} + +/* + * get_extension_name - given an extension OID, look up the name + * + * Returns a palloc'd string, or NULL if no such extension. + */ +char * +get_extension_name(Oid ext_oid) +{ + char *result; + Relation rel; + SysScanDesc scandesc; + HeapTuple tuple; + ScanKeyData entry[1]; + + rel = table_open(ExtensionRelationId, AccessShareLock); + + ScanKeyInit(&entry[0], + Anum_pg_extension_oid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(ext_oid)); + + scandesc = systable_beginscan(rel, ExtensionOidIndexId, true, + NULL, 1, entry); + + tuple = systable_getnext(scandesc); + + /* We assume that there can be at most one matching tuple */ + if (HeapTupleIsValid(tuple)) + result = pstrdup(NameStr(((Form_pg_extension) GETSTRUCT(tuple))->extname)); + else + result = NULL; + + systable_endscan(scandesc); + + table_close(rel, AccessShareLock); + + return result; +} + +/* + * get_extension_schema - given an extension OID, fetch its extnamespace + * + * Returns InvalidOid if no such extension. 
 */
static Oid
get_extension_schema(Oid ext_oid)
{
	Oid			result;
	Relation	rel;
	SysScanDesc scandesc;
	HeapTuple	tuple;
	ScanKeyData entry[1];

	rel = table_open(ExtensionRelationId, AccessShareLock);

	ScanKeyInit(&entry[0],
				Anum_pg_extension_oid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(ext_oid));

	scandesc = systable_beginscan(rel, ExtensionOidIndexId, true,
								  NULL, 1, entry);

	tuple = systable_getnext(scandesc);

	/* We assume that there can be at most one matching tuple */
	if (HeapTupleIsValid(tuple))
		result = ((Form_pg_extension) GETSTRUCT(tuple))->extnamespace;
	else
		result = InvalidOid;

	systable_endscan(scandesc);

	table_close(rel, AccessShareLock);

	return result;
}

/*
 * Utility functions to check validity of extension and version names
 *
 * Both checks exist mainly to keep script filenames unambiguous and to
 * prevent path-traversal attacks via crafted names; errors are reported
 * with ereport so they reach the user as regular SQL errors.
 */
static void
check_valid_extension_name(const char *extensionname)
{
	int			namelen = strlen(extensionname);

	/*
	 * Disallow empty names (the parser rejects empty identifiers anyway, but
	 * let's check).
	 */
	if (namelen == 0)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid extension name: \"%s\"", extensionname),
				 errdetail("Extension names must not be empty.")));

	/*
	 * No double dashes, since that would make script filenames ambiguous.
	 */
	if (strstr(extensionname, "--"))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid extension name: \"%s\"", extensionname),
				 errdetail("Extension names must not contain \"--\".")));

	/*
	 * No leading or trailing dash either.  (We could probably allow this, but
	 * it would require much care in filename parsing and would make filenames
	 * visually if not formally ambiguous.  Since there's no real-world use
	 * case, let's just forbid it.)
	 */
	if (extensionname[0] == '-' || extensionname[namelen - 1] == '-')
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid extension name: \"%s\"", extensionname),
				 errdetail("Extension names must not begin or end with \"-\".")));

	/*
	 * No directory separators either (this is sufficient to prevent ".."
	 * style attacks).
	 */
	if (first_dir_separator(extensionname) != NULL)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid extension name: \"%s\"", extensionname),
				 errdetail("Extension names must not contain directory separator characters.")));
}

/* Same rules as above, applied to a version string */
static void
check_valid_version_name(const char *versionname)
{
	int			namelen = strlen(versionname);

	/*
	 * Disallow empty names (we could possibly allow this, but there seems
	 * little point).
	 */
	if (namelen == 0)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid extension version name: \"%s\"", versionname),
				 errdetail("Version names must not be empty.")));

	/*
	 * No double dashes, since that would make script filenames ambiguous.
	 */
	if (strstr(versionname, "--"))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid extension version name: \"%s\"", versionname),
				 errdetail("Version names must not contain \"--\".")));

	/*
	 * No leading or trailing dash either.
	 */
	if (versionname[0] == '-' || versionname[namelen - 1] == '-')
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid extension version name: \"%s\"", versionname),
				 errdetail("Version names must not begin or end with \"-\".")));

	/*
	 * No directory separators either (this is sufficient to prevent ".."
	 * style attacks).
	 */
	if (first_dir_separator(versionname) != NULL)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid extension version name: \"%s\"", versionname),
				 errdetail("Version names must not contain directory separator characters.")));
}

/*
 * Utility functions to handle extension-related path names
 */
static bool
is_extension_control_filename(const char *filename)
{
	/* "extension" here is the filename suffix, not an extension name */
	const char *extension = strrchr(filename, '.');

	return (extension != NULL) && (strcmp(extension, ".control") == 0);
}

static bool
is_extension_script_filename(const char *filename)
{
	/* "extension" here is the filename suffix, not an extension name */
	const char *extension = strrchr(filename, '.');

	return (extension != NULL) && (strcmp(extension, ".sql") == 0);
}

/* Returns palloc'd path of the directory holding control files */
static char *
get_extension_control_directory(void)
{
	char		sharepath[MAXPGPATH];
	char	   *result;

	get_share_path(my_exec_path, sharepath);
	result = (char *) palloc(MAXPGPATH);
	snprintf(result, MAXPGPATH, "%s/extension", sharepath);

	return result;
}

/* Returns palloc'd path of the primary control file for "extname" */
static char *
get_extension_control_filename(const char *extname)
{
	char		sharepath[MAXPGPATH];
	char	   *result;

	get_share_path(my_exec_path, sharepath);
	result = (char *) palloc(MAXPGPATH);
	snprintf(result, MAXPGPATH, "%s/extension/%s.control",
			 sharepath, extname);

	return result;
}

/* Returns palloc'd path of the directory holding this extension's scripts */
static char *
get_extension_script_directory(ExtensionControlFile *control)
{
	char		sharepath[MAXPGPATH];
	char	   *result;

	/*
	 * The directory parameter can be omitted, absolute, or relative to the
	 * installation's share directory.
	 */
	if (!control->directory)
		return get_extension_control_directory();

	if (is_absolute_path(control->directory))
		return pstrdup(control->directory);

	get_share_path(my_exec_path, sharepath);
	result = (char *) palloc(MAXPGPATH);
	snprintf(result, MAXPGPATH, "%s/%s", sharepath, control->directory);

	return result;
}

/*
 * Returns palloc'd path of the version-specific auxiliary control file
 * (extname--version.control); the file itself need not exist.
 */
static char *
get_extension_aux_control_filename(ExtensionControlFile *control,
								   const char *version)
{
	char	   *result;
	char	   *scriptdir;

	scriptdir = get_extension_script_directory(control);

	result = (char *) palloc(MAXPGPATH);
	snprintf(result, MAXPGPATH, "%s/%s--%s.control",
			 scriptdir, control->name, version);

	pfree(scriptdir);

	return result;
}

/*
 * Returns palloc'd path of an install script (extname--version.sql) or,
 * when from_version is given, an update script
 * (extname--from_version--version.sql).
 */
static char *
get_extension_script_filename(ExtensionControlFile *control,
							  const char *from_version, const char *version)
{
	char	   *result;
	char	   *scriptdir;

	scriptdir = get_extension_script_directory(control);

	result = (char *) palloc(MAXPGPATH);
	if (from_version)
		snprintf(result, MAXPGPATH, "%s/%s--%s--%s.sql",
				 scriptdir, control->name, from_version, version);
	else
		snprintf(result, MAXPGPATH, "%s/%s--%s.sql",
				 scriptdir, control->name, version);

	pfree(scriptdir);

	return result;
}


/*
 * Parse contents of primary or auxiliary control file, and fill in
 * fields of *control.  We parse primary file if version == NULL,
 * else the optional auxiliary file for that version.
 *
 * Control files are supposed to be very short, half a dozen lines,
 * so we don't worry about memory allocation risks here.  Also we don't
 * worry about what encoding it's in; all values are expected to be ASCII.
 */
static void
parse_extension_control_file(ExtensionControlFile *control,
							 const char *version)
{
	char	   *filename;
	FILE	   *file;
	ConfigVariable *item,
			   *head = NULL,
			   *tail = NULL;

	/*
	 * Locate the file to read.  Auxiliary files are optional.
+ */ + if (version) + filename = get_extension_aux_control_filename(control, version); + else + filename = get_extension_control_filename(control->name); + + if ((file = AllocateFile(filename, "r")) == NULL) + { + if (errno == ENOENT) + { + /* no complaint for missing auxiliary file */ + if (version) + { + pfree(filename); + return; + } + + /* missing control file indicates extension is not installed */ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("extension \"%s\" is not available", control->name), + errdetail("Could not open extension control file \"%s\": %m.", + filename), + errhint("The extension must first be installed on the system where PostgreSQL is running."))); + } + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open extension control file \"%s\": %m", + filename))); + } + + /* + * Parse the file content, using GUC's file parsing code. We need not + * check the return value since any errors will be thrown at ERROR level. + */ + (void) ParseConfigFp(file, filename, 0, ERROR, &head, &tail); + + FreeFile(file); + + /* + * Convert the ConfigVariable list into ExtensionControlFile entries. 
+ */ + for (item = head; item != NULL; item = item->next) + { + if (strcmp(item->name, "directory") == 0) + { + if (version) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("parameter \"%s\" cannot be set in a secondary extension control file", + item->name))); + + control->directory = pstrdup(item->value); + } + else if (strcmp(item->name, "default_version") == 0) + { + if (version) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("parameter \"%s\" cannot be set in a secondary extension control file", + item->name))); + + control->default_version = pstrdup(item->value); + } + else if (strcmp(item->name, "module_pathname") == 0) + { + control->module_pathname = pstrdup(item->value); + } + else if (strcmp(item->name, "comment") == 0) + { + control->comment = pstrdup(item->value); + } + else if (strcmp(item->name, "schema") == 0) + { + control->schema = pstrdup(item->value); + } + else if (strcmp(item->name, "relocatable") == 0) + { + if (!parse_bool(item->value, &control->relocatable)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("parameter \"%s\" requires a Boolean value", + item->name))); + } + else if (strcmp(item->name, "superuser") == 0) + { + if (!parse_bool(item->value, &control->superuser)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("parameter \"%s\" requires a Boolean value", + item->name))); + } + else if (strcmp(item->name, "trusted") == 0) + { + if (!parse_bool(item->value, &control->trusted)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("parameter \"%s\" requires a Boolean value", + item->name))); + } + else if (strcmp(item->name, "encoding") == 0) + { + control->encoding = pg_valid_server_encoding(item->value); + if (control->encoding < 0) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("\"%s\" is not a valid encoding name", + item->value))); + } + else if (strcmp(item->name, "requires") == 0) + { + /* Need a modifiable copy of string 
*/ + char *rawnames = pstrdup(item->value); + + /* Parse string into list of identifiers */ + if (!SplitIdentifierString(rawnames, ',', &control->requires)) + { + /* syntax error in name list */ + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("parameter \"%s\" must be a list of extension names", + item->name))); + } + } + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unrecognized parameter \"%s\" in file \"%s\"", + item->name, filename))); + } + + FreeConfigVariables(head); + + if (control->relocatable && control->schema != NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("parameter \"schema\" cannot be specified when \"relocatable\" is true"))); + + pfree(filename); +} + +/* + * Read the primary control file for the specified extension. + */ +static ExtensionControlFile * +read_extension_control_file(const char *extname) +{ + ExtensionControlFile *control; + + /* + * Set up default values. Pointer fields are initially null. + */ + control = (ExtensionControlFile *) palloc0(sizeof(ExtensionControlFile)); + control->name = pstrdup(extname); + control->relocatable = false; + control->superuser = true; + control->trusted = false; + control->encoding = -1; + + /* + * Parse the primary control file. + */ + parse_extension_control_file(control, NULL); + + return control; +} + +/* + * Read the auxiliary control file for the specified extension and version. + * + * Returns a new modified ExtensionControlFile struct; the original struct + * (reflecting just the primary control file) is not modified. + */ +static ExtensionControlFile * +read_extension_aux_control_file(const ExtensionControlFile *pcontrol, + const char *version) +{ + ExtensionControlFile *acontrol; + + /* + * Flat-copy the struct. Pointer fields share values with original. 
+ */ + acontrol = (ExtensionControlFile *) palloc(sizeof(ExtensionControlFile)); + memcpy(acontrol, pcontrol, sizeof(ExtensionControlFile)); + + /* + * Parse the auxiliary control file, overwriting struct fields + */ + parse_extension_control_file(acontrol, version); + + return acontrol; +} + +/* + * Read an SQL script file into a string, and convert to database encoding + */ +static char * +read_extension_script_file(const ExtensionControlFile *control, + const char *filename) +{ + int src_encoding; + char *src_str; + char *dest_str; + int len; + + src_str = read_whole_file(filename, &len); + + /* use database encoding if not given */ + if (control->encoding < 0) + src_encoding = GetDatabaseEncoding(); + else + src_encoding = control->encoding; + + /* make sure that source string is valid in the expected encoding */ + (void) pg_verify_mbstr(src_encoding, src_str, len, false); + + /* + * Convert the encoding to the database encoding. read_whole_file + * null-terminated the string, so if no conversion happens the string is + * valid as is. + */ + dest_str = pg_any_to_server(src_str, len, src_encoding); + + return dest_str; +} + +/* + * Execute given SQL string. + * + * Note: it's tempting to just use SPI to execute the string, but that does + * not work very well. The really serious problem is that SPI will parse, + * analyze, and plan the whole string before executing any of it; of course + * this fails if there are any plannable statements referring to objects + * created earlier in the script. A lesser annoyance is that SPI insists + * on printing the whole string as errcontext in case of any error, and that + * could be very long. + */ +static void +execute_sql_string(const char *sql) +{ + List *raw_parsetree_list; + DestReceiver *dest; + ListCell *lc1; + + /* + * Parse the SQL string into a list of raw parse trees. 
+ */ + raw_parsetree_list = pg_parse_query(sql); + + /* All output from SELECTs goes to the bit bucket */ + dest = CreateDestReceiver(DestNone); + + /* + * Do parse analysis, rule rewrite, planning, and execution for each raw + * parsetree. We must fully execute each query before beginning parse + * analysis on the next one, since there may be interdependencies. + */ + foreach(lc1, raw_parsetree_list) + { + RawStmt *parsetree = lfirst_node(RawStmt, lc1); + MemoryContext per_parsetree_context, + oldcontext; + List *stmt_list; + ListCell *lc2; + + /* + * We do the work for each parsetree in a short-lived context, to + * limit the memory used when there are many commands in the string. + */ + per_parsetree_context = + AllocSetContextCreate(CurrentMemoryContext, + "execute_sql_string per-statement context", + ALLOCSET_DEFAULT_SIZES); + oldcontext = MemoryContextSwitchTo(per_parsetree_context); + + /* Be sure parser can see any DDL done so far */ + CommandCounterIncrement(); + + stmt_list = pg_analyze_and_rewrite_fixedparams(parsetree, + sql, + NULL, + 0, + NULL); + stmt_list = pg_plan_queries(stmt_list, sql, CURSOR_OPT_PARALLEL_OK, NULL); + + foreach(lc2, stmt_list) + { + PlannedStmt *stmt = lfirst_node(PlannedStmt, lc2); + + CommandCounterIncrement(); + + PushActiveSnapshot(GetTransactionSnapshot()); + + if (stmt->utilityStmt == NULL) + { + QueryDesc *qdesc; + + qdesc = CreateQueryDesc(stmt, + sql, + GetActiveSnapshot(), NULL, + dest, NULL, NULL, 0); + + ExecutorStart(qdesc, 0); + ExecutorRun(qdesc, ForwardScanDirection, 0, true); + ExecutorFinish(qdesc); + ExecutorEnd(qdesc); + + FreeQueryDesc(qdesc); + } + else + { + if (IsA(stmt->utilityStmt, TransactionStmt)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("transaction control statements are not allowed within an extension script"))); + + ProcessUtility(stmt, + sql, + false, + PROCESS_UTILITY_QUERY, + NULL, + NULL, + dest, + NULL); + } + + PopActiveSnapshot(); + } + + /* Clean up per-parsetree 
context. */ + MemoryContextSwitchTo(oldcontext); + MemoryContextDelete(per_parsetree_context); + } + + /* Be sure to advance the command counter after the last script command */ + CommandCounterIncrement(); +} + +/* + * Policy function: is the given extension trusted for installation by a + * non-superuser? + * + * (Update the errhint logic below if you change this.) + */ +static bool +extension_is_trusted(ExtensionControlFile *control) +{ + AclResult aclresult; + + /* Never trust unless extension's control file says it's okay */ + if (!control->trusted) + return false; + /* Allow if user has CREATE privilege on current database */ + aclresult = pg_database_aclcheck(MyDatabaseId, GetUserId(), ACL_CREATE); + if (aclresult == ACLCHECK_OK) + return true; + return false; +} + +/* + * Execute the appropriate script file for installing or updating the extension + * + * If from_version isn't NULL, it's an update + */ +static void +execute_extension_script(Oid extensionOid, ExtensionControlFile *control, + const char *from_version, + const char *version, + List *requiredSchemas, + const char *schemaName, Oid schemaOid) +{ + bool switch_to_superuser = false; + char *filename; + Oid save_userid = 0; + int save_sec_context = 0; + int save_nestlevel; + StringInfoData pathbuf; + ListCell *lc; + + /* + * Enforce superuser-ness if appropriate. We postpone these checks until + * here so that the control flags are correctly associated with the right + * script(s) if they happen to be set in secondary control files. + */ + if (control->superuser && !superuser()) + { + if (extension_is_trusted(control)) + switch_to_superuser = true; + else if (from_version == NULL) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to create extension \"%s\"", + control->name), + control->trusted + ? 
errhint("Must have CREATE privilege on current database to create this extension.") + : errhint("Must be superuser to create this extension."))); + else + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to update extension \"%s\"", + control->name), + control->trusted + ? errhint("Must have CREATE privilege on current database to update this extension.") + : errhint("Must be superuser to update this extension."))); + } + + filename = get_extension_script_filename(control, from_version, version); + + /* + * If installing a trusted extension on behalf of a non-superuser, become + * the bootstrap superuser. (This switch will be cleaned up automatically + * if the transaction aborts, as will the GUC changes below.) + */ + if (switch_to_superuser) + { + GetUserIdAndSecContext(&save_userid, &save_sec_context); + SetUserIdAndSecContext(BOOTSTRAP_SUPERUSERID, + save_sec_context | SECURITY_LOCAL_USERID_CHANGE); + } + + /* + * Force client_min_messages and log_min_messages to be at least WARNING, + * so that we won't spam the user with useless NOTICE messages from common + * script actions like creating shell types. + * + * We use the equivalent of a function SET option to allow the setting to + * persist for exactly the duration of the script execution. guc.c also + * takes care of undoing the setting on error. + * + * log_min_messages can't be set by ordinary users, so for that one we + * pretend to be superuser. + */ + save_nestlevel = NewGUCNestLevel(); + + if (client_min_messages < WARNING) + (void) set_config_option("client_min_messages", "warning", + PGC_USERSET, PGC_S_SESSION, + GUC_ACTION_SAVE, true, 0, false); + if (log_min_messages < WARNING) + (void) set_config_option_ext("log_min_messages", "warning", + PGC_SUSET, PGC_S_SESSION, + BOOTSTRAP_SUPERUSERID, + GUC_ACTION_SAVE, true, 0, false); + + /* + * Similarly disable check_function_bodies, to ensure that SQL functions + * won't be parsed during creation. 
+ */ + if (check_function_bodies) + (void) set_config_option("check_function_bodies", "off", + PGC_USERSET, PGC_S_SESSION, + GUC_ACTION_SAVE, true, 0, false); + + /* + * Set up the search path to have the target schema first, making it be + * the default creation target namespace. Then add the schemas of any + * prerequisite extensions, unless they are in pg_catalog which would be + * searched anyway. (Listing pg_catalog explicitly in a non-first + * position would be bad for security.) Finally add pg_temp to ensure + * that temp objects can't take precedence over others. + * + * Note: it might look tempting to use PushOverrideSearchPath for this, + * but we cannot do that. We have to actually set the search_path GUC in + * case the extension script examines or changes it. In any case, the + * GUC_ACTION_SAVE method is just as convenient. + */ + initStringInfo(&pathbuf); + appendStringInfoString(&pathbuf, quote_identifier(schemaName)); + foreach(lc, requiredSchemas) + { + Oid reqschema = lfirst_oid(lc); + char *reqname = get_namespace_name(reqschema); + + if (reqname && strcmp(reqname, "pg_catalog") != 0) + appendStringInfo(&pathbuf, ", %s", quote_identifier(reqname)); + } + appendStringInfoString(&pathbuf, ", pg_temp"); + + (void) set_config_option("search_path", pathbuf.data, + PGC_USERSET, PGC_S_SESSION, + GUC_ACTION_SAVE, true, 0, false); + + /* + * Set creating_extension and related variables so that + * recordDependencyOnCurrentExtension and other functions do the right + * things. On failure, ensure we reset these variables. + */ + creating_extension = true; + CurrentExtensionObject = extensionOid; + PG_TRY(); + { + char *c_sql = read_extension_script_file(control, filename); + Datum t_sql; + + /* + * We filter each substitution through quote_identifier(). 
When the + * arg contains one of the following characters, no one collection of + * quoting can work inside $$dollar-quoted string literals$$, + * 'single-quoted string literals', and outside of any literal. To + * avoid a security snare for extension authors, error on substitution + * for arguments containing these. + */ + const char *quoting_relevant_chars = "\"$'\\"; + + /* We use various functions that want to operate on text datums */ + t_sql = CStringGetTextDatum(c_sql); + + /* + * Reduce any lines beginning with "\echo" to empty. This allows + * scripts to contain messages telling people not to run them via + * psql, which has been found to be necessary due to old habits. + */ + t_sql = DirectFunctionCall4Coll(textregexreplace, + C_COLLATION_OID, + t_sql, + CStringGetTextDatum("^\\\\echo.*$"), + CStringGetTextDatum(""), + CStringGetTextDatum("ng")); + + /* + * If the script uses @extowner@, substitute the calling username. + */ + if (strstr(c_sql, "@extowner@")) + { + Oid uid = switch_to_superuser ? save_userid : GetUserId(); + const char *userName = GetUserNameFromId(uid, false); + const char *qUserName = quote_identifier(userName); + + t_sql = DirectFunctionCall3Coll(replace_text, + C_COLLATION_OID, + t_sql, + CStringGetTextDatum("@extowner@"), + CStringGetTextDatum(qUserName)); + if (strpbrk(userName, quoting_relevant_chars)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid character in extension owner: must not contain any of \"%s\"", + quoting_relevant_chars))); + } + + /* + * If it's not relocatable, substitute the target schema name for + * occurrences of @extschema@. + * + * For a relocatable extension, we needn't do this. There cannot be + * any need for @extschema@, else it wouldn't be relocatable. 
+ */ + if (!control->relocatable) + { + Datum old = t_sql; + const char *qSchemaName = quote_identifier(schemaName); + + t_sql = DirectFunctionCall3Coll(replace_text, + C_COLLATION_OID, + t_sql, + CStringGetTextDatum("@extschema@"), + CStringGetTextDatum(qSchemaName)); + if (t_sql != old && strpbrk(schemaName, quoting_relevant_chars)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid character in extension \"%s\" schema: must not contain any of \"%s\"", + control->name, quoting_relevant_chars))); + } + + /* + * If module_pathname was set in the control file, substitute its + * value for occurrences of MODULE_PATHNAME. + */ + if (control->module_pathname) + { + t_sql = DirectFunctionCall3Coll(replace_text, + C_COLLATION_OID, + t_sql, + CStringGetTextDatum("MODULE_PATHNAME"), + CStringGetTextDatum(control->module_pathname)); + } + + /* And now back to C string */ + c_sql = text_to_cstring(DatumGetTextPP(t_sql)); + + execute_sql_string(c_sql); + } + PG_FINALLY(); + { + creating_extension = false; + CurrentExtensionObject = InvalidOid; + } + PG_END_TRY(); + + /* + * Restore the GUC variables we set above. + */ + AtEOXact_GUC(true, save_nestlevel); + + /* + * Restore authentication state if needed. + */ + if (switch_to_superuser) + SetUserIdAndSecContext(save_userid, save_sec_context); +} + +/* + * Find or create an ExtensionVersionInfo for the specified version name + * + * Currently, we just use a List of the ExtensionVersionInfo's. Searching + * for them therefore uses about O(N^2) time when there are N versions of + * the extension. We could change the data structure to a hash table if + * this ever becomes a bottleneck. 
+ */ +static ExtensionVersionInfo * +get_ext_ver_info(const char *versionname, List **evi_list) +{ + ExtensionVersionInfo *evi; + ListCell *lc; + + foreach(lc, *evi_list) + { + evi = (ExtensionVersionInfo *) lfirst(lc); + if (strcmp(evi->name, versionname) == 0) + return evi; + } + + evi = (ExtensionVersionInfo *) palloc(sizeof(ExtensionVersionInfo)); + evi->name = pstrdup(versionname); + evi->reachable = NIL; + evi->installable = false; + /* initialize for later application of Dijkstra's algorithm */ + evi->distance_known = false; + evi->distance = INT_MAX; + evi->previous = NULL; + + *evi_list = lappend(*evi_list, evi); + + return evi; +} + +/* + * Locate the nearest unprocessed ExtensionVersionInfo + * + * This part of the algorithm is also about O(N^2). A priority queue would + * make it much faster, but for now there's no need. + */ +static ExtensionVersionInfo * +get_nearest_unprocessed_vertex(List *evi_list) +{ + ExtensionVersionInfo *evi = NULL; + ListCell *lc; + + foreach(lc, evi_list) + { + ExtensionVersionInfo *evi2 = (ExtensionVersionInfo *) lfirst(lc); + + /* only vertices whose distance is still uncertain are candidates */ + if (evi2->distance_known) + continue; + /* remember the closest such vertex */ + if (evi == NULL || + evi->distance > evi2->distance) + evi = evi2; + } + + return evi; +} + +/* + * Obtain information about the set of update scripts available for the + * specified extension. The result is a List of ExtensionVersionInfo + * structs, each with a subsidiary list of the ExtensionVersionInfos for + * the versions that can be reached in one step from that version. 
+ */ +static List * +get_ext_ver_list(ExtensionControlFile *control) +{ + List *evi_list = NIL; + int extnamelen = strlen(control->name); + char *location; + DIR *dir; + struct dirent *de; + + location = get_extension_script_directory(control); + dir = AllocateDir(location); + while ((de = ReadDir(dir, location)) != NULL) + { + char *vername; + char *vername2; + ExtensionVersionInfo *evi; + ExtensionVersionInfo *evi2; + + /* must be a .sql file ... */ + if (!is_extension_script_filename(de->d_name)) + continue; + + /* ... matching extension name followed by separator */ + if (strncmp(de->d_name, control->name, extnamelen) != 0 || + de->d_name[extnamelen] != '-' || + de->d_name[extnamelen + 1] != '-') + continue; + + /* extract version name(s) from 'extname--something.sql' filename */ + vername = pstrdup(de->d_name + extnamelen + 2); + *strrchr(vername, '.') = '\0'; + vername2 = strstr(vername, "--"); + if (!vername2) + { + /* It's an install, not update, script; record its version name */ + evi = get_ext_ver_info(vername, &evi_list); + evi->installable = true; + continue; + } + *vername2 = '\0'; /* terminate first version */ + vername2 += 2; /* and point to second */ + + /* if there's a third --, it's bogus, ignore it */ + if (strstr(vername2, "--")) + continue; + + /* Create ExtensionVersionInfos and link them together */ + evi = get_ext_ver_info(vername, &evi_list); + evi2 = get_ext_ver_info(vername2, &evi_list); + evi->reachable = lappend(evi->reachable, evi2); + } + FreeDir(dir); + + return evi_list; +} + +/* + * Given an initial and final version name, identify the sequence of update + * scripts that have to be applied to perform that update. + * + * Result is a List of names of versions to transition through (the initial + * version is *not* included). 
+ */ +static List * +identify_update_path(ExtensionControlFile *control, + const char *oldVersion, const char *newVersion) +{ + List *result; + List *evi_list; + ExtensionVersionInfo *evi_start; + ExtensionVersionInfo *evi_target; + + /* Extract the version update graph from the script directory */ + evi_list = get_ext_ver_list(control); + + /* Initialize start and end vertices */ + evi_start = get_ext_ver_info(oldVersion, &evi_list); + evi_target = get_ext_ver_info(newVersion, &evi_list); + + /* Find shortest path */ + result = find_update_path(evi_list, evi_start, evi_target, false, false); + + if (result == NIL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("extension \"%s\" has no update path from version \"%s\" to version \"%s\"", + control->name, oldVersion, newVersion))); + + return result; +} + +/* + * Apply Dijkstra's algorithm to find the shortest path from evi_start to + * evi_target. + * + * If reject_indirect is true, ignore paths that go through installable + * versions. This saves work when the caller will consider starting from + * all installable versions anyway. + * + * If reinitialize is false, assume the ExtensionVersionInfo list has not + * been used for this before, and the initialization done by get_ext_ver_info + * is still good. Otherwise, reinitialize all transient fields used here. + * + * Result is a List of names of versions to transition through (the initial + * version is *not* included). Returns NIL if no such path. 
+ */ +static List * +find_update_path(List *evi_list, + ExtensionVersionInfo *evi_start, + ExtensionVersionInfo *evi_target, + bool reject_indirect, + bool reinitialize) +{ + List *result; + ExtensionVersionInfo *evi; + ListCell *lc; + + /* Caller error if start == target */ + Assert(evi_start != evi_target); + /* Caller error if reject_indirect and target is installable */ + Assert(!(reject_indirect && evi_target->installable)); + + if (reinitialize) + { + foreach(lc, evi_list) + { + evi = (ExtensionVersionInfo *) lfirst(lc); + evi->distance_known = false; + evi->distance = INT_MAX; + evi->previous = NULL; + } + } + + evi_start->distance = 0; + + while ((evi = get_nearest_unprocessed_vertex(evi_list)) != NULL) + { + if (evi->distance == INT_MAX) + break; /* all remaining vertices are unreachable */ + evi->distance_known = true; + if (evi == evi_target) + break; /* found shortest path to target */ + foreach(lc, evi->reachable) + { + ExtensionVersionInfo *evi2 = (ExtensionVersionInfo *) lfirst(lc); + int newdist; + + /* if reject_indirect, treat installable versions as unreachable */ + if (reject_indirect && evi2->installable) + continue; + newdist = evi->distance + 1; + if (newdist < evi2->distance) + { + evi2->distance = newdist; + evi2->previous = evi; + } + else if (newdist == evi2->distance && + evi2->previous != NULL && + strcmp(evi->name, evi2->previous->name) < 0) + { + /* + * Break ties in favor of the version name that comes first + * according to strcmp(). This behavior is undocumented and + * users shouldn't rely on it. We do it just to ensure that + * if there is a tie, the update path that is chosen does not + * depend on random factors like the order in which directory + * entries get visited. 
+ */ + evi2->previous = evi; + } + } + } + + /* Return NIL if target is not reachable from start */ + if (!evi_target->distance_known) + return NIL; + + /* Build and return list of version names representing the update path */ + result = NIL; + for (evi = evi_target; evi != evi_start; evi = evi->previous) + result = lcons(evi->name, result); + + return result; +} + +/* + * Given a target version that is not directly installable, find the + * best installation sequence starting from a directly-installable version. + * + * evi_list: previously-collected version update graph + * evi_target: member of that list that we want to reach + * + * Returns the best starting-point version, or NULL if there is none. + * On success, *best_path is set to the path from the start point. + * + * If there's more than one possible start point, prefer shorter update paths, + * and break any ties arbitrarily on the basis of strcmp'ing the starting + * versions' names. + */ +static ExtensionVersionInfo * +find_install_path(List *evi_list, ExtensionVersionInfo *evi_target, + List **best_path) +{ + ExtensionVersionInfo *evi_start = NULL; + ListCell *lc; + + *best_path = NIL; + + /* + * We don't expect to be called for an installable target, but if we are, + * the answer is easy: just start from there, with an empty update path. + */ + if (evi_target->installable) + return evi_target; + + /* Consider all installable versions as start points */ + foreach(lc, evi_list) + { + ExtensionVersionInfo *evi1 = (ExtensionVersionInfo *) lfirst(lc); + List *path; + + if (!evi1->installable) + continue; + + /* + * Find shortest path from evi1 to evi_target; but no need to consider + * paths going through other installable versions. 
+ */ + path = find_update_path(evi_list, evi1, evi_target, true, true); + if (path == NIL) + continue; + + /* Remember best path */ + if (evi_start == NULL || + list_length(path) < list_length(*best_path) || + (list_length(path) == list_length(*best_path) && + strcmp(evi_start->name, evi1->name) < 0)) + { + evi_start = evi1; + *best_path = path; + } + } + + return evi_start; +} + +/* + * CREATE EXTENSION worker + * + * When CASCADE is specified, CreateExtensionInternal() recurses if required + * extensions need to be installed. To sanely handle cyclic dependencies, + * the "parents" list contains a list of names of extensions already being + * installed, allowing us to error out if we recurse to one of those. + */ +static ObjectAddress +CreateExtensionInternal(char *extensionName, + char *schemaName, + const char *versionName, + bool cascade, + List *parents, + bool is_create) +{ + char *origSchemaName = schemaName; + Oid schemaOid = InvalidOid; + Oid extowner = GetUserId(); + ExtensionControlFile *pcontrol; + ExtensionControlFile *control; + char *filename; + struct stat fst; + List *updateVersions; + List *requiredExtensions; + List *requiredSchemas; + Oid extensionOid; + ObjectAddress address; + ListCell *lc; + + /* + * Read the primary control file. Note we assume that it does not contain + * any non-ASCII data, so there is no need to worry about encoding at this + * point. + */ + pcontrol = read_extension_control_file(extensionName); + + /* + * Determine the version to install + */ + if (versionName == NULL) + { + if (pcontrol->default_version) + versionName = pcontrol->default_version; + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("version to install must be specified"))); + } + check_valid_version_name(versionName); + + /* + * Figure out which script(s) we need to run to install the desired + * version of the extension. 
If we do not have a script that directly + * does what is needed, we try to find a sequence of update scripts that + * will get us there. + */ + filename = get_extension_script_filename(pcontrol, NULL, versionName); + if (stat(filename, &fst) == 0) + { + /* Easy, no extra scripts */ + updateVersions = NIL; + } + else + { + /* Look for best way to install this version */ + List *evi_list; + ExtensionVersionInfo *evi_start; + ExtensionVersionInfo *evi_target; + + /* Extract the version update graph from the script directory */ + evi_list = get_ext_ver_list(pcontrol); + + /* Identify the target version */ + evi_target = get_ext_ver_info(versionName, &evi_list); + + /* Identify best path to reach target */ + evi_start = find_install_path(evi_list, evi_target, + &updateVersions); + + /* Fail if no path ... */ + if (evi_start == NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("extension \"%s\" has no installation script nor update path for version \"%s\"", + pcontrol->name, versionName))); + + /* Otherwise, install best starting point and then upgrade */ + versionName = evi_start->name; + } + + /* + * Fetch control parameters for installation target version + */ + control = read_extension_aux_control_file(pcontrol, versionName); + + /* + * Determine the target schema to install the extension into + */ + if (schemaName) + { + /* If the user is giving us the schema name, it must exist already. */ + schemaOid = get_namespace_oid(schemaName, false); + } + + if (control->schema != NULL) + { + /* + * The extension is not relocatable and the author gave us a schema + * for it. + * + * Unless CASCADE parameter was given, it's an error to give a schema + * different from control->schema if control->schema is specified. 
+ */ + if (schemaName && strcmp(control->schema, schemaName) != 0 && + !cascade) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("extension \"%s\" must be installed in schema \"%s\"", + control->name, + control->schema))); + + /* Always use the schema from control file for current extension. */ + schemaName = control->schema; + + /* Find or create the schema in case it does not exist. */ + schemaOid = get_namespace_oid(schemaName, true); + + if (!OidIsValid(schemaOid)) + { + CreateSchemaStmt *csstmt = makeNode(CreateSchemaStmt); + + csstmt->schemaname = schemaName; + csstmt->authrole = NULL; /* will be created by current user */ + csstmt->schemaElts = NIL; + csstmt->if_not_exists = false; + CreateSchemaCommand(csstmt, "(generated CREATE SCHEMA command)", + -1, -1); + + /* + * CreateSchemaCommand includes CommandCounterIncrement, so new + * schema is now visible. + */ + schemaOid = get_namespace_oid(schemaName, false); + } + } + else if (!OidIsValid(schemaOid)) + { + /* + * Neither user nor author of the extension specified schema; use the + * current default creation namespace, which is the first explicit + * entry in the search_path. + */ + List *search_path = fetch_search_path(false); + + if (search_path == NIL) /* nothing valid in search_path? */ + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_SCHEMA), + errmsg("no schema has been selected to create in"))); + schemaOid = linitial_oid(search_path); + schemaName = get_namespace_name(schemaOid); + if (schemaName == NULL) /* recently-deleted namespace? */ + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_SCHEMA), + errmsg("no schema has been selected to create in"))); + + list_free(search_path); + } + + /* + * Make note if a temporary namespace has been accessed in this + * transaction. + */ + if (isTempNamespace(schemaOid)) + MyXactFlags |= XACT_FLAGS_ACCESSEDTEMPNAMESPACE; + + /* + * We don't check creation rights on the target namespace here. 
If the + * extension script actually creates any objects there, it will fail if + * the user doesn't have such permissions. But there are cases such as + * procedural languages where it's convenient to set schema = pg_catalog + * yet we don't want to restrict the command to users with ACL_CREATE for + * pg_catalog. + */ + + /* + * Look up the prerequisite extensions, install them if necessary, and + * build lists of their OIDs and the OIDs of their target schemas. + */ + requiredExtensions = NIL; + requiredSchemas = NIL; + foreach(lc, control->requires) + { + char *curreq = (char *) lfirst(lc); + Oid reqext; + Oid reqschema; + + reqext = get_required_extension(curreq, + extensionName, + origSchemaName, + cascade, + parents, + is_create); + reqschema = get_extension_schema(reqext); + requiredExtensions = lappend_oid(requiredExtensions, reqext); + requiredSchemas = lappend_oid(requiredSchemas, reqschema); + } + + /* + * Insert new tuple into pg_extension, and create dependency entries. + */ + address = InsertExtensionTuple(control->name, extowner, + schemaOid, control->relocatable, + versionName, + PointerGetDatum(NULL), + PointerGetDatum(NULL), + requiredExtensions); + extensionOid = address.objectId; + + /* + * Apply any control-file comment on extension + */ + if (control->comment != NULL) + CreateComments(extensionOid, ExtensionRelationId, 0, control->comment); + + /* + * Execute the installation script file + */ + execute_extension_script(extensionOid, control, + NULL, versionName, + requiredSchemas, + schemaName, schemaOid); + + /* + * If additional update scripts have to be executed, apply the updates as + * though a series of ALTER EXTENSION UPDATE commands were given + */ + ApplyExtensionUpdates(extensionOid, pcontrol, + versionName, updateVersions, + origSchemaName, cascade, is_create); + + return address; +} + +/* + * Get the OID of an extension listed in "requires", possibly creating it. 
 */
static Oid
get_required_extension(char *reqExtensionName,
                       char *extensionName,
                       char *origSchemaName,
                       bool cascade,
                       List *parents,
                       bool is_create)
{
    Oid         reqExtensionOid;

    /* missing_ok = true: a missing extension is handled by the branch below */
    reqExtensionOid = get_extension_oid(reqExtensionName, true);
    if (!OidIsValid(reqExtensionOid))
    {
        if (cascade)
        {
            /* Must install it. */
            ObjectAddress addr;
            List       *cascade_parents;
            ListCell   *lc;

            /* Check extension name validity before trying to cascade. */
            check_valid_extension_name(reqExtensionName);

            /* Check for cyclic dependency between extensions. */
            foreach(lc, parents)
            {
                char       *pname = (char *) lfirst(lc);

                if (strcmp(pname, reqExtensionName) == 0)
                    ereport(ERROR,
                            (errcode(ERRCODE_INVALID_RECURSION),
                             errmsg("cyclic dependency detected between extensions \"%s\" and \"%s\"",
                                    reqExtensionName, extensionName)));
            }

            ereport(NOTICE,
                    (errmsg("installing required extension \"%s\"",
                            reqExtensionName)));

            /* Add current extension to list of parents to pass down. */
            cascade_parents = lappend(list_copy(parents), extensionName);

            /*
             * Create the required extension.  We propagate the SCHEMA option
             * if any, and CASCADE, but no other options.
             */
            addr = CreateExtensionInternal(reqExtensionName,
                                           origSchemaName,
                                           NULL,
                                           cascade,
                                           cascade_parents,
                                           is_create);

            /* Get its newly-assigned OID. */
            reqExtensionOid = addr.objectId;
        }
        else
            /* No CASCADE: report the missing prerequisite to the user */
            ereport(ERROR,
                    (errcode(ERRCODE_UNDEFINED_OBJECT),
                     errmsg("required extension \"%s\" is not installed",
                            reqExtensionName),
                     is_create ?
                     errhint("Use CREATE EXTENSION ... CASCADE to install required extensions too.") : 0));
    }

    return reqExtensionOid;
}

/*
 * CREATE EXTENSION
 */
ObjectAddress
CreateExtension(ParseState *pstate, CreateExtensionStmt *stmt)
{
    DefElem    *d_schema = NULL;
    DefElem    *d_new_version = NULL;
    DefElem    *d_cascade = NULL;
    char       *schemaName = NULL;
    char       *versionName = NULL;
    bool        cascade = false;
    ListCell   *lc;

    /* Check extension name validity before any filesystem access */
    check_valid_extension_name(stmt->extname);

    /*
     * Check for duplicate extension name.  The unique index on
     * pg_extension.extname would catch this anyway, and serves as a backstop
     * in case of race conditions; but this is a friendlier error message, and
     * besides we need a check to support IF NOT EXISTS.
     */
    if (get_extension_oid(stmt->extname, true) != InvalidOid)
    {
        if (stmt->if_not_exists)
        {
            ereport(NOTICE,
                    (errcode(ERRCODE_DUPLICATE_OBJECT),
                     errmsg("extension \"%s\" already exists, skipping",
                            stmt->extname)));
            return InvalidObjectAddress;
        }
        else
            ereport(ERROR,
                    (errcode(ERRCODE_DUPLICATE_OBJECT),
                     errmsg("extension \"%s\" already exists",
                            stmt->extname)));
    }

    /*
     * We use global variables to track the extension being created, so we can
     * create only one extension at the same time.
     */
    if (creating_extension)
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("nested CREATE EXTENSION is not supported")));

    /* Deconstruct the statement option list */
    foreach(lc, stmt->options)
    {
        DefElem    *defel = (DefElem *) lfirst(lc);

        if (strcmp(defel->defname, "schema") == 0)
        {
            /* Reject the same option being given twice */
            if (d_schema)
                errorConflictingDefElem(defel, pstate);
            d_schema = defel;
            schemaName = defGetString(d_schema);
        }
        else if (strcmp(defel->defname, "new_version") == 0)
        {
            if (d_new_version)
                errorConflictingDefElem(defel, pstate);
            d_new_version = defel;
            versionName = defGetString(d_new_version);
        }
        else if (strcmp(defel->defname, "cascade") == 0)
        {
            if (d_cascade)
                errorConflictingDefElem(defel, pstate);
            d_cascade = defel;
            cascade = defGetBoolean(d_cascade);
        }
        else
            elog(ERROR, "unrecognized option: %s", defel->defname);
    }

    /* Call CreateExtensionInternal to do the real work. */
    return CreateExtensionInternal(stmt->extname,
                                   schemaName,
                                   versionName,
                                   cascade,
                                   NIL,
                                   true);
}

/*
 * InsertExtensionTuple
 *
 * Insert the new pg_extension row, and create extension's dependency entries.
 * Return the OID assigned to the new row.
 *
 * This is exported for the benefit of pg_upgrade, which has to create a
 * pg_extension entry (and the extension-level dependencies) without
 * actually running the extension's script.
 *
 * extConfig and extCondition should be arrays or PointerGetDatum(NULL).
 * We declare them as plain Datum to avoid needing array.h in extension.h.
 */
ObjectAddress
InsertExtensionTuple(const char *extName, Oid extOwner,
                     Oid schemaOid, bool relocatable, const char *extVersion,
                     Datum extConfig, Datum extCondition,
                     List *requiredExtensions)
{
    Oid         extensionOid;
    Relation    rel;
    Datum       values[Natts_pg_extension];
    bool        nulls[Natts_pg_extension];
    HeapTuple   tuple;
    ObjectAddress myself;
    ObjectAddress nsp;
    ObjectAddresses *refobjs;
    ListCell   *lc;

    /*
     * Build and insert the pg_extension tuple
     */
    rel = table_open(ExtensionRelationId, RowExclusiveLock);

    memset(values, 0, sizeof(values));
    memset(nulls, 0, sizeof(nulls));

    /* Assign a new OID for the extension before forming the tuple */
    extensionOid = GetNewOidWithIndex(rel, ExtensionOidIndexId,
                                      Anum_pg_extension_oid);
    values[Anum_pg_extension_oid - 1] = ObjectIdGetDatum(extensionOid);
    values[Anum_pg_extension_extname - 1] =
        DirectFunctionCall1(namein, CStringGetDatum(extName));
    values[Anum_pg_extension_extowner - 1] = ObjectIdGetDatum(extOwner);
    values[Anum_pg_extension_extnamespace - 1] = ObjectIdGetDatum(schemaOid);
    values[Anum_pg_extension_extrelocatable - 1] = BoolGetDatum(relocatable);
    values[Anum_pg_extension_extversion - 1] = CStringGetTextDatum(extVersion);

    /* Absent config/condition arrays are stored as SQL NULLs */
    if (extConfig == PointerGetDatum(NULL))
        nulls[Anum_pg_extension_extconfig - 1] = true;
    else
        values[Anum_pg_extension_extconfig - 1] = extConfig;

    if (extCondition == PointerGetDatum(NULL))
        nulls[Anum_pg_extension_extcondition - 1] = true;
    else
        values[Anum_pg_extension_extcondition - 1] = extCondition;

    tuple = heap_form_tuple(rel->rd_att, values, nulls);

    CatalogTupleInsert(rel, tuple);

    heap_freetuple(tuple);
    table_close(rel, RowExclusiveLock);

    /*
     * Record dependencies on owner, schema, and prerequisite extensions
     */
    recordDependencyOnOwner(ExtensionRelationId, extensionOid, extOwner);

    refobjs = new_object_addresses();

    ObjectAddressSet(myself, ExtensionRelationId, extensionOid);

    ObjectAddressSet(nsp, NamespaceRelationId, schemaOid);
    add_exact_object_address(&nsp, refobjs);

    foreach(lc, requiredExtensions)
    {
        Oid         reqext = lfirst_oid(lc);
        ObjectAddress otherext;

        ObjectAddressSet(otherext, ExtensionRelationId, reqext);
        add_exact_object_address(&otherext, refobjs);
    }

    /* Record all of them (this includes duplicate elimination) */
    record_object_address_dependencies(&myself, refobjs, DEPENDENCY_NORMAL);
    free_object_addresses(refobjs);

    /* Post creation hook for new extension */
    InvokeObjectPostCreateHook(ExtensionRelationId, extensionOid, 0);

    return myself;
}

/*
 * Guts of extension deletion.
 *
 * All we need do here is remove the pg_extension tuple itself.  Everything
 * else is taken care of by the dependency infrastructure.
 */
void
RemoveExtensionById(Oid extId)
{
    Relation    rel;
    SysScanDesc scandesc;
    HeapTuple   tuple;
    ScanKeyData entry[1];

    /*
     * Disallow deletion of any extension that's currently open for insertion;
     * else subsequent executions of recordDependencyOnCurrentExtension()
     * could create dangling pg_depend records that refer to a no-longer-valid
     * pg_extension OID.  This is needed not so much because we think people
     * might write "DROP EXTENSION foo" in foo's own script files, as because
     * errors in dependency management in extension script files could give
     * rise to cases where an extension is dropped as a result of recursing
     * from some contained object.  Because of that, we must test for the case
     * here, not at some higher level of the DROP EXTENSION command.
     */
    if (extId == CurrentExtensionObject)
        ereport(ERROR,
                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                 errmsg("cannot drop extension \"%s\" because it is being modified",
                        get_extension_name(extId))));

    rel = table_open(ExtensionRelationId, RowExclusiveLock);

    /* Look up the target row by its OID via the OID index */
    ScanKeyInit(&entry[0],
                Anum_pg_extension_oid,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(extId));
    scandesc = systable_beginscan(rel, ExtensionOidIndexId, true,
                                  NULL, 1, entry);

    tuple = systable_getnext(scandesc);

    /* We assume that there can be at most one matching tuple */
    if (HeapTupleIsValid(tuple))
        CatalogTupleDelete(rel, &tuple->t_self);

    systable_endscan(scandesc);

    table_close(rel, RowExclusiveLock);
}

/*
 * This function lists the available extensions (one row per primary control
 * file in the control directory).  We parse each control file and report the
 * interesting fields.
 *
 * The system view pg_available_extensions provides a user interface to this
 * SRF, adding information about whether the extensions are installed in the
 * current DB.
 */
Datum
pg_available_extensions(PG_FUNCTION_ARGS)
{
    ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
    char       *location;
    DIR        *dir;
    struct dirent *de;

    /* Build tuplestore to hold the result rows */
    InitMaterializedSRF(fcinfo, 0);

    location = get_extension_control_directory();
    dir = AllocateDir(location);

    /*
     * If the control directory doesn't exist, we want to silently return an
     * empty set.  Any other error will be reported by ReadDir.
 */
    if (dir == NULL && errno == ENOENT)
    {
        /* do nothing */
    }
    else
    {
        while ((de = ReadDir(dir, location)) != NULL)
        {
            ExtensionControlFile *control;
            char       *extname;
            Datum       values[3];
            bool        nulls[3];

            if (!is_extension_control_filename(de->d_name))
                continue;

            /* extract extension name from 'name.control' filename */
            extname = pstrdup(de->d_name);
            *strrchr(extname, '.') = '\0';

            /* ignore it if it's an auxiliary control file ("--" in name) */
            if (strstr(extname, "--"))
                continue;

            control = read_extension_control_file(extname);

            memset(values, 0, sizeof(values));
            memset(nulls, 0, sizeof(nulls));

            /* name */
            values[0] = DirectFunctionCall1(namein,
                                            CStringGetDatum(control->name));
            /* default_version */
            if (control->default_version == NULL)
                nulls[1] = true;
            else
                values[1] = CStringGetTextDatum(control->default_version);
            /* comment */
            if (control->comment == NULL)
                nulls[2] = true;
            else
                values[2] = CStringGetTextDatum(control->comment);

            tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
                                 values, nulls);
        }

        FreeDir(dir);
    }

    return (Datum) 0;
}

/*
 * This function lists the available extension versions (one row per
 * extension installation script).  For each version, we parse the related
 * control file(s) and report the interesting fields.
 *
 * The system view pg_available_extension_versions provides a user interface
 * to this SRF, adding information about which versions are installed in the
 * current DB.
 */
Datum
pg_available_extension_versions(PG_FUNCTION_ARGS)
{
    ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
    char       *location;
    DIR        *dir;
    struct dirent *de;

    /* Build tuplestore to hold the result rows */
    InitMaterializedSRF(fcinfo, 0);

    location = get_extension_control_directory();
    dir = AllocateDir(location);

    /*
     * If the control directory doesn't exist, we want to silently return an
     * empty set.  Any other error will be reported by ReadDir.
     */
    if (dir == NULL && errno == ENOENT)
    {
        /* do nothing */
    }
    else
    {
        while ((de = ReadDir(dir, location)) != NULL)
        {
            ExtensionControlFile *control;
            char       *extname;

            if (!is_extension_control_filename(de->d_name))
                continue;

            /* extract extension name from 'name.control' filename */
            extname = pstrdup(de->d_name);
            *strrchr(extname, '.') = '\0';

            /* ignore it if it's an auxiliary control file ("--" in name) */
            if (strstr(extname, "--"))
                continue;

            /* read the control file */
            control = read_extension_control_file(extname);

            /* scan extension's script directory for install scripts */
            get_available_versions_for_extension(control, rsinfo->setResult,
                                                 rsinfo->setDesc);
        }

        FreeDir(dir);
    }

    return (Datum) 0;
}

/*
 * Inner loop for pg_available_extension_versions:
 * read versions of one extension, add rows to tupstore
 */
static void
get_available_versions_for_extension(ExtensionControlFile *pcontrol,
                                     Tuplestorestate *tupstore,
                                     TupleDesc tupdesc)
{
    List       *evi_list;
    ListCell   *lc;

    /* Extract the version update graph from the script directory */
    evi_list = get_ext_ver_list(pcontrol);

    /* For each installable version ... */
    foreach(lc, evi_list)
    {
        ExtensionVersionInfo *evi = (ExtensionVersionInfo *) lfirst(lc);
        ExtensionControlFile *control;
        Datum       values[8];
        bool        nulls[8];
        ListCell   *lc2;

        if (!evi->installable)
            continue;

        /*
         * Fetch parameters for specific version (pcontrol is not changed)
         */
        control = read_extension_aux_control_file(pcontrol, evi->name);

        memset(values, 0, sizeof(values));
        memset(nulls, 0, sizeof(nulls));

        /* name */
        values[0] = DirectFunctionCall1(namein,
                                        CStringGetDatum(control->name));
        /* version */
        values[1] = CStringGetTextDatum(evi->name);
        /* superuser */
        values[2] = BoolGetDatum(control->superuser);
        /* trusted */
        values[3] = BoolGetDatum(control->trusted);
        /* relocatable */
        values[4] = BoolGetDatum(control->relocatable);
        /* schema */
        if (control->schema == NULL)
            nulls[5] = true;
        else
            values[5] = DirectFunctionCall1(namein,
                                            CStringGetDatum(control->schema));
        /* requires */
        if (control->requires == NIL)
            nulls[6] = true;
        else
            values[6] = convert_requires_to_datum(control->requires);
        /* comment */
        if (control->comment == NULL)
            nulls[7] = true;
        else
            values[7] = CStringGetTextDatum(control->comment);

        tuplestore_putvalues(tupstore, tupdesc, values, nulls);

        /*
         * Find all non-directly-installable versions that would be installed
         * starting from this version, and report them, inheriting the
         * parameters that aren't changed in updates from this version.
 */
        foreach(lc2, evi_list)
        {
            ExtensionVersionInfo *evi2 = (ExtensionVersionInfo *) lfirst(lc2);
            List       *best_path;

            if (evi2->installable)
                continue;
            if (find_install_path(evi_list, evi2, &best_path) == evi)
            {
                /*
                 * Fetch parameters for this version (pcontrol is not changed)
                 */
                control = read_extension_aux_control_file(pcontrol, evi2->name);

                /*
                 * values[]/nulls[] still hold the parent installable
                 * version's entries; only the fields that can change in an
                 * update are overwritten below.
                 */
                /* name stays the same */
                /* version */
                values[1] = CStringGetTextDatum(evi2->name);
                /* superuser */
                values[2] = BoolGetDatum(control->superuser);
                /* trusted */
                values[3] = BoolGetDatum(control->trusted);
                /* relocatable */
                values[4] = BoolGetDatum(control->relocatable);
                /* schema stays the same */
                /* requires */
                if (control->requires == NIL)
                    nulls[6] = true;
                else
                {
                    values[6] = convert_requires_to_datum(control->requires);
                    nulls[6] = false;
                }
                /* comment stays the same */

                tuplestore_putvalues(tupdesc ? tupstore : tupstore, tupdesc, values, nulls);
            }
        }
    }
}

/*
 * Test whether the given extension exists (not whether it's installed)
 *
 * This checks for the existence of a matching control file in the extension
 * directory.  That's not a bulletproof check, since the file might be
 * invalid, but this is only used for hints so it doesn't have to be 100%
 * right.
 */
bool
extension_file_exists(const char *extensionName)
{
    bool        result = false;
    char       *location;
    DIR        *dir;
    struct dirent *de;

    location = get_extension_control_directory();
    dir = AllocateDir(location);

    /*
     * If the control directory doesn't exist, we want to silently return
     * false.  Any other error will be reported by ReadDir.
     */
    if (dir == NULL && errno == ENOENT)
    {
        /* do nothing */
    }
    else
    {
        while ((de = ReadDir(dir, location)) != NULL)
        {
            char       *extname;

            if (!is_extension_control_filename(de->d_name))
                continue;

            /* extract extension name from 'name.control' filename */
            extname = pstrdup(de->d_name);
            *strrchr(extname, '.') = '\0';

            /* ignore it if it's an auxiliary control file ("--" in name) */
            if (strstr(extname, "--"))
                continue;

            /* done if it matches request */
            if (strcmp(extname, extensionName) == 0)
            {
                result = true;
                break;
            }
        }

        FreeDir(dir);
    }

    return result;
}

/*
 * Convert a list of extension names to a name[] Datum
 */
static Datum
convert_requires_to_datum(List *requires)
{
    Datum      *datums;
    int         ndatums;
    ArrayType  *a;
    ListCell   *lc;

    ndatums = list_length(requires);
    datums = (Datum *) palloc(ndatums * sizeof(Datum));
    /* reuse ndatums as the fill counter while walking the list */
    ndatums = 0;
    foreach(lc, requires)
    {
        char       *curreq = (char *) lfirst(lc);

        datums[ndatums++] =
            DirectFunctionCall1(namein, CStringGetDatum(curreq));
    }
    a = construct_array(datums, ndatums,
                        NAMEOID,
                        NAMEDATALEN, false, TYPALIGN_CHAR);
    return PointerGetDatum(a);
}

/*
 * This function reports the version update paths that exist for the
 * specified extension.
 */
Datum
pg_extension_update_paths(PG_FUNCTION_ARGS)
{
    Name        extname = PG_GETARG_NAME(0);
    ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
    List       *evi_list;
    ExtensionControlFile *control;
    ListCell   *lc1;

    /* Check extension name validity before any filesystem access */
    check_valid_extension_name(NameStr(*extname));

    /* Build tuplestore to hold the result rows */
    InitMaterializedSRF(fcinfo, 0);

    /* Read the extension's control file */
    control = read_extension_control_file(NameStr(*extname));

    /* Extract the version update graph from the script directory */
    evi_list = get_ext_ver_list(control);

    /* Iterate over all pairs of versions */
    foreach(lc1, evi_list)
    {
        ExtensionVersionInfo *evi1 = (ExtensionVersionInfo *) lfirst(lc1);
        ListCell   *lc2;

        foreach(lc2, evi_list)
        {
            ExtensionVersionInfo *evi2 = (ExtensionVersionInfo *) lfirst(lc2);
            List       *path;
            Datum       values[3];
            bool        nulls[3];

            if (evi1 == evi2)
                continue;

            /* Find shortest path from evi1 to evi2 */
            path = find_update_path(evi_list, evi1, evi2, false, true);

            /* Emit result row */
            memset(values, 0, sizeof(values));
            memset(nulls, 0, sizeof(nulls));

            /* source */
            values[0] = CStringGetTextDatum(evi1->name);
            /* target */
            values[1] = CStringGetTextDatum(evi2->name);
            /* path */
            if (path == NIL)
                nulls[2] = true;
            else
            {
                StringInfoData pathbuf;
                ListCell   *lcv;

                initStringInfo(&pathbuf);
                /* The path doesn't include start vertex, but show it */
                appendStringInfoString(&pathbuf, evi1->name);
                foreach(lcv, path)
                {
                    char       *versionName = (char *) lfirst(lcv);

                    appendStringInfoString(&pathbuf, "--");
                    appendStringInfoString(&pathbuf, versionName);
                }
                values[2] = CStringGetTextDatum(pathbuf.data);
                pfree(pathbuf.data);
            }

            tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
                                 values, nulls);
        }
    }

    return (Datum) 0;
}

/*
 * pg_extension_config_dump
 *
 * Record information about a
configuration table that belongs to an
 * extension being created, but whose contents should be dumped in whole
 * or in part during pg_dump.
 */
Datum
pg_extension_config_dump(PG_FUNCTION_ARGS)
{
    Oid         tableoid = PG_GETARG_OID(0);
    text       *wherecond = PG_GETARG_TEXT_PP(1);
    char       *tablename;
    Relation    extRel;
    ScanKeyData key[1];
    SysScanDesc extScan;
    HeapTuple   extTup;
    Datum       arrayDatum;
    Datum       elementDatum;
    int         arrayLength;
    int         arrayIndex;
    bool        isnull;
    Datum       repl_val[Natts_pg_extension];
    bool        repl_null[Natts_pg_extension];
    bool        repl_repl[Natts_pg_extension];
    ArrayType  *a;

    /*
     * We only allow this to be called from an extension's SQL script. We
     * shouldn't need any permissions check beyond that.
     */
    if (!creating_extension)
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("%s can only be called from an SQL script executed by CREATE EXTENSION",
                        "pg_extension_config_dump()")));

    /*
     * Check that the table exists and is a member of the extension being
     * created.  This ensures that we don't need to register an additional
     * dependency to protect the extconfig entry.
     */
    tablename = get_rel_name(tableoid);
    if (tablename == NULL)
        ereport(ERROR,
                (errcode(ERRCODE_UNDEFINED_TABLE),
                 errmsg("OID %u does not refer to a table", tableoid)));
    if (getExtensionOfObject(RelationRelationId, tableoid) !=
        CurrentExtensionObject)
        ereport(ERROR,
                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                 errmsg("table \"%s\" is not a member of the extension being created",
                        tablename)));

    /*
     * Add the table OID and WHERE condition to the extension's extconfig and
     * extcondition arrays.
     *
     * If the table is already in extconfig, treat this as an update of the
     * WHERE condition.
     */

    /* Find the pg_extension tuple */
    extRel = table_open(ExtensionRelationId, RowExclusiveLock);

    ScanKeyInit(&key[0],
                Anum_pg_extension_oid,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(CurrentExtensionObject));

    extScan = systable_beginscan(extRel, ExtensionOidIndexId, true,
                                 NULL, 1, key);

    extTup = systable_getnext(extScan);

    if (!HeapTupleIsValid(extTup))      /* should not happen */
        elog(ERROR, "could not find tuple for extension %u",
             CurrentExtensionObject);

    memset(repl_val, 0, sizeof(repl_val));
    memset(repl_null, false, sizeof(repl_null));
    memset(repl_repl, false, sizeof(repl_repl));

    /* Build or modify the extconfig value */
    elementDatum = ObjectIdGetDatum(tableoid);

    arrayDatum = heap_getattr(extTup, Anum_pg_extension_extconfig,
                              RelationGetDescr(extRel), &isnull);
    if (isnull)
    {
        /* Previously empty extconfig, so build 1-element array */
        arrayLength = 0;
        arrayIndex = 1;

        a = construct_array(&elementDatum, 1,
                            OIDOID,
                            sizeof(Oid), true, TYPALIGN_INT);
    }
    else
    {
        /* Modify or extend existing extconfig array */
        Oid        *arrayData;
        int         i;

        a = DatumGetArrayTypeP(arrayDatum);

        arrayLength = ARR_DIMS(a)[0];
        if (ARR_NDIM(a) != 1 ||
            ARR_LBOUND(a)[0] != 1 ||
            arrayLength < 0 ||
            ARR_HASNULL(a) ||
            ARR_ELEMTYPE(a) != OIDOID)
            elog(ERROR, "extconfig is not a 1-D Oid array");
        arrayData = (Oid *) ARR_DATA_PTR(a);

        /* arrayIndex is 1-based, as array_set expects */
        arrayIndex = arrayLength + 1;   /* set up to add after end */

        for (i = 0; i < arrayLength; i++)
        {
            if (arrayData[i] == tableoid)
            {
                arrayIndex = i + 1;     /* replace this element instead */
                break;
            }
        }

        a = array_set(a, 1, &arrayIndex,
                      elementDatum,
                      false,
                      -1 /* varlena array */ ,
                      sizeof(Oid) /* OID's typlen */ ,
                      true /* OID's typbyval */ ,
                      TYPALIGN_INT /* OID's typalign */ );
    }
    repl_val[Anum_pg_extension_extconfig - 1] = PointerGetDatum(a);
    repl_repl[Anum_pg_extension_extconfig - 1] = true;

    /* Build or modify the extcondition value */
    /* elementDatum now holds the WHERE clause text, not the table OID */
    elementDatum = PointerGetDatum(wherecond);

    arrayDatum = heap_getattr(extTup, Anum_pg_extension_extcondition,
                              RelationGetDescr(extRel), &isnull);
    if (isnull)
    {
        if (arrayLength != 0)
            elog(ERROR, "extconfig and extcondition arrays do not match");

        a = construct_array(&elementDatum, 1,
                            TEXTOID,
                            -1, false, TYPALIGN_INT);
    }
    else
    {
        a = DatumGetArrayTypeP(arrayDatum);

        if (ARR_NDIM(a) != 1 ||
            ARR_LBOUND(a)[0] != 1 ||
            ARR_HASNULL(a) ||
            ARR_ELEMTYPE(a) != TEXTOID)
            elog(ERROR, "extcondition is not a 1-D text array");
        if (ARR_DIMS(a)[0] != arrayLength)
            elog(ERROR, "extconfig and extcondition arrays do not match");

        /* Add or replace at same index as in extconfig */
        a = array_set(a, 1, &arrayIndex,
                      elementDatum,
                      false,
                      -1 /* varlena array */ ,
                      -1 /* TEXT's typlen */ ,
                      false /* TEXT's typbyval */ ,
                      TYPALIGN_INT /* TEXT's typalign */ );
    }
    repl_val[Anum_pg_extension_extcondition - 1] = PointerGetDatum(a);
    repl_repl[Anum_pg_extension_extcondition - 1] = true;

    extTup = heap_modify_tuple(extTup, RelationGetDescr(extRel),
                               repl_val, repl_null, repl_repl);

    CatalogTupleUpdate(extRel, &extTup->t_self, extTup);

    systable_endscan(extScan);

    table_close(extRel, RowExclusiveLock);

    PG_RETURN_VOID();
}

/*
 * extension_config_remove
 *
 * Remove the specified table OID from extension's extconfig, if present.
 * This is not currently exposed as a function, but it could be;
 * for now, we just invoke it from ALTER EXTENSION DROP.
 */
static void
extension_config_remove(Oid extensionoid, Oid tableoid)
{
    Relation    extRel;
    ScanKeyData key[1];
    SysScanDesc extScan;
    HeapTuple   extTup;
    Datum       arrayDatum;
    int         arrayLength;
    int         arrayIndex;
    bool        isnull;
    Datum       repl_val[Natts_pg_extension];
    bool        repl_null[Natts_pg_extension];
    bool        repl_repl[Natts_pg_extension];
    ArrayType  *a;

    /* Find the pg_extension tuple */
    extRel = table_open(ExtensionRelationId, RowExclusiveLock);

    ScanKeyInit(&key[0],
                Anum_pg_extension_oid,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(extensionoid));

    extScan = systable_beginscan(extRel, ExtensionOidIndexId, true,
                                 NULL, 1, key);

    extTup = systable_getnext(extScan);

    if (!HeapTupleIsValid(extTup))      /* should not happen */
        elog(ERROR, "could not find tuple for extension %u",
             extensionoid);

    /* Search extconfig for the tableoid */
    arrayDatum = heap_getattr(extTup, Anum_pg_extension_extconfig,
                              RelationGetDescr(extRel), &isnull);
    if (isnull)
    {
        /* nothing to do */
        a = NULL;
        arrayLength = 0;
        arrayIndex = -1;
    }
    else
    {
        Oid        *arrayData;
        int         i;

        a = DatumGetArrayTypeP(arrayDatum);

        arrayLength = ARR_DIMS(a)[0];
        if (ARR_NDIM(a) != 1 ||
            ARR_LBOUND(a)[0] != 1 ||
            arrayLength < 0 ||
            ARR_HASNULL(a) ||
            ARR_ELEMTYPE(a) != OIDOID)
            elog(ERROR, "extconfig is not a 1-D Oid array");
        arrayData = (Oid *) ARR_DATA_PTR(a);

        /* arrayIndex is the 0-based entry to remove, or -1 if not found */
        arrayIndex = -1;        /* flag for no deletion needed */

        for (i = 0; i < arrayLength; i++)
        {
            if (arrayData[i] == tableoid)
            {
                arrayIndex = i; /* index to remove */
                break;
            }
        }
    }

    /* If tableoid is not in extconfig, nothing to do */
    if (arrayIndex < 0)
    {
        systable_endscan(extScan);
        table_close(extRel, RowExclusiveLock);
        return;
    }

    /* Modify or delete the extconfig value */
    memset(repl_val, 0, sizeof(repl_val));
    memset(repl_null, false, sizeof(repl_null));
    memset(repl_repl, false, sizeof(repl_repl));

    if (arrayLength <= 1)
    {
        /* removing only element, just set array to null */
        repl_null[Anum_pg_extension_extconfig - 1] = true;
    }
    else
    {
        /* squeeze out the target element */
        Datum      *dvalues;
        int         nelems;
        int         i;

        /* We already checked there are no nulls */
        deconstruct_array(a, OIDOID, sizeof(Oid), true, TYPALIGN_INT,
                          &dvalues, NULL, &nelems);

        for (i = arrayIndex; i < arrayLength - 1; i++)
            dvalues[i] = dvalues[i + 1];

        a = construct_array(dvalues, arrayLength - 1,
                            OIDOID, sizeof(Oid), true, TYPALIGN_INT);

        repl_val[Anum_pg_extension_extconfig - 1] = PointerGetDatum(a);
    }
    repl_repl[Anum_pg_extension_extconfig - 1] = true;

    /* Modify or delete the extcondition value */
    arrayDatum = heap_getattr(extTup, Anum_pg_extension_extcondition,
                              RelationGetDescr(extRel), &isnull);
    if (isnull)
    {
        elog(ERROR, "extconfig and extcondition arrays do not match");
    }
    else
    {
        a = DatumGetArrayTypeP(arrayDatum);

        if (ARR_NDIM(a) != 1 ||
            ARR_LBOUND(a)[0] != 1 ||
            ARR_HASNULL(a) ||
            ARR_ELEMTYPE(a) != TEXTOID)
            elog(ERROR, "extcondition is not a 1-D text array");
        if (ARR_DIMS(a)[0] != arrayLength)
            elog(ERROR, "extconfig and extcondition arrays do not match");
    }

    if (arrayLength <= 1)
    {
        /* removing only element, just set array to null */
        repl_null[Anum_pg_extension_extcondition - 1] = true;
    }
    else
    {
        /* squeeze out the target element */
        Datum      *dvalues;
        int         nelems;
        int         i;

        /* We already checked there are no nulls */
        deconstruct_array(a, TEXTOID, -1, false, TYPALIGN_INT,
                          &dvalues, NULL, &nelems);

        for (i = arrayIndex; i < arrayLength - 1; i++)
            dvalues[i] = dvalues[i + 1];

        a = construct_array(dvalues, arrayLength - 1,
                            TEXTOID, -1, false, TYPALIGN_INT);

        repl_val[Anum_pg_extension_extcondition - 1] = PointerGetDatum(a);
    }
    repl_repl[Anum_pg_extension_extcondition - 1] = true;

    extTup = heap_modify_tuple(extTup, RelationGetDescr(extRel),
                               repl_val, repl_null, repl_repl);

    CatalogTupleUpdate(extRel, &extTup->t_self, extTup);

    systable_endscan(extScan);

    table_close(extRel, RowExclusiveLock);
}

/*
 * Execute ALTER EXTENSION SET SCHEMA
 */
ObjectAddress
AlterExtensionNamespace(const char *extensionName, const char *newschema, Oid *oldschema)
{
    Oid         extensionOid;
    Oid         nspOid;
    Oid         oldNspOid;
    AclResult   aclresult;
    Relation    extRel;
    ScanKeyData key[2];
    SysScanDesc extScan;
    HeapTuple   extTup;
    Form_pg_extension extForm;
    Relation    depRel;
    SysScanDesc depScan;
    HeapTuple   depTup;
    ObjectAddresses *objsMoved;
    ObjectAddress extAddr;

    extensionOid = get_extension_oid(extensionName, false);

    nspOid = LookupCreationNamespace(newschema);

    /*
     * Permission check: must own extension.  Note that we don't bother to
     * check ownership of the individual member objects ...
     */
    if (!pg_extension_ownercheck(extensionOid, GetUserId()))
        aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_EXTENSION,
                       extensionName);

    /* Permission check: must have creation rights in target namespace */
    aclresult = pg_namespace_aclcheck(nspOid, GetUserId(), ACL_CREATE);
    if (aclresult != ACLCHECK_OK)
        aclcheck_error(aclresult, OBJECT_SCHEMA, newschema);

    /*
     * If the schema is currently a member of the extension, disallow moving
     * the extension into the schema.  That would create a dependency loop.
 */
    if (getExtensionOfObject(NamespaceRelationId, nspOid) == extensionOid)
        ereport(ERROR,
                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                 errmsg("cannot move extension \"%s\" into schema \"%s\" "
                        "because the extension contains the schema",
                        extensionName, newschema)));

    /* Locate the pg_extension tuple */
    extRel = table_open(ExtensionRelationId, RowExclusiveLock);

    ScanKeyInit(&key[0],
                Anum_pg_extension_oid,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(extensionOid));

    extScan = systable_beginscan(extRel, ExtensionOidIndexId, true,
                                 NULL, 1, key);

    extTup = systable_getnext(extScan);

    if (!HeapTupleIsValid(extTup))      /* should not happen */
        elog(ERROR, "could not find tuple for extension %u",
             extensionOid);

    /* Copy tuple so we can modify it below */
    extTup = heap_copytuple(extTup);
    extForm = (Form_pg_extension) GETSTRUCT(extTup);

    systable_endscan(extScan);

    /*
     * If the extension is already in the target schema, just silently do
     * nothing.
     */
    if (extForm->extnamespace == nspOid)
    {
        table_close(extRel, RowExclusiveLock);
        return InvalidObjectAddress;
    }

    /* Check extension is supposed to be relocatable */
    if (!extForm->extrelocatable)
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("extension \"%s\" does not support SET SCHEMA",
                        NameStr(extForm->extname))));

    objsMoved = new_object_addresses();

    /* store the OID of the namespace to-be-changed */
    oldNspOid = extForm->extnamespace;

    /*
     * Scan pg_depend to find objects that depend directly on the extension,
     * and alter each one's schema.
     */
    depRel = table_open(DependRelationId, AccessShareLock);

    ScanKeyInit(&key[0],
                Anum_pg_depend_refclassid,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(ExtensionRelationId));
    ScanKeyInit(&key[1],
                Anum_pg_depend_refobjid,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(extensionOid));

    depScan = systable_beginscan(depRel, DependReferenceIndexId, true,
                                 NULL, 2, key);

    while (HeapTupleIsValid(depTup = systable_getnext(depScan)))
    {
        Form_pg_depend pg_depend = (Form_pg_depend) GETSTRUCT(depTup);
        ObjectAddress dep;
        Oid         dep_oldNspOid;

        /*
         * Ignore non-membership dependencies.  (Currently, the only other
         * case we could see here is a normal dependency from another
         * extension.)
         */
        if (pg_depend->deptype != DEPENDENCY_EXTENSION)
            continue;

        dep.classId = pg_depend->classid;
        dep.objectId = pg_depend->objid;
        dep.objectSubId = pg_depend->objsubid;

        if (dep.objectSubId != 0)       /* should not happen */
            elog(ERROR, "extension should not have a sub-object dependency");

        /* Relocate the object */
        dep_oldNspOid = AlterObjectNamespace_oid(dep.classId,
                                                 dep.objectId,
                                                 nspOid,
                                                 objsMoved);

        /*
         * If not all the objects had the same old namespace (ignoring any
         * that are not in namespaces), complain.
         */
        if (dep_oldNspOid != InvalidOid && dep_oldNspOid != oldNspOid)
            ereport(ERROR,
                    (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                     errmsg("extension \"%s\" does not support SET SCHEMA",
                            NameStr(extForm->extname)),
                     errdetail("%s is not in the extension's schema \"%s\"",
                               getObjectDescription(&dep, false),
                               get_namespace_name(oldNspOid))));
    }

    /* report old schema, if caller wants it */
    if (oldschema)
        *oldschema = oldNspOid;

    systable_endscan(depScan);

    relation_close(depRel, AccessShareLock);

    /* Now adjust pg_extension.extnamespace */
    extForm->extnamespace = nspOid;

    CatalogTupleUpdate(extRel, &extTup->t_self, extTup);

    table_close(extRel, RowExclusiveLock);

    /* update dependencies to point to the new schema */
    changeDependencyFor(ExtensionRelationId, extensionOid,
                        NamespaceRelationId, oldNspOid, nspOid);

    InvokeObjectPostAlterHook(ExtensionRelationId, extensionOid, 0);

    ObjectAddressSet(extAddr, ExtensionRelationId, extensionOid);

    return extAddr;
}

/*
 * Execute ALTER EXTENSION UPDATE
 */
ObjectAddress
ExecAlterExtensionStmt(ParseState *pstate, AlterExtensionStmt *stmt)
{
    DefElem    *d_new_version = NULL;
    char       *versionName;
    char       *oldVersionName;
    ExtensionControlFile *control;
    Oid         extensionOid;
    Relation    extRel;
    ScanKeyData key[1];
    SysScanDesc extScan;
    HeapTuple   extTup;
    List       *updateVersions;
    Datum       datum;
    bool        isnull;
    ListCell   *lc;
    ObjectAddress address;

    /*
     * We use global variables to track the extension being created, so we can
     * create/update only one extension at the same time.
+ */ + if (creating_extension) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("nested ALTER EXTENSION is not supported"))); + + /* + * Look up the extension --- it must already exist in pg_extension + */ + extRel = table_open(ExtensionRelationId, AccessShareLock); + + ScanKeyInit(&key[0], + Anum_pg_extension_extname, + BTEqualStrategyNumber, F_NAMEEQ, + CStringGetDatum(stmt->extname)); + + extScan = systable_beginscan(extRel, ExtensionNameIndexId, true, + NULL, 1, key); + + extTup = systable_getnext(extScan); + + if (!HeapTupleIsValid(extTup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("extension \"%s\" does not exist", + stmt->extname))); + + extensionOid = ((Form_pg_extension) GETSTRUCT(extTup))->oid; + + /* + * Determine the existing version we are updating from + */ + datum = heap_getattr(extTup, Anum_pg_extension_extversion, + RelationGetDescr(extRel), &isnull); + if (isnull) + elog(ERROR, "extversion is null"); + oldVersionName = text_to_cstring(DatumGetTextPP(datum)); + + systable_endscan(extScan); + + table_close(extRel, AccessShareLock); + + /* Permission check: must own extension */ + if (!pg_extension_ownercheck(extensionOid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_EXTENSION, + stmt->extname); + + /* + * Read the primary control file. Note we assume that it does not contain + * any non-ASCII data, so there is no need to worry about encoding at this + * point. 
+ */ + control = read_extension_control_file(stmt->extname); + + /* + * Read the statement option list + */ + foreach(lc, stmt->options) + { + DefElem *defel = (DefElem *) lfirst(lc); + + if (strcmp(defel->defname, "new_version") == 0) + { + if (d_new_version) + errorConflictingDefElem(defel, pstate); + d_new_version = defel; + } + else + elog(ERROR, "unrecognized option: %s", defel->defname); + } + + /* + * Determine the version to update to + */ + if (d_new_version && d_new_version->arg) + versionName = strVal(d_new_version->arg); + else if (control->default_version) + versionName = control->default_version; + else + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("version to install must be specified"))); + versionName = NULL; /* keep compiler quiet */ + } + check_valid_version_name(versionName); + + /* + * If we're already at that version, just say so + */ + if (strcmp(oldVersionName, versionName) == 0) + { + ereport(NOTICE, + (errmsg("version \"%s\" of extension \"%s\" is already installed", + versionName, stmt->extname))); + return InvalidObjectAddress; + } + + /* + * Identify the series of update script files we need to execute + */ + updateVersions = identify_update_path(control, + oldVersionName, + versionName); + + /* + * Update the pg_extension row and execute the update scripts, one at a + * time + */ + ApplyExtensionUpdates(extensionOid, control, + oldVersionName, updateVersions, + NULL, false, false); + + ObjectAddressSet(address, ExtensionRelationId, extensionOid); + + return address; +} + +/* + * Apply a series of update scripts as though individual ALTER EXTENSION + * UPDATE commands had been given, including altering the pg_extension row + * and dependencies each time. + * + * This might be more work than necessary, but it ensures that old update + * scripts don't break if newer versions have different control parameters. 
+ */ +static void +ApplyExtensionUpdates(Oid extensionOid, + ExtensionControlFile *pcontrol, + const char *initialVersion, + List *updateVersions, + char *origSchemaName, + bool cascade, + bool is_create) +{ + const char *oldVersionName = initialVersion; + ListCell *lcv; + + foreach(lcv, updateVersions) + { + char *versionName = (char *) lfirst(lcv); + ExtensionControlFile *control; + char *schemaName; + Oid schemaOid; + List *requiredExtensions; + List *requiredSchemas; + Relation extRel; + ScanKeyData key[1]; + SysScanDesc extScan; + HeapTuple extTup; + Form_pg_extension extForm; + Datum values[Natts_pg_extension]; + bool nulls[Natts_pg_extension]; + bool repl[Natts_pg_extension]; + ObjectAddress myself; + ListCell *lc; + + /* + * Fetch parameters for specific version (pcontrol is not changed) + */ + control = read_extension_aux_control_file(pcontrol, versionName); + + /* Find the pg_extension tuple */ + extRel = table_open(ExtensionRelationId, RowExclusiveLock); + + ScanKeyInit(&key[0], + Anum_pg_extension_oid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(extensionOid)); + + extScan = systable_beginscan(extRel, ExtensionOidIndexId, true, + NULL, 1, key); + + extTup = systable_getnext(extScan); + + if (!HeapTupleIsValid(extTup)) /* should not happen */ + elog(ERROR, "could not find tuple for extension %u", + extensionOid); + + extForm = (Form_pg_extension) GETSTRUCT(extTup); + + /* + * Determine the target schema (set by original install) + */ + schemaOid = extForm->extnamespace; + schemaName = get_namespace_name(schemaOid); + + /* + * Modify extrelocatable and extversion in the pg_extension tuple + */ + memset(values, 0, sizeof(values)); + memset(nulls, 0, sizeof(nulls)); + memset(repl, 0, sizeof(repl)); + + values[Anum_pg_extension_extrelocatable - 1] = + BoolGetDatum(control->relocatable); + repl[Anum_pg_extension_extrelocatable - 1] = true; + values[Anum_pg_extension_extversion - 1] = + CStringGetTextDatum(versionName); + 
repl[Anum_pg_extension_extversion - 1] = true; + + extTup = heap_modify_tuple(extTup, RelationGetDescr(extRel), + values, nulls, repl); + + CatalogTupleUpdate(extRel, &extTup->t_self, extTup); + + systable_endscan(extScan); + + table_close(extRel, RowExclusiveLock); + + /* + * Look up the prerequisite extensions for this version, install them + * if necessary, and build lists of their OIDs and the OIDs of their + * target schemas. + */ + requiredExtensions = NIL; + requiredSchemas = NIL; + foreach(lc, control->requires) + { + char *curreq = (char *) lfirst(lc); + Oid reqext; + Oid reqschema; + + reqext = get_required_extension(curreq, + control->name, + origSchemaName, + cascade, + NIL, + is_create); + reqschema = get_extension_schema(reqext); + requiredExtensions = lappend_oid(requiredExtensions, reqext); + requiredSchemas = lappend_oid(requiredSchemas, reqschema); + } + + /* + * Remove and recreate dependencies on prerequisite extensions + */ + deleteDependencyRecordsForClass(ExtensionRelationId, extensionOid, + ExtensionRelationId, + DEPENDENCY_NORMAL); + + myself.classId = ExtensionRelationId; + myself.objectId = extensionOid; + myself.objectSubId = 0; + + foreach(lc, requiredExtensions) + { + Oid reqext = lfirst_oid(lc); + ObjectAddress otherext; + + otherext.classId = ExtensionRelationId; + otherext.objectId = reqext; + otherext.objectSubId = 0; + + recordDependencyOn(&myself, &otherext, DEPENDENCY_NORMAL); + } + + InvokeObjectPostAlterHook(ExtensionRelationId, extensionOid, 0); + + /* + * Finally, execute the update script file + */ + execute_extension_script(extensionOid, control, + oldVersionName, versionName, + requiredSchemas, + schemaName, schemaOid); + + /* + * Update prior-version name and loop around. Since + * execute_sql_string did a final CommandCounterIncrement, we can + * update the pg_extension row again. 
+ */ + oldVersionName = versionName; + } +} + +/* + * Execute ALTER EXTENSION ADD/DROP + * + * Return value is the address of the altered extension. + * + * objAddr is an output argument which, if not NULL, is set to the address of + * the added/dropped object. + */ +ObjectAddress +ExecAlterExtensionContentsStmt(AlterExtensionContentsStmt *stmt, + ObjectAddress *objAddr) +{ + ObjectAddress extension; + ObjectAddress object; + Relation relation; + Oid oldExtension; + + switch (stmt->objtype) + { + case OBJECT_DATABASE: + case OBJECT_EXTENSION: + case OBJECT_INDEX: + case OBJECT_PUBLICATION: + case OBJECT_ROLE: + case OBJECT_STATISTIC_EXT: + case OBJECT_SUBSCRIPTION: + case OBJECT_TABLESPACE: + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("cannot add an object of this type to an extension"))); + break; + default: + /* OK */ + break; + } + + /* + * Find the extension and acquire a lock on it, to ensure it doesn't get + * dropped concurrently. A sharable lock seems sufficient: there's no + * reason not to allow other sorts of manipulations, such as add/drop of + * other objects, to occur concurrently. Concurrently adding/dropping the + * *same* object would be bad, but we prevent that by using a non-sharable + * lock on the individual object, below. + */ + extension = get_object_address(OBJECT_EXTENSION, + (Node *) makeString(stmt->extname), + &relation, AccessShareLock, false); + + /* Permission check: must own extension */ + if (!pg_extension_ownercheck(extension.objectId, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_EXTENSION, + stmt->extname); + + /* + * Translate the parser representation that identifies the object into an + * ObjectAddress. get_object_address() will throw an error if the object + * does not exist, and will also acquire a lock on the object to guard + * against concurrent DROP and ALTER EXTENSION ADD/DROP operations. 
+ */ + object = get_object_address(stmt->objtype, stmt->object, + &relation, ShareUpdateExclusiveLock, false); + + Assert(object.objectSubId == 0); + if (objAddr) + *objAddr = object; + + /* Permission check: must own target object, too */ + check_object_ownership(GetUserId(), stmt->objtype, object, + stmt->object, relation); + + /* + * Check existing extension membership. + */ + oldExtension = getExtensionOfObject(object.classId, object.objectId); + + if (stmt->action > 0) + { + /* + * ADD, so complain if object is already attached to some extension. + */ + if (OidIsValid(oldExtension)) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("%s is already a member of extension \"%s\"", + getObjectDescription(&object, false), + get_extension_name(oldExtension)))); + + /* + * Prevent a schema from being added to an extension if the schema + * contains the extension. That would create a dependency loop. + */ + if (object.classId == NamespaceRelationId && + object.objectId == get_extension_schema(extension.objectId)) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("cannot add schema \"%s\" to extension \"%s\" " + "because the schema contains the extension", + get_namespace_name(object.objectId), + stmt->extname))); + + /* + * OK, add the dependency. + */ + recordDependencyOn(&object, &extension, DEPENDENCY_EXTENSION); + + /* + * Also record the initial ACL on the object, if any. + * + * Note that this will handle the object's ACLs, as well as any ACLs + * on object subIds. (In other words, when the object is a table, + * this will record the table's ACL and the ACLs for the columns on + * the table, if any). + */ + recordExtObjInitPriv(object.objectId, object.classId); + } + else + { + /* + * DROP, so complain if it's not a member. 
+ */ + if (oldExtension != extension.objectId) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("%s is not a member of extension \"%s\"", + getObjectDescription(&object, false), + stmt->extname))); + + /* + * OK, drop the dependency. + */ + if (deleteDependencyRecordsForClass(object.classId, object.objectId, + ExtensionRelationId, + DEPENDENCY_EXTENSION) != 1) + elog(ERROR, "unexpected number of extension dependency records"); + + /* + * If it's a relation, it might have an entry in the extension's + * extconfig array, which we must remove. + */ + if (object.classId == RelationRelationId) + extension_config_remove(extension.objectId, object.objectId); + + /* + * Remove all the initial ACLs, if any. + * + * Note that this will remove the object's ACLs, as well as any ACLs + * on object subIds. (In other words, when the object is a table, + * this will remove the table's ACL and the ACLs for the columns on + * the table, if any). + */ + removeExtObjInitPriv(object.objectId, object.classId); + } + + InvokeObjectPostAlterHook(ExtensionRelationId, extension.objectId, 0); + + /* + * If get_object_address() opened the relation for us, we close it to keep + * the reference count correct - but we retain any locks acquired by + * get_object_address() until commit time, to guard against concurrent + * activity. + */ + if (relation != NULL) + relation_close(relation, NoLock); + + return extension; +} + +/* + * Read the whole of file into memory. + * + * The file contents are returned as a single palloc'd chunk. For convenience + * of the callers, an extra \0 byte is added to the end. 
+ */ +static char * +read_whole_file(const char *filename, int *length) +{ + char *buf; + FILE *file; + size_t bytes_to_read; + struct stat fst; + + if (stat(filename, &fst) < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not stat file \"%s\": %m", filename))); + + if (fst.st_size > (MaxAllocSize - 1)) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("file \"%s\" is too large", filename))); + bytes_to_read = (size_t) fst.st_size; + + if ((file = AllocateFile(filename, PG_BINARY_R)) == NULL) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open file \"%s\" for reading: %m", + filename))); + + buf = (char *) palloc(bytes_to_read + 1); + + *length = fread(buf, 1, bytes_to_read, file); + + if (ferror(file)) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not read file \"%s\": %m", filename))); + + FreeFile(file); + + buf[*length] = '\0'; + return buf; +} diff --git a/src/backend/commands/foreigncmds.c b/src/backend/commands/foreigncmds.c new file mode 100644 index 0000000..91f4dd3 --- /dev/null +++ b/src/backend/commands/foreigncmds.c @@ -0,0 +1,1617 @@ +/*------------------------------------------------------------------------- + * + * foreigncmds.c + * foreign-data wrapper/server creation/manipulation commands + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/commands/foreigncmds.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/htup_details.h" +#include "access/reloptions.h" +#include "access/table.h" +#include "access/xact.h" +#include "catalog/catalog.h" +#include "catalog/dependency.h" +#include "catalog/indexing.h" +#include "catalog/objectaccess.h" +#include "catalog/pg_foreign_data_wrapper.h" +#include "catalog/pg_foreign_server.h" +#include "catalog/pg_foreign_table.h" +#include "catalog/pg_proc.h" +#include "catalog/pg_type.h" +#include 
"catalog/pg_user_mapping.h" +#include "commands/defrem.h" +#include "foreign/fdwapi.h" +#include "foreign/foreign.h" +#include "miscadmin.h" +#include "parser/parse_func.h" +#include "tcop/utility.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" +#include "utils/syscache.h" + + +typedef struct +{ + char *tablename; + char *cmd; +} import_error_callback_arg; + +/* Internal functions */ +static void import_error_callback(void *arg); + + +/* + * Convert a DefElem list to the text array format that is used in + * pg_foreign_data_wrapper, pg_foreign_server, pg_user_mapping, and + * pg_foreign_table. + * + * Returns the array in the form of a Datum, or PointerGetDatum(NULL) + * if the list is empty. + * + * Note: The array is usually stored to database without further + * processing, hence any validation should be done before this + * conversion. + */ +static Datum +optionListToArray(List *options) +{ + ArrayBuildState *astate = NULL; + ListCell *cell; + + foreach(cell, options) + { + DefElem *def = lfirst(cell); + const char *value; + Size len; + text *t; + + value = defGetString(def); + len = VARHDRSZ + strlen(def->defname) + 1 + strlen(value); + t = palloc(len + 1); + SET_VARSIZE(t, len); + sprintf(VARDATA(t), "%s=%s", def->defname, value); + + astate = accumArrayResult(astate, PointerGetDatum(t), + false, TEXTOID, + CurrentMemoryContext); + } + + if (astate) + return makeArrayResult(astate, CurrentMemoryContext); + + return PointerGetDatum(NULL); +} + + +/* + * Transform a list of DefElem into text array format. This is substantially + * the same thing as optionListToArray(), except we recognize SET/ADD/DROP + * actions for modifying an existing list of options, which is passed in + * Datum form as oldOptions. Also, if fdwvalidator isn't InvalidOid + * it specifies a validator function to call on the result. + * + * Returns the array in the form of a Datum, or PointerGetDatum(NULL) + * if the list is empty. 
+ * + * This is used by CREATE/ALTER of FOREIGN DATA WRAPPER/SERVER/USER MAPPING/ + * FOREIGN TABLE. + */ +Datum +transformGenericOptions(Oid catalogId, + Datum oldOptions, + List *options, + Oid fdwvalidator) +{ + List *resultOptions = untransformRelOptions(oldOptions); + ListCell *optcell; + Datum result; + + foreach(optcell, options) + { + DefElem *od = lfirst(optcell); + ListCell *cell; + + /* + * Find the element in resultOptions. We need this for validation in + * all cases. + */ + foreach(cell, resultOptions) + { + DefElem *def = lfirst(cell); + + if (strcmp(def->defname, od->defname) == 0) + break; + } + + /* + * It is possible to perform multiple SET/DROP actions on the same + * option. The standard permits this, as long as the options to be + * added are unique. Note that an unspecified action is taken to be + * ADD. + */ + switch (od->defaction) + { + case DEFELEM_DROP: + if (!cell) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("option \"%s\" not found", + od->defname))); + resultOptions = list_delete_cell(resultOptions, cell); + break; + + case DEFELEM_SET: + if (!cell) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("option \"%s\" not found", + od->defname))); + lfirst(cell) = od; + break; + + case DEFELEM_ADD: + case DEFELEM_UNSPEC: + if (cell) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("option \"%s\" provided more than once", + od->defname))); + resultOptions = lappend(resultOptions, od); + break; + + default: + elog(ERROR, "unrecognized action %d on option \"%s\"", + (int) od->defaction, od->defname); + break; + } + } + + result = optionListToArray(resultOptions); + + if (OidIsValid(fdwvalidator)) + { + Datum valarg = result; + + /* + * Pass a null options list as an empty array, so that validators + * don't have to be declared non-strict to handle the case. 
+ */ + if (DatumGetPointer(valarg) == NULL) + valarg = PointerGetDatum(construct_empty_array(TEXTOID)); + OidFunctionCall2(fdwvalidator, valarg, ObjectIdGetDatum(catalogId)); + } + + return result; +} + + +/* + * Internal workhorse for changing a data wrapper's owner. + * + * Allow this only for superusers; also the new owner must be a + * superuser. + */ +static void +AlterForeignDataWrapperOwner_internal(Relation rel, HeapTuple tup, Oid newOwnerId) +{ + Form_pg_foreign_data_wrapper form; + Datum repl_val[Natts_pg_foreign_data_wrapper]; + bool repl_null[Natts_pg_foreign_data_wrapper]; + bool repl_repl[Natts_pg_foreign_data_wrapper]; + Acl *newAcl; + Datum aclDatum; + bool isNull; + + form = (Form_pg_foreign_data_wrapper) GETSTRUCT(tup); + + /* Must be a superuser to change a FDW owner */ + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to change owner of foreign-data wrapper \"%s\"", + NameStr(form->fdwname)), + errhint("Must be superuser to change owner of a foreign-data wrapper."))); + + /* New owner must also be a superuser */ + if (!superuser_arg(newOwnerId)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to change owner of foreign-data wrapper \"%s\"", + NameStr(form->fdwname)), + errhint("The owner of a foreign-data wrapper must be a superuser."))); + + if (form->fdwowner != newOwnerId) + { + memset(repl_null, false, sizeof(repl_null)); + memset(repl_repl, false, sizeof(repl_repl)); + + repl_repl[Anum_pg_foreign_data_wrapper_fdwowner - 1] = true; + repl_val[Anum_pg_foreign_data_wrapper_fdwowner - 1] = ObjectIdGetDatum(newOwnerId); + + aclDatum = heap_getattr(tup, + Anum_pg_foreign_data_wrapper_fdwacl, + RelationGetDescr(rel), + &isNull); + /* Null ACLs do not require changes */ + if (!isNull) + { + newAcl = aclnewowner(DatumGetAclP(aclDatum), + form->fdwowner, newOwnerId); + repl_repl[Anum_pg_foreign_data_wrapper_fdwacl - 1] = true; + 
repl_val[Anum_pg_foreign_data_wrapper_fdwacl - 1] = PointerGetDatum(newAcl); + } + + tup = heap_modify_tuple(tup, RelationGetDescr(rel), repl_val, repl_null, + repl_repl); + + CatalogTupleUpdate(rel, &tup->t_self, tup); + + /* Update owner dependency reference */ + changeDependencyOnOwner(ForeignDataWrapperRelationId, + form->oid, + newOwnerId); + } + + InvokeObjectPostAlterHook(ForeignDataWrapperRelationId, + form->oid, 0); +} + +/* + * Change foreign-data wrapper owner -- by name + * + * Note restrictions in the "_internal" function, above. + */ +ObjectAddress +AlterForeignDataWrapperOwner(const char *name, Oid newOwnerId) +{ + Oid fdwId; + HeapTuple tup; + Relation rel; + ObjectAddress address; + Form_pg_foreign_data_wrapper form; + + + rel = table_open(ForeignDataWrapperRelationId, RowExclusiveLock); + + tup = SearchSysCacheCopy1(FOREIGNDATAWRAPPERNAME, CStringGetDatum(name)); + + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("foreign-data wrapper \"%s\" does not exist", name))); + + form = (Form_pg_foreign_data_wrapper) GETSTRUCT(tup); + fdwId = form->oid; + + AlterForeignDataWrapperOwner_internal(rel, tup, newOwnerId); + + ObjectAddressSet(address, ForeignDataWrapperRelationId, fdwId); + + heap_freetuple(tup); + + table_close(rel, RowExclusiveLock); + + return address; +} + +/* + * Change foreign-data wrapper owner -- by OID + * + * Note restrictions in the "_internal" function, above. 
+ */ +void +AlterForeignDataWrapperOwner_oid(Oid fwdId, Oid newOwnerId) +{ + HeapTuple tup; + Relation rel; + + rel = table_open(ForeignDataWrapperRelationId, RowExclusiveLock); + + tup = SearchSysCacheCopy1(FOREIGNDATAWRAPPEROID, ObjectIdGetDatum(fwdId)); + + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("foreign-data wrapper with OID %u does not exist", fwdId))); + + AlterForeignDataWrapperOwner_internal(rel, tup, newOwnerId); + + heap_freetuple(tup); + + table_close(rel, RowExclusiveLock); +} + +/* + * Internal workhorse for changing a foreign server's owner + */ +static void +AlterForeignServerOwner_internal(Relation rel, HeapTuple tup, Oid newOwnerId) +{ + Form_pg_foreign_server form; + Datum repl_val[Natts_pg_foreign_server]; + bool repl_null[Natts_pg_foreign_server]; + bool repl_repl[Natts_pg_foreign_server]; + Acl *newAcl; + Datum aclDatum; + bool isNull; + + form = (Form_pg_foreign_server) GETSTRUCT(tup); + + if (form->srvowner != newOwnerId) + { + /* Superusers can always do it */ + if (!superuser()) + { + Oid srvId; + AclResult aclresult; + + srvId = form->oid; + + /* Must be owner */ + if (!pg_foreign_server_ownercheck(srvId, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FOREIGN_SERVER, + NameStr(form->srvname)); + + /* Must be able to become new owner */ + check_is_member_of_role(GetUserId(), newOwnerId); + + /* New owner must have USAGE privilege on foreign-data wrapper */ + aclresult = pg_foreign_data_wrapper_aclcheck(form->srvfdw, newOwnerId, ACL_USAGE); + if (aclresult != ACLCHECK_OK) + { + ForeignDataWrapper *fdw = GetForeignDataWrapper(form->srvfdw); + + aclcheck_error(aclresult, OBJECT_FDW, fdw->fdwname); + } + } + + memset(repl_null, false, sizeof(repl_null)); + memset(repl_repl, false, sizeof(repl_repl)); + + repl_repl[Anum_pg_foreign_server_srvowner - 1] = true; + repl_val[Anum_pg_foreign_server_srvowner - 1] = ObjectIdGetDatum(newOwnerId); + + aclDatum = heap_getattr(tup, + 
Anum_pg_foreign_server_srvacl, + RelationGetDescr(rel), + &isNull); + /* Null ACLs do not require changes */ + if (!isNull) + { + newAcl = aclnewowner(DatumGetAclP(aclDatum), + form->srvowner, newOwnerId); + repl_repl[Anum_pg_foreign_server_srvacl - 1] = true; + repl_val[Anum_pg_foreign_server_srvacl - 1] = PointerGetDatum(newAcl); + } + + tup = heap_modify_tuple(tup, RelationGetDescr(rel), repl_val, repl_null, + repl_repl); + + CatalogTupleUpdate(rel, &tup->t_self, tup); + + /* Update owner dependency reference */ + changeDependencyOnOwner(ForeignServerRelationId, form->oid, + newOwnerId); + } + + InvokeObjectPostAlterHook(ForeignServerRelationId, + form->oid, 0); +} + +/* + * Change foreign server owner -- by name + */ +ObjectAddress +AlterForeignServerOwner(const char *name, Oid newOwnerId) +{ + Oid servOid; + HeapTuple tup; + Relation rel; + ObjectAddress address; + Form_pg_foreign_server form; + + rel = table_open(ForeignServerRelationId, RowExclusiveLock); + + tup = SearchSysCacheCopy1(FOREIGNSERVERNAME, CStringGetDatum(name)); + + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("server \"%s\" does not exist", name))); + + form = (Form_pg_foreign_server) GETSTRUCT(tup); + servOid = form->oid; + + AlterForeignServerOwner_internal(rel, tup, newOwnerId); + + ObjectAddressSet(address, ForeignServerRelationId, servOid); + + heap_freetuple(tup); + + table_close(rel, RowExclusiveLock); + + return address; +} + +/* + * Change foreign server owner -- by OID + */ +void +AlterForeignServerOwner_oid(Oid srvId, Oid newOwnerId) +{ + HeapTuple tup; + Relation rel; + + rel = table_open(ForeignServerRelationId, RowExclusiveLock); + + tup = SearchSysCacheCopy1(FOREIGNSERVEROID, ObjectIdGetDatum(srvId)); + + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("foreign server with OID %u does not exist", srvId))); + + AlterForeignServerOwner_internal(rel, tup, newOwnerId); + + 
heap_freetuple(tup); + + table_close(rel, RowExclusiveLock); +} + +/* + * Convert a handler function name passed from the parser to an Oid. + */ +static Oid +lookup_fdw_handler_func(DefElem *handler) +{ + Oid handlerOid; + + if (handler == NULL || handler->arg == NULL) + return InvalidOid; + + /* handlers have no arguments */ + handlerOid = LookupFuncName((List *) handler->arg, 0, NULL, false); + + /* check that handler has correct return type */ + if (get_func_rettype(handlerOid) != FDW_HANDLEROID) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("function %s must return type %s", + NameListToString((List *) handler->arg), "fdw_handler"))); + + return handlerOid; +} + +/* + * Convert a validator function name passed from the parser to an Oid. + */ +static Oid +lookup_fdw_validator_func(DefElem *validator) +{ + Oid funcargtypes[2]; + + if (validator == NULL || validator->arg == NULL) + return InvalidOid; + + /* validators take text[], oid */ + funcargtypes[0] = TEXTARRAYOID; + funcargtypes[1] = OIDOID; + + return LookupFuncName((List *) validator->arg, 2, funcargtypes, false); + /* validator's return value is ignored, so we don't check the type */ +} + +/* + * Process function options of CREATE/ALTER FDW + */ +static void +parse_func_options(ParseState *pstate, List *func_options, + bool *handler_given, Oid *fdwhandler, + bool *validator_given, Oid *fdwvalidator) +{ + ListCell *cell; + + *handler_given = false; + *validator_given = false; + /* return InvalidOid if not given */ + *fdwhandler = InvalidOid; + *fdwvalidator = InvalidOid; + + foreach(cell, func_options) + { + DefElem *def = (DefElem *) lfirst(cell); + + if (strcmp(def->defname, "handler") == 0) + { + if (*handler_given) + errorConflictingDefElem(def, pstate); + *handler_given = true; + *fdwhandler = lookup_fdw_handler_func(def); + } + else if (strcmp(def->defname, "validator") == 0) + { + if (*validator_given) + errorConflictingDefElem(def, pstate); + *validator_given = true; + 
*fdwvalidator = lookup_fdw_validator_func(def); + } + else + elog(ERROR, "option \"%s\" not recognized", + def->defname); + } +} + +/* + * Create a foreign-data wrapper + */ +ObjectAddress +CreateForeignDataWrapper(ParseState *pstate, CreateFdwStmt *stmt) +{ + Relation rel; + Datum values[Natts_pg_foreign_data_wrapper]; + bool nulls[Natts_pg_foreign_data_wrapper]; + HeapTuple tuple; + Oid fdwId; + bool handler_given; + bool validator_given; + Oid fdwhandler; + Oid fdwvalidator; + Datum fdwoptions; + Oid ownerId; + ObjectAddress myself; + ObjectAddress referenced; + + rel = table_open(ForeignDataWrapperRelationId, RowExclusiveLock); + + /* Must be superuser */ + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to create foreign-data wrapper \"%s\"", + stmt->fdwname), + errhint("Must be superuser to create a foreign-data wrapper."))); + + /* For now the owner cannot be specified on create. Use effective user ID. */ + ownerId = GetUserId(); + + /* + * Check that there is no other foreign-data wrapper by this name. + */ + if (GetForeignDataWrapperByName(stmt->fdwname, true) != NULL) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("foreign-data wrapper \"%s\" already exists", + stmt->fdwname))); + + /* + * Insert tuple into pg_foreign_data_wrapper. 
+ */ + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + + fdwId = GetNewOidWithIndex(rel, ForeignDataWrapperOidIndexId, + Anum_pg_foreign_data_wrapper_oid); + values[Anum_pg_foreign_data_wrapper_oid - 1] = ObjectIdGetDatum(fdwId); + values[Anum_pg_foreign_data_wrapper_fdwname - 1] = + DirectFunctionCall1(namein, CStringGetDatum(stmt->fdwname)); + values[Anum_pg_foreign_data_wrapper_fdwowner - 1] = ObjectIdGetDatum(ownerId); + + /* Lookup handler and validator functions, if given */ + parse_func_options(pstate, stmt->func_options, + &handler_given, &fdwhandler, + &validator_given, &fdwvalidator); + + values[Anum_pg_foreign_data_wrapper_fdwhandler - 1] = ObjectIdGetDatum(fdwhandler); + values[Anum_pg_foreign_data_wrapper_fdwvalidator - 1] = ObjectIdGetDatum(fdwvalidator); + + nulls[Anum_pg_foreign_data_wrapper_fdwacl - 1] = true; + + fdwoptions = transformGenericOptions(ForeignDataWrapperRelationId, + PointerGetDatum(NULL), + stmt->options, + fdwvalidator); + + if (PointerIsValid(DatumGetPointer(fdwoptions))) + values[Anum_pg_foreign_data_wrapper_fdwoptions - 1] = fdwoptions; + else + nulls[Anum_pg_foreign_data_wrapper_fdwoptions - 1] = true; + + tuple = heap_form_tuple(rel->rd_att, values, nulls); + + CatalogTupleInsert(rel, tuple); + + heap_freetuple(tuple); + + /* record dependencies */ + myself.classId = ForeignDataWrapperRelationId; + myself.objectId = fdwId; + myself.objectSubId = 0; + + if (OidIsValid(fdwhandler)) + { + referenced.classId = ProcedureRelationId; + referenced.objectId = fdwhandler; + referenced.objectSubId = 0; + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + } + + if (OidIsValid(fdwvalidator)) + { + referenced.classId = ProcedureRelationId; + referenced.objectId = fdwvalidator; + referenced.objectSubId = 0; + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + } + + recordDependencyOnOwner(ForeignDataWrapperRelationId, fdwId, ownerId); + + /* dependency on extension */ + 
recordDependencyOnCurrentExtension(&myself, false); + + /* Post creation hook for new foreign data wrapper */ + InvokeObjectPostCreateHook(ForeignDataWrapperRelationId, fdwId, 0); + + table_close(rel, RowExclusiveLock); + + return myself; +} + + +/* + * Alter foreign-data wrapper + */ +ObjectAddress +AlterForeignDataWrapper(ParseState *pstate, AlterFdwStmt *stmt) +{ + Relation rel; + HeapTuple tp; + Form_pg_foreign_data_wrapper fdwForm; + Datum repl_val[Natts_pg_foreign_data_wrapper]; + bool repl_null[Natts_pg_foreign_data_wrapper]; + bool repl_repl[Natts_pg_foreign_data_wrapper]; + Oid fdwId; + bool isnull; + Datum datum; + bool handler_given; + bool validator_given; + Oid fdwhandler; + Oid fdwvalidator; + ObjectAddress myself; + + rel = table_open(ForeignDataWrapperRelationId, RowExclusiveLock); + + /* Must be superuser */ + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to alter foreign-data wrapper \"%s\"", + stmt->fdwname), + errhint("Must be superuser to alter a foreign-data wrapper."))); + + tp = SearchSysCacheCopy1(FOREIGNDATAWRAPPERNAME, + CStringGetDatum(stmt->fdwname)); + + if (!HeapTupleIsValid(tp)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("foreign-data wrapper \"%s\" does not exist", stmt->fdwname))); + + fdwForm = (Form_pg_foreign_data_wrapper) GETSTRUCT(tp); + fdwId = fdwForm->oid; + + memset(repl_val, 0, sizeof(repl_val)); + memset(repl_null, false, sizeof(repl_null)); + memset(repl_repl, false, sizeof(repl_repl)); + + parse_func_options(pstate, stmt->func_options, + &handler_given, &fdwhandler, + &validator_given, &fdwvalidator); + + if (handler_given) + { + repl_val[Anum_pg_foreign_data_wrapper_fdwhandler - 1] = ObjectIdGetDatum(fdwhandler); + repl_repl[Anum_pg_foreign_data_wrapper_fdwhandler - 1] = true; + + /* + * It could be that the behavior of accessing foreign table changes + * with the new handler. Warn about this. 
+ */ + ereport(WARNING, + (errmsg("changing the foreign-data wrapper handler can change behavior of existing foreign tables"))); + } + + if (validator_given) + { + repl_val[Anum_pg_foreign_data_wrapper_fdwvalidator - 1] = ObjectIdGetDatum(fdwvalidator); + repl_repl[Anum_pg_foreign_data_wrapper_fdwvalidator - 1] = true; + + /* + * It could be that existing options for the FDW or dependent SERVER, + * USER MAPPING or FOREIGN TABLE objects are no longer valid according + * to the new validator. Warn about this. + */ + if (OidIsValid(fdwvalidator)) + ereport(WARNING, + (errmsg("changing the foreign-data wrapper validator can cause " + "the options for dependent objects to become invalid"))); + } + else + { + /* + * Validator is not changed, but we need it for validating options. + */ + fdwvalidator = fdwForm->fdwvalidator; + } + + /* + * If options specified, validate and update. + */ + if (stmt->options) + { + /* Extract the current options */ + datum = SysCacheGetAttr(FOREIGNDATAWRAPPEROID, + tp, + Anum_pg_foreign_data_wrapper_fdwoptions, + &isnull); + if (isnull) + datum = PointerGetDatum(NULL); + + /* Transform the options */ + datum = transformGenericOptions(ForeignDataWrapperRelationId, + datum, + stmt->options, + fdwvalidator); + + if (PointerIsValid(DatumGetPointer(datum))) + repl_val[Anum_pg_foreign_data_wrapper_fdwoptions - 1] = datum; + else + repl_null[Anum_pg_foreign_data_wrapper_fdwoptions - 1] = true; + + repl_repl[Anum_pg_foreign_data_wrapper_fdwoptions - 1] = true; + } + + /* Everything looks good - update the tuple */ + tp = heap_modify_tuple(tp, RelationGetDescr(rel), + repl_val, repl_null, repl_repl); + + CatalogTupleUpdate(rel, &tp->t_self, tp); + + heap_freetuple(tp); + + ObjectAddressSet(myself, ForeignDataWrapperRelationId, fdwId); + + /* Update function dependencies if we changed them */ + if (handler_given || validator_given) + { + ObjectAddress referenced; + + /* + * Flush all existing dependency records of this FDW on functions; we + * 
assume there can be none other than the ones we are fixing. + */ + deleteDependencyRecordsForClass(ForeignDataWrapperRelationId, + fdwId, + ProcedureRelationId, + DEPENDENCY_NORMAL); + + /* And build new ones. */ + + if (OidIsValid(fdwhandler)) + { + referenced.classId = ProcedureRelationId; + referenced.objectId = fdwhandler; + referenced.objectSubId = 0; + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + } + + if (OidIsValid(fdwvalidator)) + { + referenced.classId = ProcedureRelationId; + referenced.objectId = fdwvalidator; + referenced.objectSubId = 0; + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + } + } + + InvokeObjectPostAlterHook(ForeignDataWrapperRelationId, fdwId, 0); + + table_close(rel, RowExclusiveLock); + + return myself; +} + + +/* + * Create a foreign server + */ +ObjectAddress +CreateForeignServer(CreateForeignServerStmt *stmt) +{ + Relation rel; + Datum srvoptions; + Datum values[Natts_pg_foreign_server]; + bool nulls[Natts_pg_foreign_server]; + HeapTuple tuple; + Oid srvId; + Oid ownerId; + AclResult aclresult; + ObjectAddress myself; + ObjectAddress referenced; + ForeignDataWrapper *fdw; + + rel = table_open(ForeignServerRelationId, RowExclusiveLock); + + /* For now the owner cannot be specified on create. Use effective user ID. */ + ownerId = GetUserId(); + + /* + * Check that there is no other foreign server by this name. If there is + * one, do nothing if IF NOT EXISTS was specified. + */ + srvId = get_foreign_server_oid(stmt->servername, true); + if (OidIsValid(srvId)) + { + if (stmt->if_not_exists) + { + /* + * If we are in an extension script, insist that the pre-existing + * object be a member of the extension, to avoid security risks. 
+ */ + ObjectAddressSet(myself, ForeignServerRelationId, srvId); + checkMembershipInCurrentExtension(&myself); + + /* OK to skip */ + ereport(NOTICE, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("server \"%s\" already exists, skipping", + stmt->servername))); + table_close(rel, RowExclusiveLock); + return InvalidObjectAddress; + } + else + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("server \"%s\" already exists", + stmt->servername))); + } + + /* + * Check that the FDW exists and that we have USAGE on it. Also get the + * actual FDW for option validation etc. + */ + fdw = GetForeignDataWrapperByName(stmt->fdwname, false); + + aclresult = pg_foreign_data_wrapper_aclcheck(fdw->fdwid, ownerId, ACL_USAGE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_FDW, fdw->fdwname); + + /* + * Insert tuple into pg_foreign_server. + */ + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + + srvId = GetNewOidWithIndex(rel, ForeignServerOidIndexId, + Anum_pg_foreign_server_oid); + values[Anum_pg_foreign_server_oid - 1] = ObjectIdGetDatum(srvId); + values[Anum_pg_foreign_server_srvname - 1] = + DirectFunctionCall1(namein, CStringGetDatum(stmt->servername)); + values[Anum_pg_foreign_server_srvowner - 1] = ObjectIdGetDatum(ownerId); + values[Anum_pg_foreign_server_srvfdw - 1] = ObjectIdGetDatum(fdw->fdwid); + + /* Add server type if supplied */ + if (stmt->servertype) + values[Anum_pg_foreign_server_srvtype - 1] = + CStringGetTextDatum(stmt->servertype); + else + nulls[Anum_pg_foreign_server_srvtype - 1] = true; + + /* Add server version if supplied */ + if (stmt->version) + values[Anum_pg_foreign_server_srvversion - 1] = + CStringGetTextDatum(stmt->version); + else + nulls[Anum_pg_foreign_server_srvversion - 1] = true; + + /* Start with a blank acl */ + nulls[Anum_pg_foreign_server_srvacl - 1] = true; + + /* Add server options */ + srvoptions = transformGenericOptions(ForeignServerRelationId, + PointerGetDatum(NULL), + 
										 stmt->options,
										 fdw->fdwvalidator);

	/* A null Datum from transformGenericOptions means "no options". */
	if (PointerIsValid(DatumGetPointer(srvoptions)))
		values[Anum_pg_foreign_server_srvoptions - 1] = srvoptions;
	else
		nulls[Anum_pg_foreign_server_srvoptions - 1] = true;

	tuple = heap_form_tuple(rel->rd_att, values, nulls);

	CatalogTupleInsert(rel, tuple);

	heap_freetuple(tuple);

	/* record dependencies */
	myself.classId = ForeignServerRelationId;
	myself.objectId = srvId;
	myself.objectSubId = 0;

	/* the server depends on its foreign-data wrapper */
	referenced.classId = ForeignDataWrapperRelationId;
	referenced.objectId = fdw->fdwid;
	referenced.objectSubId = 0;
	recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);

	recordDependencyOnOwner(ForeignServerRelationId, srvId, ownerId);

	/* dependency on extension */
	recordDependencyOnCurrentExtension(&myself, false);

	/* Post creation hook for new foreign server */
	InvokeObjectPostCreateHook(ForeignServerRelationId, srvId, 0);

	table_close(rel, RowExclusiveLock);

	return myself;
}


/*
 * Alter foreign server
 *
 * Handles ALTER SERVER ... VERSION and/or OPTIONS.  Only the server's
 * owner (per pg_foreign_server_ownercheck, which also passes superusers)
 * may alter it.  Returns the object address of the altered server;
 * errors out if the server does not exist.
 */
ObjectAddress
AlterForeignServer(AlterForeignServerStmt *stmt)
{
	Relation	rel;
	HeapTuple	tp;
	Datum		repl_val[Natts_pg_foreign_server];
	bool		repl_null[Natts_pg_foreign_server];
	bool		repl_repl[Natts_pg_foreign_server];
	Oid			srvId;
	Form_pg_foreign_server srvForm;
	ObjectAddress address;

	rel = table_open(ForeignServerRelationId, RowExclusiveLock);

	/* Copy the tuple so heap_modify_tuple below works on private storage */
	tp = SearchSysCacheCopy1(FOREIGNSERVERNAME,
							 CStringGetDatum(stmt->servername));

	if (!HeapTupleIsValid(tp))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("server \"%s\" does not exist", stmt->servername)));

	srvForm = (Form_pg_foreign_server) GETSTRUCT(tp);
	srvId = srvForm->oid;

	/*
	 * Only owner or a superuser can ALTER a SERVER.
	 */
	if (!pg_foreign_server_ownercheck(srvId, GetUserId()))
		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FOREIGN_SERVER,
					   stmt->servername);

	memset(repl_val, 0, sizeof(repl_val));
	memset(repl_null, false, sizeof(repl_null));
	memset(repl_repl, false, sizeof(repl_repl));

	if (stmt->has_version)
	{
		/*
		 * Change the server VERSION string.  A NULL stmt->version (DROP
		 * VERSION form) nulls out the attribute.
		 */
		if (stmt->version)
			repl_val[Anum_pg_foreign_server_srvversion - 1] =
				CStringGetTextDatum(stmt->version);
		else
			repl_null[Anum_pg_foreign_server_srvversion - 1] = true;

		repl_repl[Anum_pg_foreign_server_srvversion - 1] = true;
	}

	if (stmt->options)
	{
		/* validate new options with the FDW's validator function */
		ForeignDataWrapper *fdw = GetForeignDataWrapper(srvForm->srvfdw);
		Datum		datum;
		bool		isnull;

		/* Extract the current srvoptions */
		datum = SysCacheGetAttr(FOREIGNSERVEROID,
								tp,
								Anum_pg_foreign_server_srvoptions,
								&isnull);
		if (isnull)
			datum = PointerGetDatum(NULL);	/* treat NULL as empty option list */

		/* Prepare the options array */
		datum = transformGenericOptions(ForeignServerRelationId,
										datum,
										stmt->options,
										fdw->fdwvalidator);

		if (PointerIsValid(DatumGetPointer(datum)))
			repl_val[Anum_pg_foreign_server_srvoptions - 1] = datum;
		else
			repl_null[Anum_pg_foreign_server_srvoptions - 1] = true;

		repl_repl[Anum_pg_foreign_server_srvoptions - 1] = true;
	}

	/* Everything looks good - update the tuple */
	tp = heap_modify_tuple(tp, RelationGetDescr(rel),
						   repl_val, repl_null, repl_repl);

	CatalogTupleUpdate(rel, &tp->t_self, tp);

	InvokeObjectPostAlterHook(ForeignServerRelationId, srvId, 0);

	ObjectAddressSet(address, ForeignServerRelationId, srvId);

	heap_freetuple(tp);

	table_close(rel, RowExclusiveLock);

	return address;
}


/*
 * Common routine to check permission for user-mapping-related DDL
 * commands.  We allow server owners to operate on any mapping, and
 * users to operate on their own mapping.
+ */ +static void +user_mapping_ddl_aclcheck(Oid umuserid, Oid serverid, const char *servername) +{ + Oid curuserid = GetUserId(); + + if (!pg_foreign_server_ownercheck(serverid, curuserid)) + { + if (umuserid == curuserid) + { + AclResult aclresult; + + aclresult = pg_foreign_server_aclcheck(serverid, curuserid, ACL_USAGE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_FOREIGN_SERVER, servername); + } + else + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FOREIGN_SERVER, + servername); + } +} + + +/* + * Create user mapping + */ +ObjectAddress +CreateUserMapping(CreateUserMappingStmt *stmt) +{ + Relation rel; + Datum useoptions; + Datum values[Natts_pg_user_mapping]; + bool nulls[Natts_pg_user_mapping]; + HeapTuple tuple; + Oid useId; + Oid umId; + ObjectAddress myself; + ObjectAddress referenced; + ForeignServer *srv; + ForeignDataWrapper *fdw; + RoleSpec *role = (RoleSpec *) stmt->user; + + rel = table_open(UserMappingRelationId, RowExclusiveLock); + + if (role->roletype == ROLESPEC_PUBLIC) + useId = ACL_ID_PUBLIC; + else + useId = get_rolespec_oid(stmt->user, false); + + /* Check that the server exists. */ + srv = GetForeignServerByName(stmt->servername, false); + + user_mapping_ddl_aclcheck(useId, srv->serverid, stmt->servername); + + /* + * Check that the user mapping is unique within server. + */ + umId = GetSysCacheOid2(USERMAPPINGUSERSERVER, Anum_pg_user_mapping_oid, + ObjectIdGetDatum(useId), + ObjectIdGetDatum(srv->serverid)); + + if (OidIsValid(umId)) + { + if (stmt->if_not_exists) + { + /* + * Since user mappings aren't members of extensions (see comments + * below), no need for checkMembershipInCurrentExtension here. 
+ */ + ereport(NOTICE, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("user mapping for \"%s\" already exists for server \"%s\", skipping", + MappingUserName(useId), + stmt->servername))); + + table_close(rel, RowExclusiveLock); + return InvalidObjectAddress; + } + else + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("user mapping for \"%s\" already exists for server \"%s\"", + MappingUserName(useId), + stmt->servername))); + } + + fdw = GetForeignDataWrapper(srv->fdwid); + + /* + * Insert tuple into pg_user_mapping. + */ + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + + umId = GetNewOidWithIndex(rel, UserMappingOidIndexId, + Anum_pg_user_mapping_oid); + values[Anum_pg_user_mapping_oid - 1] = ObjectIdGetDatum(umId); + values[Anum_pg_user_mapping_umuser - 1] = ObjectIdGetDatum(useId); + values[Anum_pg_user_mapping_umserver - 1] = ObjectIdGetDatum(srv->serverid); + + /* Add user options */ + useoptions = transformGenericOptions(UserMappingRelationId, + PointerGetDatum(NULL), + stmt->options, + fdw->fdwvalidator); + + if (PointerIsValid(DatumGetPointer(useoptions))) + values[Anum_pg_user_mapping_umoptions - 1] = useoptions; + else + nulls[Anum_pg_user_mapping_umoptions - 1] = true; + + tuple = heap_form_tuple(rel->rd_att, values, nulls); + + CatalogTupleInsert(rel, tuple); + + heap_freetuple(tuple); + + /* Add dependency on the server */ + myself.classId = UserMappingRelationId; + myself.objectId = umId; + myself.objectSubId = 0; + + referenced.classId = ForeignServerRelationId; + referenced.objectId = srv->serverid; + referenced.objectSubId = 0; + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + + if (OidIsValid(useId)) + { + /* Record the mapped user dependency */ + recordDependencyOnOwner(UserMappingRelationId, umId, useId); + } + + /* + * Perhaps someday there should be a recordDependencyOnCurrentExtension + * call here; but since roles aren't members of extensions, it seems like + * user mappings 
shouldn't be either. Note that the grammar and pg_dump + * would need to be extended too if we change this. + */ + + /* Post creation hook for new user mapping */ + InvokeObjectPostCreateHook(UserMappingRelationId, umId, 0); + + table_close(rel, RowExclusiveLock); + + return myself; +} + + +/* + * Alter user mapping + */ +ObjectAddress +AlterUserMapping(AlterUserMappingStmt *stmt) +{ + Relation rel; + HeapTuple tp; + Datum repl_val[Natts_pg_user_mapping]; + bool repl_null[Natts_pg_user_mapping]; + bool repl_repl[Natts_pg_user_mapping]; + Oid useId; + Oid umId; + ForeignServer *srv; + ObjectAddress address; + RoleSpec *role = (RoleSpec *) stmt->user; + + rel = table_open(UserMappingRelationId, RowExclusiveLock); + + if (role->roletype == ROLESPEC_PUBLIC) + useId = ACL_ID_PUBLIC; + else + useId = get_rolespec_oid(stmt->user, false); + + srv = GetForeignServerByName(stmt->servername, false); + + umId = GetSysCacheOid2(USERMAPPINGUSERSERVER, Anum_pg_user_mapping_oid, + ObjectIdGetDatum(useId), + ObjectIdGetDatum(srv->serverid)); + if (!OidIsValid(umId)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("user mapping for \"%s\" does not exist for server \"%s\"", + MappingUserName(useId), stmt->servername))); + + user_mapping_ddl_aclcheck(useId, srv->serverid, stmt->servername); + + tp = SearchSysCacheCopy1(USERMAPPINGOID, ObjectIdGetDatum(umId)); + + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for user mapping %u", umId); + + memset(repl_val, 0, sizeof(repl_val)); + memset(repl_null, false, sizeof(repl_null)); + memset(repl_repl, false, sizeof(repl_repl)); + + if (stmt->options) + { + ForeignDataWrapper *fdw; + Datum datum; + bool isnull; + + /* + * Process the options. 
	 */

		fdw = GetForeignDataWrapper(srv->fdwid);

		/* Extract the current umoptions */
		datum = SysCacheGetAttr(USERMAPPINGUSERSERVER,
								tp,
								Anum_pg_user_mapping_umoptions,
								&isnull);
		if (isnull)
			datum = PointerGetDatum(NULL);	/* treat NULL as empty option list */

		/* Prepare the options array */
		datum = transformGenericOptions(UserMappingRelationId,
										datum,
										stmt->options,
										fdw->fdwvalidator);

		if (PointerIsValid(DatumGetPointer(datum)))
			repl_val[Anum_pg_user_mapping_umoptions - 1] = datum;
		else
			repl_null[Anum_pg_user_mapping_umoptions - 1] = true;

		repl_repl[Anum_pg_user_mapping_umoptions - 1] = true;
	}

	/* Everything looks good - update the tuple */
	tp = heap_modify_tuple(tp, RelationGetDescr(rel),
						   repl_val, repl_null, repl_repl);

	CatalogTupleUpdate(rel, &tp->t_self, tp);

	InvokeObjectPostAlterHook(UserMappingRelationId,
							  umId, 0);

	ObjectAddressSet(address, UserMappingRelationId, umId);

	heap_freetuple(tp);

	table_close(rel, RowExclusiveLock);

	return address;
}


/*
 * Drop user mapping
 *
 * Returns the OID of the mapping that was removed, or InvalidOid when
 * IF EXISTS was given and the role, server, or mapping was not found.
 */
Oid
RemoveUserMapping(DropUserMappingStmt *stmt)
{
	ObjectAddress object;
	Oid			useId;
	Oid			umId;
	ForeignServer *srv;
	RoleSpec   *role = (RoleSpec *) stmt->user;

	if (role->roletype == ROLESPEC_PUBLIC)
		useId = ACL_ID_PUBLIC;
	else
	{
		useId = get_rolespec_oid(stmt->user, stmt->missing_ok);
		if (!OidIsValid(useId))
		{
			/*
			 * IF EXISTS specified, role not found and not public. Notice this
			 * and leave.
			 */
			elog(NOTICE, "role \"%s\" does not exist, skipping",
				 role->rolename);
			return InvalidOid;
		}
	}

	srv = GetForeignServerByName(stmt->servername, true);

	if (!srv)
	{
		if (!stmt->missing_ok)
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_OBJECT),
					 errmsg("server \"%s\" does not exist",
							stmt->servername)));
		/* IF EXISTS, just note it */
		ereport(NOTICE,
				(errmsg("server \"%s\" does not exist, skipping",
						stmt->servername)));
		return InvalidOid;
	}

	/* Look up the mapping for this (user, server) pair */
	umId = GetSysCacheOid2(USERMAPPINGUSERSERVER, Anum_pg_user_mapping_oid,
						   ObjectIdGetDatum(useId),
						   ObjectIdGetDatum(srv->serverid));

	if (!OidIsValid(umId))
	{
		if (!stmt->missing_ok)
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_OBJECT),
					 errmsg("user mapping for \"%s\" does not exist for server \"%s\"",
							MappingUserName(useId), stmt->servername)));

		/* IF EXISTS specified, just note it */
		ereport(NOTICE,
				(errmsg("user mapping for \"%s\" does not exist for server \"%s\", skipping",
						MappingUserName(useId), stmt->servername)));
		return InvalidOid;
	}

	user_mapping_ddl_aclcheck(useId, srv->serverid, srv->servername);

	/*
	 * Do the deletion
	 */
	object.classId = UserMappingRelationId;
	object.objectId = umId;
	object.objectSubId = 0;

	performDeletion(&object, DROP_CASCADE, 0);

	return umId;
}


/*
 * Create a foreign table
 * call after DefineRelation().
 */
void
CreateForeignTable(CreateForeignTableStmt *stmt, Oid relid)
{
	Relation	ftrel;
	Datum		ftoptions;
	Datum		values[Natts_pg_foreign_table];
	bool		nulls[Natts_pg_foreign_table];
	HeapTuple	tuple;
	AclResult	aclresult;
	ObjectAddress myself;
	ObjectAddress referenced;
	Oid			ownerId;
	ForeignDataWrapper *fdw;
	ForeignServer *server;

	/*
	 * Advance command counter to ensure the pg_attribute tuple is visible;
	 * the tuple might be updated to add constraints in previous step.
+ */ + CommandCounterIncrement(); + + ftrel = table_open(ForeignTableRelationId, RowExclusiveLock); + + /* + * For now the owner cannot be specified on create. Use effective user ID. + */ + ownerId = GetUserId(); + + /* + * Check that the foreign server exists and that we have USAGE on it. Also + * get the actual FDW for option validation etc. + */ + server = GetForeignServerByName(stmt->servername, false); + aclresult = pg_foreign_server_aclcheck(server->serverid, ownerId, ACL_USAGE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_FOREIGN_SERVER, server->servername); + + fdw = GetForeignDataWrapper(server->fdwid); + + /* + * Insert tuple into pg_foreign_table. + */ + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + + values[Anum_pg_foreign_table_ftrelid - 1] = ObjectIdGetDatum(relid); + values[Anum_pg_foreign_table_ftserver - 1] = ObjectIdGetDatum(server->serverid); + /* Add table generic options */ + ftoptions = transformGenericOptions(ForeignTableRelationId, + PointerGetDatum(NULL), + stmt->options, + fdw->fdwvalidator); + + if (PointerIsValid(DatumGetPointer(ftoptions))) + values[Anum_pg_foreign_table_ftoptions - 1] = ftoptions; + else + nulls[Anum_pg_foreign_table_ftoptions - 1] = true; + + tuple = heap_form_tuple(ftrel->rd_att, values, nulls); + + CatalogTupleInsert(ftrel, tuple); + + heap_freetuple(tuple); + + /* Add pg_class dependency on the server */ + myself.classId = RelationRelationId; + myself.objectId = relid; + myself.objectSubId = 0; + + referenced.classId = ForeignServerRelationId; + referenced.objectId = server->serverid; + referenced.objectSubId = 0; + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + + table_close(ftrel, RowExclusiveLock); +} + +/* + * Import a foreign schema + */ +void +ImportForeignSchema(ImportForeignSchemaStmt *stmt) +{ + ForeignServer *server; + ForeignDataWrapper *fdw; + FdwRoutine *fdw_routine; + AclResult aclresult; + List *cmd_list; + ListCell *lc; + + /* 
Check that the foreign server exists and that we have USAGE on it */ + server = GetForeignServerByName(stmt->server_name, false); + aclresult = pg_foreign_server_aclcheck(server->serverid, GetUserId(), ACL_USAGE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_FOREIGN_SERVER, server->servername); + + /* Check that the schema exists and we have CREATE permissions on it */ + (void) LookupCreationNamespace(stmt->local_schema); + + /* Get the FDW and check it supports IMPORT */ + fdw = GetForeignDataWrapper(server->fdwid); + if (!OidIsValid(fdw->fdwhandler)) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("foreign-data wrapper \"%s\" has no handler", + fdw->fdwname))); + fdw_routine = GetFdwRoutine(fdw->fdwhandler); + if (fdw_routine->ImportForeignSchema == NULL) + ereport(ERROR, + (errcode(ERRCODE_FDW_NO_SCHEMAS), + errmsg("foreign-data wrapper \"%s\" does not support IMPORT FOREIGN SCHEMA", + fdw->fdwname))); + + /* Call FDW to get a list of commands */ + cmd_list = fdw_routine->ImportForeignSchema(stmt, server->serverid); + + /* Parse and execute each command */ + foreach(lc, cmd_list) + { + char *cmd = (char *) lfirst(lc); + import_error_callback_arg callback_arg; + ErrorContextCallback sqlerrcontext; + List *raw_parsetree_list; + ListCell *lc2; + + /* + * Setup error traceback support for ereport(). This is so that any + * error in the generated SQL will be displayed nicely. + */ + callback_arg.tablename = NULL; /* not known yet */ + callback_arg.cmd = cmd; + sqlerrcontext.callback = import_error_callback; + sqlerrcontext.arg = (void *) &callback_arg; + sqlerrcontext.previous = error_context_stack; + error_context_stack = &sqlerrcontext; + + /* + * Parse the SQL string into a list of raw parse trees. + */ + raw_parsetree_list = pg_parse_query(cmd); + + /* + * Process each parse tree (we allow the FDW to put more than one + * command per string, though this isn't really advised). 
+ */ + foreach(lc2, raw_parsetree_list) + { + RawStmt *rs = lfirst_node(RawStmt, lc2); + CreateForeignTableStmt *cstmt = (CreateForeignTableStmt *) rs->stmt; + PlannedStmt *pstmt; + + /* + * Because we only allow CreateForeignTableStmt, we can skip parse + * analysis, rewrite, and planning steps here. + */ + if (!IsA(cstmt, CreateForeignTableStmt)) + elog(ERROR, + "foreign-data wrapper \"%s\" returned incorrect statement type %d", + fdw->fdwname, (int) nodeTag(cstmt)); + + /* Ignore commands for tables excluded by filter options */ + if (!IsImportableForeignTable(cstmt->base.relation->relname, stmt)) + continue; + + /* Enable reporting of current table's name on error */ + callback_arg.tablename = cstmt->base.relation->relname; + + /* Ensure creation schema is the one given in IMPORT statement */ + cstmt->base.relation->schemaname = pstrdup(stmt->local_schema); + + /* No planning needed, just make a wrapper PlannedStmt */ + pstmt = makeNode(PlannedStmt); + pstmt->commandType = CMD_UTILITY; + pstmt->canSetTag = false; + pstmt->utilityStmt = (Node *) cstmt; + pstmt->stmt_location = rs->stmt_location; + pstmt->stmt_len = rs->stmt_len; + + /* Execute statement */ + ProcessUtility(pstmt, cmd, false, + PROCESS_UTILITY_SUBCOMMAND, NULL, NULL, + None_Receiver, NULL); + + /* Be sure to advance the command counter between subcommands */ + CommandCounterIncrement(); + + callback_arg.tablename = NULL; + } + + error_context_stack = sqlerrcontext.previous; + } +} + +/* + * error context callback to let us supply the failing SQL statement's text + */ +static void +import_error_callback(void *arg) +{ + import_error_callback_arg *callback_arg = (import_error_callback_arg *) arg; + int syntaxerrposition; + + /* If it's a syntax error, convert to internal syntax error report */ + syntaxerrposition = geterrposition(); + if (syntaxerrposition > 0) + { + errposition(0); + internalerrposition(syntaxerrposition); + internalerrquery(callback_arg->cmd); + } + + if (callback_arg->tablename) 
+ errcontext("importing foreign table \"%s\"", + callback_arg->tablename); +} diff --git a/src/backend/commands/functioncmds.c b/src/backend/commands/functioncmds.c new file mode 100644 index 0000000..00a6d28 --- /dev/null +++ b/src/backend/commands/functioncmds.c @@ -0,0 +1,2374 @@ +/*------------------------------------------------------------------------- + * + * functioncmds.c + * + * Routines for CREATE and DROP FUNCTION commands and CREATE and DROP + * CAST commands. + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/functioncmds.c + * + * DESCRIPTION + * These routines take the parse tree and pick out the + * appropriate arguments/flags, and pass the results to the + * corresponding "FooDefine" routines (in src/catalog) that do + * the actual catalog-munging. These routines also verify permission + * of the user to execute the command. 
+ * + * NOTES + * These things must be defined and committed in the following order: + * "create function": + * input/output, recv/send procedures + * "create type": + * type + * "create operator": + * operators + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/genam.h" +#include "access/htup_details.h" +#include "access/sysattr.h" +#include "access/table.h" +#include "catalog/catalog.h" +#include "catalog/dependency.h" +#include "catalog/indexing.h" +#include "catalog/objectaccess.h" +#include "catalog/pg_aggregate.h" +#include "catalog/pg_cast.h" +#include "catalog/pg_language.h" +#include "catalog/pg_namespace.h" +#include "catalog/pg_proc.h" +#include "catalog/pg_transform.h" +#include "catalog/pg_type.h" +#include "commands/alter.h" +#include "commands/defrem.h" +#include "commands/extension.h" +#include "commands/proclang.h" +#include "executor/execdesc.h" +#include "executor/executor.h" +#include "executor/functions.h" +#include "funcapi.h" +#include "miscadmin.h" +#include "optimizer/optimizer.h" +#include "parser/analyze.h" +#include "parser/parse_coerce.h" +#include "parser/parse_collate.h" +#include "parser/parse_expr.h" +#include "parser/parse_func.h" +#include "parser/parse_type.h" +#include "pgstat.h" +#include "tcop/pquery.h" +#include "tcop/utility.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" +#include "utils/guc.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/rel.h" +#include "utils/snapmgr.h" +#include "utils/syscache.h" +#include "utils/typcache.h" + +/* + * Examine the RETURNS clause of the CREATE FUNCTION statement + * and return information about it as *prorettype_p and *returnsSet. + * + * This is more complex than the average typename lookup because we want to + * allow a shell type to be used, or even created if the specified return type + * doesn't exist yet. 
(Without this, there's no way to define the I/O procs + * for a new type.) But SQL function creation won't cope, so error out if + * the target language is SQL. (We do this here, not in the SQL-function + * validator, so as not to produce a NOTICE and then an ERROR for the same + * condition.) + */ +static void +compute_return_type(TypeName *returnType, Oid languageOid, + Oid *prorettype_p, bool *returnsSet_p) +{ + Oid rettype; + Type typtup; + AclResult aclresult; + + typtup = LookupTypeName(NULL, returnType, NULL, false); + + if (typtup) + { + if (!((Form_pg_type) GETSTRUCT(typtup))->typisdefined) + { + if (languageOid == SQLlanguageId) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("SQL function cannot return shell type %s", + TypeNameToString(returnType)))); + else + ereport(NOTICE, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("return type %s is only a shell", + TypeNameToString(returnType)))); + } + rettype = typeTypeId(typtup); + ReleaseSysCache(typtup); + } + else + { + char *typnam = TypeNameToString(returnType); + Oid namespaceId; + AclResult aclresult; + char *typname; + ObjectAddress address; + + /* + * Only C-coded functions can be I/O functions. We enforce this + * restriction here mainly to prevent littering the catalogs with + * shell types due to simple typos in user-defined function + * definitions. 
+ */ + if (languageOid != INTERNALlanguageId && + languageOid != ClanguageId) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("type \"%s\" does not exist", typnam))); + + /* Reject if there's typmod decoration, too */ + if (returnType->typmods != NIL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("type modifier cannot be specified for shell type \"%s\"", + typnam))); + + /* Otherwise, go ahead and make a shell type */ + ereport(NOTICE, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("type \"%s\" is not yet defined", typnam), + errdetail("Creating a shell type definition."))); + namespaceId = QualifiedNameGetCreationNamespace(returnType->names, + &typname); + aclresult = pg_namespace_aclcheck(namespaceId, GetUserId(), + ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_SCHEMA, + get_namespace_name(namespaceId)); + address = TypeShellMake(typname, namespaceId, GetUserId()); + rettype = address.objectId; + Assert(OidIsValid(rettype)); + } + + aclresult = pg_type_aclcheck(rettype, GetUserId(), ACL_USAGE); + if (aclresult != ACLCHECK_OK) + aclcheck_error_type(aclresult, rettype); + + *prorettype_p = rettype; + *returnsSet_p = returnType->setof; +} + +/* + * Interpret the function parameter list of a CREATE FUNCTION, + * CREATE PROCEDURE, or CREATE AGGREGATE statement. + * + * Input parameters: + * parameters: list of FunctionParameter structs + * languageOid: OID of function language (InvalidOid if it's CREATE AGGREGATE) + * objtype: identifies type of object being created + * + * Results are stored into output parameters. parameterTypes must always + * be created, but the other arrays/lists can be NULL pointers if not needed. + * variadicArgType is set to the variadic array type if there's a VARIADIC + * parameter (there can be only one); or to InvalidOid if not. + * requiredResultType is set to InvalidOid if there are no OUT parameters, + * else it is set to the OID of the implied result type. 
 */
void
interpret_function_parameter_list(ParseState *pstate,
								  List *parameters,
								  Oid languageOid,
								  ObjectType objtype,
								  oidvector **parameterTypes,
								  List **parameterTypes_list,
								  ArrayType **allParameterTypes,
								  ArrayType **parameterModes,
								  ArrayType **parameterNames,
								  List **inParameterNames_list,
								  List **parameterDefaults,
								  Oid *variadicArgType,
								  Oid *requiredResultType)
{
	int			parameterCount = list_length(parameters);
	Oid		   *inTypes;
	int			inCount = 0;
	Datum	   *allTypes;
	Datum	   *paramModes;
	Datum	   *paramNames;
	int			outCount = 0;
	int			varCount = 0;
	bool		have_names = false;
	bool		have_defaults = false;
	ListCell   *x;
	int			i;

	*variadicArgType = InvalidOid;	/* default result */
	*requiredResultType = InvalidOid;	/* default result */

	/* palloc0 for paramNames so unnamed entries read as NULL datums below */
	inTypes = (Oid *) palloc(parameterCount * sizeof(Oid));
	allTypes = (Datum *) palloc(parameterCount * sizeof(Datum));
	paramModes = (Datum *) palloc(parameterCount * sizeof(Datum));
	paramNames = (Datum *) palloc0(parameterCount * sizeof(Datum));
	*parameterDefaults = NIL;

	/* Scan the list and extract data into work arrays */
	i = 0;
	foreach(x, parameters)
	{
		FunctionParameter *fp = (FunctionParameter *) lfirst(x);
		TypeName   *t = fp->argType;
		FunctionParameterMode fpmode = fp->mode;
		bool		isinput = false;
		Oid			toid;
		Type		typtup;
		AclResult	aclresult;

		/* For our purposes here, a defaulted mode spec is identical to IN */
		if (fpmode == FUNC_PARAM_DEFAULT)
			fpmode = FUNC_PARAM_IN;

		typtup = LookupTypeName(NULL, t, NULL, false);
		if (typtup)
		{
			if (!((Form_pg_type) GETSTRUCT(typtup))->typisdefined)
			{
				/* As above, hard error if language is SQL */
				if (languageOid == SQLlanguageId)
					ereport(ERROR,
							(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
							 errmsg("SQL function cannot accept shell type %s",
									TypeNameToString(t))));
				/* We don't allow creating aggregates on shell types either */
				else if (objtype == OBJECT_AGGREGATE)
					ereport(ERROR,
							(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
							 errmsg("aggregate cannot accept shell type %s",
									TypeNameToString(t))));
				else
					ereport(NOTICE,
							(errcode(ERRCODE_WRONG_OBJECT_TYPE),
							 errmsg("argument type %s is only a shell",
									TypeNameToString(t))));
			}
			toid = typeTypeId(typtup);
			ReleaseSysCache(typtup);
		}
		else
		{
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_OBJECT),
					 errmsg("type %s does not exist",
							TypeNameToString(t))));
			toid = InvalidOid;	/* keep compiler quiet */
		}

		aclresult = pg_type_aclcheck(toid, GetUserId(), ACL_USAGE);
		if (aclresult != ACLCHECK_OK)
			aclcheck_error_type(aclresult, toid);

		/* SETOF is never valid for an argument, whatever the object type */
		if (t->setof)
		{
			if (objtype == OBJECT_AGGREGATE)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
						 errmsg("aggregates cannot accept set arguments")));
			else if (objtype == OBJECT_PROCEDURE)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
						 errmsg("procedures cannot accept set arguments")));
			else
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
						 errmsg("functions cannot accept set arguments")));
		}

		/* handle input parameters */
		if (fpmode != FUNC_PARAM_OUT && fpmode != FUNC_PARAM_TABLE)
		{
			/* other input parameters can't follow a VARIADIC parameter */
			if (varCount > 0)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
						 errmsg("VARIADIC parameter must be the last input parameter")));
			inTypes[inCount++] = toid;
			isinput = true;
			if (parameterTypes_list)
				*parameterTypes_list = lappend_oid(*parameterTypes_list, toid);
		}

		/* handle output parameters */
		if (fpmode != FUNC_PARAM_IN && fpmode != FUNC_PARAM_VARIADIC)
		{
			if (objtype == OBJECT_PROCEDURE)
			{
				/*
				 * We disallow OUT-after-VARIADIC only for procedures.  While
				 * such a case causes no confusion in ordinary function calls,
				 * it would cause confusion in a CALL statement.
				 */
				if (varCount > 0)
					ereport(ERROR,
							(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
							 errmsg("VARIADIC parameter must be the last parameter")));
				/* Procedures with output parameters always return RECORD */
				*requiredResultType = RECORDOID;
			}
			else if (outCount == 0) /* save first output param's type */
				*requiredResultType = toid;
			outCount++;
		}

		if (fpmode == FUNC_PARAM_VARIADIC)
		{
			*variadicArgType = toid;
			varCount++;
			/* validate variadic parameter type */
			switch (toid)
			{
				case ANYARRAYOID:
				case ANYCOMPATIBLEARRAYOID:
				case ANYOID:
					/* okay */
					break;
				default:
					if (!OidIsValid(get_element_type(toid)))
						ereport(ERROR,
								(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
								 errmsg("VARIADIC parameter must be an array")));
					break;
			}
		}

		allTypes[i] = ObjectIdGetDatum(toid);

		paramModes[i] = CharGetDatum(fpmode);

		if (fp->name && fp->name[0])
		{
			ListCell   *px;

			/*
			 * As of Postgres 9.0 we disallow using the same name for two
			 * input or two output function parameters.  Depending on the
			 * function's language, conflicting input and output names might
			 * be bad too, but we leave it to the PL to complain if so.
			 */
			foreach(px, parameters)
			{
				FunctionParameter *prevfp = (FunctionParameter *) lfirst(px);
				FunctionParameterMode prevfpmode;

				if (prevfp == fp)
					break;
				/* as above, default mode is IN */
				prevfpmode = prevfp->mode;
				if (prevfpmode == FUNC_PARAM_DEFAULT)
					prevfpmode = FUNC_PARAM_IN;
				/* pure in doesn't conflict with pure out */
				if ((fpmode == FUNC_PARAM_IN ||
					 fpmode == FUNC_PARAM_VARIADIC) &&
					(prevfpmode == FUNC_PARAM_OUT ||
					 prevfpmode == FUNC_PARAM_TABLE))
					continue;
				if ((prevfpmode == FUNC_PARAM_IN ||
					 prevfpmode == FUNC_PARAM_VARIADIC) &&
					(fpmode == FUNC_PARAM_OUT ||
					 fpmode == FUNC_PARAM_TABLE))
					continue;
				if (prevfp->name && prevfp->name[0] &&
					strcmp(prevfp->name, fp->name) == 0)
					ereport(ERROR,
							(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
							 errmsg("parameter name \"%s\" used more than once",
									fp->name)));
			}

			paramNames[i] = CStringGetTextDatum(fp->name);
			have_names = true;
		}

		/* unnamed parameters are recorded as empty strings in this list */
		if (inParameterNames_list)
			*inParameterNames_list = lappend(*inParameterNames_list, makeString(fp->name ? fp->name : pstrdup("")));

		if (fp->defexpr)
		{
			Node	   *def;

			if (!isinput)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
						 errmsg("only input parameters can have default values")));

			def = transformExpr(pstate, fp->defexpr,
								EXPR_KIND_FUNCTION_DEFAULT);
			def = coerce_to_specific_type(pstate, def, toid, "DEFAULT");
			assign_expr_collations(pstate, def);

			/*
			 * Make sure no variables are referred to (this is probably dead
			 * code now that add_missing_from is history).
			 */
			if (list_length(pstate->p_rtable) != 0 ||
				contain_var_clause(def))
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
						 errmsg("cannot use table references in parameter default value")));

			/*
			 * transformExpr() should have already rejected subqueries,
			 * aggregates, and window functions, based on the EXPR_KIND_ for a
			 * default expression.
			 *
			 * It can't return a set either --- but coerce_to_specific_type
			 * already checked that for us.
			 *
			 * Note: the point of these restrictions is to ensure that an
			 * expression that, on its face, hasn't got subplans, aggregates,
			 * etc cannot suddenly have them after function default arguments
			 * are inserted.
			 */

			*parameterDefaults = lappend(*parameterDefaults, def);
			have_defaults = true;
		}
		else
		{
			if (isinput && have_defaults)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
						 errmsg("input parameters after one with a default value must also have defaults")));

			/*
			 * For procedures, we also can't allow OUT parameters after one
			 * with a default, because the same sort of confusion arises in a
			 * CALL statement.
			 */
			if (objtype == OBJECT_PROCEDURE && have_defaults)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
						 errmsg("procedure OUT parameters cannot appear after one with a default value")));
		}

		i++;
	}

	/* Now construct the proper outputs as needed */
	*parameterTypes = buildoidvector(inTypes, inCount);

	if (outCount > 0 || varCount > 0)
	{
		*allParameterTypes = construct_array(allTypes, parameterCount, OIDOID,
											 sizeof(Oid), true, TYPALIGN_INT);
		*parameterModes = construct_array(paramModes, parameterCount, CHAROID,
										  1, true, TYPALIGN_CHAR);
		if (outCount > 1)
			*requiredResultType = RECORDOID;
		/* otherwise we set requiredResultType correctly above */
	}
	else
	{
		/* all-IN, non-variadic signatures store SQL NULL for these columns */
		*allParameterTypes = NULL;
		*parameterModes = NULL;
	}

	if (have_names)
	{
		for (i = 0; i < parameterCount; i++)
		{
			/* fill in empty strings for unnamed slots; array can't have nulls */
			if (paramNames[i] == PointerGetDatum(NULL))
				paramNames[i] = CStringGetTextDatum("");
		}
		*parameterNames = construct_array(paramNames, parameterCount, TEXTOID,
										  -1, false, TYPALIGN_INT);
	}
	else
		*parameterNames = NULL;
}


/*
 * Recognize one of the options that can be passed to both CREATE
 * FUNCTION and ALTER FUNCTION and return it via one of the out
 * parameters.
Returns true if the passed option was recognized.  If
 * the out parameter we were going to assign to points to non-NULL,
 * raise a duplicate-clause error.  (We don't try to detect duplicate
 * SET parameters though --- if you're redundant, the last one wins.)
 */
static bool
compute_common_attribute(ParseState *pstate,
						 bool is_procedure,
						 DefElem *defel,
						 DefElem **volatility_item,
						 DefElem **strict_item,
						 DefElem **security_item,
						 DefElem **leakproof_item,
						 List **set_items,
						 DefElem **cost_item,
						 DefElem **rows_item,
						 DefElem **support_item,
						 DefElem **parallel_item)
{
	if (strcmp(defel->defname, "volatility") == 0)
	{
		if (is_procedure)
			goto procedure_error;
		if (*volatility_item)
			errorConflictingDefElem(defel, pstate);

		*volatility_item = defel;
	}
	else if (strcmp(defel->defname, "strict") == 0)
	{
		if (is_procedure)
			goto procedure_error;
		if (*strict_item)
			errorConflictingDefElem(defel, pstate);

		*strict_item = defel;
	}
	else if (strcmp(defel->defname, "security") == 0)
	{
		/* SECURITY DEFINER/INVOKER is valid for procedures too */
		if (*security_item)
			errorConflictingDefElem(defel, pstate);

		*security_item = defel;
	}
	else if (strcmp(defel->defname, "leakproof") == 0)
	{
		if (is_procedure)
			goto procedure_error;
		if (*leakproof_item)
			errorConflictingDefElem(defel, pstate);

		*leakproof_item = defel;
	}
	else if (strcmp(defel->defname, "set") == 0)
	{
		/* SET items accumulate; duplicates are resolved last-one-wins */
		*set_items = lappend(*set_items, defel->arg);
	}
	else if (strcmp(defel->defname, "cost") == 0)
	{
		if (is_procedure)
			goto procedure_error;
		if (*cost_item)
			errorConflictingDefElem(defel, pstate);

		*cost_item = defel;
	}
	else if (strcmp(defel->defname, "rows") == 0)
	{
		if (is_procedure)
			goto procedure_error;
		if (*rows_item)
			errorConflictingDefElem(defel, pstate);

		*rows_item = defel;
	}
	else if (strcmp(defel->defname, "support") == 0)
	{
		if (is_procedure)
			goto procedure_error;
		if (*support_item)
			errorConflictingDefElem(defel, pstate);

		*support_item = defel;
	}
	else if (strcmp(defel->defname, "parallel") == 0)
	{
		if (is_procedure)
			goto procedure_error;
		if (*parallel_item)
			errorConflictingDefElem(defel, pstate);

		*parallel_item = defel;
	}
	else
		return false;

	/* Recognized an option */
	return true;

procedure_error:
	ereport(ERROR,
			(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
			 errmsg("invalid attribute in procedure definition"),
			 parser_errposition(pstate, defel->location)));
	return false;
}

/*
 * Map a "volatility" DefElem string to the PROVOLATILE_* char stored in
 * pg_proc.provolatile.  The grammar should only produce the three values
 * handled here, hence elog (internal error) rather than ereport.
 */
static char
interpret_func_volatility(DefElem *defel)
{
	char	   *str = strVal(defel->arg);

	if (strcmp(str, "immutable") == 0)
		return PROVOLATILE_IMMUTABLE;
	else if (strcmp(str, "stable") == 0)
		return PROVOLATILE_STABLE;
	else if (strcmp(str, "volatile") == 0)
		return PROVOLATILE_VOLATILE;
	else
	{
		elog(ERROR, "invalid volatility \"%s\"", str);
		return 0;				/* keep compiler quiet */
	}
}

/*
 * Map a "parallel" DefElem string to the PROPARALLEL_* char stored in
 * pg_proc.proparallel.  Unlike volatility, arbitrary identifiers can reach
 * here, so a user-facing ereport is used for bad input.
 */
static char
interpret_func_parallel(DefElem *defel)
{
	char	   *str = strVal(defel->arg);

	if (strcmp(str, "safe") == 0)
		return PROPARALLEL_SAFE;
	else if (strcmp(str, "unsafe") == 0)
		return PROPARALLEL_UNSAFE;
	else if (strcmp(str, "restricted") == 0)
		return PROPARALLEL_RESTRICTED;
	else
	{
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 errmsg("parameter \"parallel\" must be SAFE, RESTRICTED, or UNSAFE")));
		return PROPARALLEL_UNSAFE;	/* keep compiler quiet */
	}
}

/*
 * Update a proconfig value according to a list of VariableSetStmt items.
 *
 * The input and result may be NULL to signify a null entry.
 */
static ArrayType *
update_proconfig_value(ArrayType *a, List *set_items)
{
	ListCell   *l;

	/* apply each SET/RESET left to right; later items override earlier */
	foreach(l, set_items)
	{
		VariableSetStmt *sstmt = lfirst_node(VariableSetStmt, l);

		if (sstmt->kind == VAR_RESET_ALL)
			a = NULL;			/* RESET ALL discards the whole array */
		else
		{
			char	   *valuestr = ExtractSetVariableArgs(sstmt);

			if (valuestr)
				a = GUCArrayAdd(a, sstmt->name, valuestr);
			else				/* RESET */
				a = GUCArrayDelete(a, sstmt->name);
		}
	}

	return a;
}

/*
 * Look up and validate the planner-support function named by a SUPPORT
 * clause.  Returns the support function's OID; errors out if the function
 * doesn't exist, has the wrong signature, or the caller isn't superuser.
 */
static Oid
interpret_func_support(DefElem *defel)
{
	List	   *procName = defGetQualifiedName(defel);
	Oid			procOid;
	Oid			argList[1];

	/*
	 * Support functions always take one INTERNAL argument and return
	 * INTERNAL.
	 */
	argList[0] = INTERNALOID;

	procOid = LookupFuncName(procName, 1, argList, true);
	if (!OidIsValid(procOid))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_FUNCTION),
				 errmsg("function %s does not exist",
						func_signature_string(procName, 1, NIL, argList))));

	if (get_func_rettype(procOid) != INTERNALOID)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("support function %s must return type %s",
						NameListToString(procName), "internal")));

	/*
	 * Someday we might want an ACL check here; but for now, we insist that
	 * you be superuser to specify a support function, so privilege on the
	 * support function is moot.
	 */
	if (!superuser())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("must be superuser to specify a support function")));

	return procOid;
}


/*
 * Dissect the list of options assembled in gram.y into function
 * attributes.
 */
static void
compute_function_attributes(ParseState *pstate,
							bool is_procedure,
							List *options,
							List **as,
							char **language,
							Node **transform,
							bool *windowfunc_p,
							char *volatility_p,
							bool *strict_p,
							bool *security_definer,
							bool *leakproof_p,
							ArrayType **proconfig,
							float4 *procost,
							float4 *prorows,
							Oid *prosupport,
							char *parallel_p)
{
	ListCell   *option;
	DefElem    *as_item = NULL;
	DefElem    *language_item = NULL;
	DefElem    *transform_item = NULL;
	DefElem    *windowfunc_item = NULL;
	DefElem    *volatility_item = NULL;
	DefElem    *strict_item = NULL;
	DefElem    *security_item = NULL;
	DefElem    *leakproof_item = NULL;
	List	   *set_items = NIL;
	DefElem    *cost_item = NULL;
	DefElem    *rows_item = NULL;
	DefElem    *support_item = NULL;
	DefElem    *parallel_item = NULL;

	/* First pass: classify each option, rejecting duplicates */
	foreach(option, options)
	{
		DefElem    *defel = (DefElem *) lfirst(option);

		if (strcmp(defel->defname, "as") == 0)
		{
			if (as_item)
				errorConflictingDefElem(defel, pstate);
			as_item = defel;
		}
		else if (strcmp(defel->defname, "language") == 0)
		{
			if (language_item)
				errorConflictingDefElem(defel, pstate);
			language_item = defel;
		}
		else if (strcmp(defel->defname, "transform") == 0)
		{
			if (transform_item)
				errorConflictingDefElem(defel, pstate);
			transform_item = defel;
		}
		else if (strcmp(defel->defname, "window") == 0)
		{
			if (windowfunc_item)
				errorConflictingDefElem(defel, pstate);
			if (is_procedure)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
						 errmsg("invalid attribute in procedure definition"),
						 parser_errposition(pstate, defel->location)));
			windowfunc_item = defel;
		}
		else if (compute_common_attribute(pstate,
										  is_procedure,
										  defel,
										  &volatility_item,
										  &strict_item,
										  &security_item,
										  &leakproof_item,
										  &set_items,
										  &cost_item,
										  &rows_item,
										  &support_item,
										  &parallel_item))
		{
			/* recognized common option */
			continue;
		}
		else
			elog(ERROR, "option \"%s\" not recognized",
				 defel->defname);
	}

	/* Second pass: convert the collected items into output values */
	if (as_item)
		*as = (List *) as_item->arg;
	if (language_item)
		*language = strVal(language_item->arg);
	if (transform_item)
		*transform = transform_item->arg;
	if (windowfunc_item)
		*windowfunc_p = boolVal(windowfunc_item->arg);
	if (volatility_item)
		*volatility_p = interpret_func_volatility(volatility_item);
	if (strict_item)
		*strict_p = boolVal(strict_item->arg);
	if (security_item)
		*security_definer = boolVal(security_item->arg);
	if (leakproof_item)
		*leakproof_p = boolVal(leakproof_item->arg);
	if (set_items)
		*proconfig = update_proconfig_value(NULL, set_items);
	if (cost_item)
	{
		*procost = defGetNumeric(cost_item);
		if (*procost <= 0)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("COST must be positive")));
	}
	if (rows_item)
	{
		*prorows = defGetNumeric(rows_item);
		if (*prorows <= 0)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("ROWS must be positive")));
	}
	if (support_item)
		*prosupport = interpret_func_support(support_item);
	if (parallel_item)
		*parallel_p = interpret_func_parallel(parallel_item);
}


/*
 * Interpret the AS clause (or the inline SQL body) of CREATE FUNCTION.
 *
 * For a dynamically linked C language object, the form of the clause is
 *
 *	   AS <object file name> [, <link symbol name>]
 *
 * In all other cases
 *
 *	   AS <definition string>
 *
 * Results go into *prosrc_str_p / *probin_str_p / *sql_body_out; exactly
 * which are set depends on the language, as commented below.
 */
static void
interpret_AS_clause(Oid languageOid, const char *languageName,
					char *funcname, List *as, Node *sql_body_in,
					List *parameterTypes, List *inParameterNames,
					char **prosrc_str_p, char **probin_str_p,
					Node **sql_body_out,
					const char *queryString)
{
	if (!sql_body_in && !as)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
				 errmsg("no function body specified")));

	if (sql_body_in && as)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
				 errmsg("duplicate function body specified")));

	if (sql_body_in && languageOid != SQLlanguageId)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
				 errmsg("inline SQL function body only valid for language SQL")));

	*sql_body_out = NULL;

	if (languageOid == ClanguageId)
	{
		/*
		 * For "C" language, store the file name in probin and, when given,
		 * the link symbol name in prosrc.  If link symbol is omitted,
		 * substitute procedure name.  We also allow link symbol to be
		 * specified as "-", since that was the habit in PG versions before
		 * 8.4, and there might be dump files out there that don't translate
		 * that back to "omitted".
		 */
		*probin_str_p = strVal(linitial(as));
		if (list_length(as) == 1)
			*prosrc_str_p = funcname;
		else
		{
			*prosrc_str_p = strVal(lsecond(as));
			if (strcmp(*prosrc_str_p, "-") == 0)
				*prosrc_str_p = funcname;
		}
	}
	else if (sql_body_in)
	{
		/* SQL-standard body: parse it now and store the tree in prosqlbody */
		SQLFunctionParseInfoPtr pinfo;

		pinfo = (SQLFunctionParseInfoPtr) palloc0(sizeof(SQLFunctionParseInfo));

		pinfo->fname = funcname;
		pinfo->nargs = list_length(parameterTypes);
		pinfo->argtypes = (Oid *) palloc(pinfo->nargs * sizeof(Oid));
		pinfo->argnames = (char **) palloc(pinfo->nargs * sizeof(char *));
		for (int i = 0; i < list_length(parameterTypes); i++)
		{
			char	   *s = strVal(list_nth(inParameterNames, i));

			pinfo->argtypes[i] = list_nth_oid(parameterTypes, i);
			if (IsPolymorphicType(pinfo->argtypes[i]))
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
						 errmsg("SQL function with unquoted function body cannot have polymorphic arguments")));

			/* empty string in the list means "no name" (see parameter scan) */
			if (s[0] != '\0')
				pinfo->argnames[i] = s;
			else
				pinfo->argnames[i] = NULL;
		}

		if (IsA(sql_body_in, List))
		{
			/* BEGIN ATOMIC ... END: a list of statements */
			List	   *stmts = linitial_node(List, castNode(List, sql_body_in));
			ListCell   *lc;
			List	   *transformed_stmts = NIL;

			foreach(lc, stmts)
			{
				Node	   *stmt = lfirst(lc);
				Query	   *q;
				ParseState *pstate = make_parsestate(NULL);

				pstate->p_sourcetext = queryString;
				sql_fn_parser_setup(pstate, pinfo);
				q = transformStmt(pstate, stmt);
				if (q->commandType == CMD_UTILITY)
					ereport(ERROR,
							errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
							errmsg("%s is not yet supported in unquoted SQL function body",
								   GetCommandTagName(CreateCommandTag(q->utilityStmt))));
				transformed_stmts = lappend(transformed_stmts, q);
				free_parsestate(pstate);
			}

			*sql_body_out = (Node *) list_make1(transformed_stmts);
		}
		else
		{
			/* RETURN <expr>: a single statement */
			Query	   *q;
			ParseState *pstate = make_parsestate(NULL);

			pstate->p_sourcetext = queryString;
			sql_fn_parser_setup(pstate, pinfo);
			q = transformStmt(pstate, sql_body_in);
			if (q->commandType == CMD_UTILITY)
				ereport(ERROR,
						errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						errmsg("%s is not yet supported in unquoted SQL function body",
							   GetCommandTagName(CreateCommandTag(q->utilityStmt))));
			free_parsestate(pstate);

			*sql_body_out = (Node *) q;
		}

		/*
		 * We must put something in prosrc.  For the moment, just record an
		 * empty string.  It might be useful to store the original text of the
		 * CREATE FUNCTION statement --- but to make actual use of that in
		 * error reports, we'd also have to adjust readfuncs.c to not throw
		 * away node location fields when reading prosqlbody.
		 */
		*prosrc_str_p = pstrdup("");

		/* But we definitely don't need probin. */
		*probin_str_p = NULL;
	}
	else
	{
		/* Everything else wants the given string in prosrc. */
		*prosrc_str_p = strVal(linitial(as));
		*probin_str_p = NULL;

		if (list_length(as) != 1)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
					 errmsg("only one AS item needed for language \"%s\"",
							languageName)));

		if (languageOid == INTERNALlanguageId)
		{
			/*
			 * In PostgreSQL versions before 6.5, the SQL name of the created
			 * function could not be different from the internal name, and
			 * "prosrc" wasn't used.  So there is code out there that does
			 * CREATE FUNCTION xyz AS '' LANGUAGE internal.  To preserve some
			 * modicum of backwards compatibility, accept an empty "prosrc"
			 * value as meaning the supplied SQL function name.
			 */
			if (strlen(*prosrc_str_p) == 0)
				*prosrc_str_p = funcname;
		}
	}
}


/*
 * CreateFunction
 *	 Execute a CREATE FUNCTION (or CREATE PROCEDURE) utility statement.
 *
 * Returns the ObjectAddress of the new (or replaced) pg_proc entry.
 */
ObjectAddress
CreateFunction(ParseState *pstate, CreateFunctionStmt *stmt)
{
	char	   *probin_str;
	char	   *prosrc_str;
	Node	   *prosqlbody;
	Oid			prorettype;
	bool		returnsSet;
	char	   *language;
	Oid			languageOid;
	Oid			languageValidator;
	Node	   *transformDefElem = NULL;
	char	   *funcname;
	Oid			namespaceId;
	AclResult	aclresult;
	oidvector  *parameterTypes;
	List	   *parameterTypes_list = NIL;
	ArrayType  *allParameterTypes;
	ArrayType  *parameterModes;
	ArrayType  *parameterNames;
	List	   *inParameterNames_list = NIL;
	List	   *parameterDefaults;
	Oid			variadicArgType;
	List	   *trftypes_list = NIL;
	ArrayType  *trftypes;
	Oid			requiredResultType;
	bool		isWindowFunc,
				isStrict,
				security,
				isLeakProof;
	char		volatility;
	ArrayType  *proconfig;
	float4		procost;
	float4		prorows;
	Oid			prosupport;
	HeapTuple	languageTuple;
	Form_pg_language languageStruct;
	List	   *as_clause;
	char		parallel;

	/* Convert list of names to a name and namespace */
	namespaceId = QualifiedNameGetCreationNamespace(stmt->funcname,
													&funcname);

	/* Check we have creation rights in target namespace */
	aclresult = pg_namespace_aclcheck(namespaceId, GetUserId(), ACL_CREATE);
	if (aclresult != ACLCHECK_OK)
		aclcheck_error(aclresult, OBJECT_SCHEMA,
					   get_namespace_name(namespaceId));

	/* Set default attributes */
	as_clause = NIL;
	language = NULL;
	isWindowFunc = false;
	isStrict = false;
	security = false;
	isLeakProof = false;
	volatility = PROVOLATILE_VOLATILE;
	proconfig = NULL;
	procost = -1;				/* indicates not set */
	prorows = -1;				/* indicates not set */
	prosupport = InvalidOid;
	parallel = PROPARALLEL_UNSAFE;

	/* Extract non-default attributes from stmt->options list */
	compute_function_attributes(pstate,
								stmt->is_procedure,
								stmt->options,
								&as_clause, &language,
								&transformDefElem,
								&isWindowFunc, &volatility,
								&isStrict, &security, &isLeakProof,
								&proconfig, &procost, &prorows,
								&prosupport, &parallel);

	if (!language)
	{
		/* a SQL-standard body implies LANGUAGE SQL */
		if (stmt->sql_body)
			language = "sql";
		else
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
					 errmsg("no language specified")));
	}

	/* Look up the language and validate permissions */
	languageTuple = SearchSysCache1(LANGNAME, PointerGetDatum(language));
	if (!HeapTupleIsValid(languageTuple))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("language \"%s\" does not exist", language),
				 (extension_file_exists(language) ?
				  errhint("Use CREATE EXTENSION to load the language into the database.") : 0)));

	languageStruct = (Form_pg_language) GETSTRUCT(languageTuple);
	languageOid = languageStruct->oid;

	if (languageStruct->lanpltrusted)
	{
		/* if trusted language, need USAGE privilege */
		AclResult	aclresult;

		aclresult = pg_language_aclcheck(languageOid, GetUserId(), ACL_USAGE);
		if (aclresult != ACLCHECK_OK)
			aclcheck_error(aclresult, OBJECT_LANGUAGE,
						   NameStr(languageStruct->lanname));
	}
	else
	{
		/* if untrusted language, must be superuser */
		if (!superuser())
			aclcheck_error(ACLCHECK_NO_PRIV, OBJECT_LANGUAGE,
						   NameStr(languageStruct->lanname));
	}

	languageValidator = languageStruct->lanvalidator;

	ReleaseSysCache(languageTuple);

	/*
	 * Only superuser is allowed to create leakproof functions because
	 * leakproof functions can see tuples which have not yet been filtered out
	 * by security barrier views or row-level security policies.
	 */
	if (isLeakProof && !superuser())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("only superuser can define a leakproof function")));

	if (transformDefElem)
	{
		ListCell   *lc;

		/* resolve each TRANSFORM FOR TYPE entry to its base element type */
		foreach(lc, castNode(List, transformDefElem))
		{
			Oid			typeid = typenameTypeId(NULL,
												lfirst_node(TypeName, lc));
			Oid			elt = get_base_element_type(typeid);

			typeid = elt ? elt : typeid;

			/* just checks that the transform exists; errors out if not */
			get_transform_oid(typeid, languageOid, false);
			trftypes_list = lappend_oid(trftypes_list, typeid);
		}
	}

	/*
	 * Convert remaining parameters of CREATE to form wanted by
	 * ProcedureCreate.
	 */
	interpret_function_parameter_list(pstate,
									  stmt->parameters,
									  languageOid,
									  stmt->is_procedure ? OBJECT_PROCEDURE : OBJECT_FUNCTION,
									  &parameterTypes,
									  &parameterTypes_list,
									  &allParameterTypes,
									  &parameterModes,
									  &parameterNames,
									  &inParameterNames_list,
									  &parameterDefaults,
									  &variadicArgType,
									  &requiredResultType);

	if (stmt->is_procedure)
	{
		Assert(!stmt->returnType);
		prorettype = requiredResultType ? requiredResultType : VOIDOID;
		returnsSet = false;
	}
	else if (stmt->returnType)
	{
		/* explicit RETURNS clause */
		compute_return_type(stmt->returnType, languageOid,
							&prorettype, &returnsSet);
		if (OidIsValid(requiredResultType) && prorettype != requiredResultType)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
					 errmsg("function result type must be %s because of OUT parameters",
							format_type_be(requiredResultType))));
	}
	else if (OidIsValid(requiredResultType))
	{
		/* default RETURNS clause from OUT parameters */
		prorettype = requiredResultType;
		returnsSet = false;
	}
	else
	{
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
				 errmsg("function result type must be specified")));
		/* Alternative possibility: default to RETURNS VOID */
		prorettype = VOIDOID;
		returnsSet = false;
	}

	if (list_length(trftypes_list) > 0)
	{
		ListCell   *lc;
		Datum	   *arr;
		int			i;

		arr = palloc(list_length(trftypes_list) * sizeof(Datum));
		i = 0;
		foreach(lc, trftypes_list)
			arr[i++] = ObjectIdGetDatum(lfirst_oid(lc));
		trftypes = construct_array(arr, list_length(trftypes_list),
								   OIDOID, sizeof(Oid), true, TYPALIGN_INT);
	}
	else
	{
		/* store SQL NULL instead of empty array */
		trftypes = NULL;
	}

	interpret_AS_clause(languageOid, language, funcname, as_clause, stmt->sql_body,
						parameterTypes_list, inParameterNames_list,
						&prosrc_str, &probin_str, &prosqlbody,
						pstate->p_sourcetext);

	/*
	 * Set default values for COST and ROWS depending on other parameters;
	 * reject ROWS if it's not returnsSet.  NB: pg_dump knows these default
	 * values, keep it in sync if you change them.
	 */
	if (procost < 0)
	{
		/* SQL and PL-language functions are assumed more expensive */
		if (languageOid == INTERNALlanguageId ||
			languageOid == ClanguageId)
			procost = 1;
		else
			procost = 100;
	}
	if (prorows < 0)
	{
		if (returnsSet)
			prorows = 1000;
		else
			prorows = 0;		/* dummy value if not returnsSet */
	}
	else if (!returnsSet)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("ROWS is not applicable when function does not return a set")));

	/*
	 * And now that we have all the parameters, and know we're permitted to do
	 * so, go ahead and create the function.
	 */
	return ProcedureCreate(funcname,
						   namespaceId,
						   stmt->replace,
						   returnsSet,
						   prorettype,
						   GetUserId(),
						   languageOid,
						   languageValidator,
						   prosrc_str,	/* converted to text later */
						   probin_str,	/* converted to text later */
						   prosqlbody,
						   stmt->is_procedure ? PROKIND_PROCEDURE : (isWindowFunc ? PROKIND_WINDOW : PROKIND_FUNCTION),
						   security,
						   isLeakProof,
						   isStrict,
						   volatility,
						   parallel,
						   parameterTypes,
						   PointerGetDatum(allParameterTypes),
						   PointerGetDatum(parameterModes),
						   PointerGetDatum(parameterNames),
						   parameterDefaults,
						   PointerGetDatum(trftypes),
						   PointerGetDatum(proconfig),
						   prosupport,
						   procost,
						   prorows);
}

/*
 * Guts of function deletion.
 *
 * Note: this is also used for aggregate deletion, since the OIDs of
 * both functions and aggregates point to pg_proc.
 */
void
RemoveFunctionById(Oid funcOid)
{
	Relation	relation;
	HeapTuple	tup;
	char		prokind;

	/*
	 * Delete the pg_proc tuple.
	 */
	relation = table_open(ProcedureRelationId, RowExclusiveLock);

	tup = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcOid));
	if (!HeapTupleIsValid(tup)) /* should not happen */
		elog(ERROR, "cache lookup failed for function %u", funcOid);

	/* remember prokind before the tuple goes away */
	prokind = ((Form_pg_proc) GETSTRUCT(tup))->prokind;

	CatalogTupleDelete(relation, &tup->t_self);

	ReleaseSysCache(tup);

	table_close(relation, RowExclusiveLock);

	/* drop any accumulated function-call statistics */
	pgstat_drop_function(funcOid);

	/*
	 * If there's a pg_aggregate tuple, delete that too.
	 */
	if (prokind == PROKIND_AGGREGATE)
	{
		relation = table_open(AggregateRelationId, RowExclusiveLock);

		tup = SearchSysCache1(AGGFNOID, ObjectIdGetDatum(funcOid));
		if (!HeapTupleIsValid(tup)) /* should not happen */
			elog(ERROR, "cache lookup failed for pg_aggregate tuple for function %u", funcOid);

		CatalogTupleDelete(relation, &tup->t_self);

		ReleaseSysCache(tup);

		table_close(relation, RowExclusiveLock);
	}
}

/*
 * Implements the ALTER FUNCTION utility command (except for the
 * RENAME and OWNER clauses, which are handled as part of the generic
 * ALTER framework).
+ */ +ObjectAddress +AlterFunction(ParseState *pstate, AlterFunctionStmt *stmt) +{ + HeapTuple tup; + Oid funcOid; + Form_pg_proc procForm; + bool is_procedure; + Relation rel; + ListCell *l; + DefElem *volatility_item = NULL; + DefElem *strict_item = NULL; + DefElem *security_def_item = NULL; + DefElem *leakproof_item = NULL; + List *set_items = NIL; + DefElem *cost_item = NULL; + DefElem *rows_item = NULL; + DefElem *support_item = NULL; + DefElem *parallel_item = NULL; + ObjectAddress address; + + rel = table_open(ProcedureRelationId, RowExclusiveLock); + + funcOid = LookupFuncWithArgs(stmt->objtype, stmt->func, false); + + ObjectAddressSet(address, ProcedureRelationId, funcOid); + + tup = SearchSysCacheCopy1(PROCOID, ObjectIdGetDatum(funcOid)); + if (!HeapTupleIsValid(tup)) /* should not happen */ + elog(ERROR, "cache lookup failed for function %u", funcOid); + + procForm = (Form_pg_proc) GETSTRUCT(tup); + + /* Permission check: must own function */ + if (!pg_proc_ownercheck(funcOid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, stmt->objtype, + NameListToString(stmt->func->objname)); + + if (procForm->prokind == PROKIND_AGGREGATE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is an aggregate function", + NameListToString(stmt->func->objname)))); + + is_procedure = (procForm->prokind == PROKIND_PROCEDURE); + + /* Examine requested actions. 
*/ + foreach(l, stmt->actions) + { + DefElem *defel = (DefElem *) lfirst(l); + + if (compute_common_attribute(pstate, + is_procedure, + defel, + &volatility_item, + &strict_item, + &security_def_item, + &leakproof_item, + &set_items, + &cost_item, + &rows_item, + &support_item, + ¶llel_item) == false) + elog(ERROR, "option \"%s\" not recognized", defel->defname); + } + + if (volatility_item) + procForm->provolatile = interpret_func_volatility(volatility_item); + if (strict_item) + procForm->proisstrict = boolVal(strict_item->arg); + if (security_def_item) + procForm->prosecdef = boolVal(security_def_item->arg); + if (leakproof_item) + { + procForm->proleakproof = boolVal(leakproof_item->arg); + if (procForm->proleakproof && !superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("only superuser can define a leakproof function"))); + } + if (cost_item) + { + procForm->procost = defGetNumeric(cost_item); + if (procForm->procost <= 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("COST must be positive"))); + } + if (rows_item) + { + procForm->prorows = defGetNumeric(rows_item); + if (procForm->prorows <= 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("ROWS must be positive"))); + if (!procForm->proretset) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("ROWS is not applicable when function does not return a set"))); + } + if (support_item) + { + /* interpret_func_support handles the privilege check */ + Oid newsupport = interpret_func_support(support_item); + + /* Add or replace dependency on support function */ + if (OidIsValid(procForm->prosupport)) + changeDependencyFor(ProcedureRelationId, funcOid, + ProcedureRelationId, procForm->prosupport, + newsupport); + else + { + ObjectAddress referenced; + + referenced.classId = ProcedureRelationId; + referenced.objectId = newsupport; + referenced.objectSubId = 0; + recordDependencyOn(&address, &referenced, 
DEPENDENCY_NORMAL); + } + + procForm->prosupport = newsupport; + } + if (parallel_item) + procForm->proparallel = interpret_func_parallel(parallel_item); + if (set_items) + { + Datum datum; + bool isnull; + ArrayType *a; + Datum repl_val[Natts_pg_proc]; + bool repl_null[Natts_pg_proc]; + bool repl_repl[Natts_pg_proc]; + + /* extract existing proconfig setting */ + datum = SysCacheGetAttr(PROCOID, tup, Anum_pg_proc_proconfig, &isnull); + a = isnull ? NULL : DatumGetArrayTypeP(datum); + + /* update according to each SET or RESET item, left to right */ + a = update_proconfig_value(a, set_items); + + /* update the tuple */ + memset(repl_repl, false, sizeof(repl_repl)); + repl_repl[Anum_pg_proc_proconfig - 1] = true; + + if (a == NULL) + { + repl_val[Anum_pg_proc_proconfig - 1] = (Datum) 0; + repl_null[Anum_pg_proc_proconfig - 1] = true; + } + else + { + repl_val[Anum_pg_proc_proconfig - 1] = PointerGetDatum(a); + repl_null[Anum_pg_proc_proconfig - 1] = false; + } + + tup = heap_modify_tuple(tup, RelationGetDescr(rel), + repl_val, repl_null, repl_repl); + } + /* DO NOT put more touches of procForm below here; it's now dangling. 
*/ + + /* Do the update */ + CatalogTupleUpdate(rel, &tup->t_self, tup); + + InvokeObjectPostAlterHook(ProcedureRelationId, funcOid, 0); + + table_close(rel, NoLock); + heap_freetuple(tup); + + return address; +} + + +/* + * CREATE CAST + */ +ObjectAddress +CreateCast(CreateCastStmt *stmt) +{ + Oid sourcetypeid; + Oid targettypeid; + char sourcetyptype; + char targettyptype; + Oid funcid; + int nargs; + char castcontext; + char castmethod; + HeapTuple tuple; + AclResult aclresult; + ObjectAddress myself; + + sourcetypeid = typenameTypeId(NULL, stmt->sourcetype); + targettypeid = typenameTypeId(NULL, stmt->targettype); + sourcetyptype = get_typtype(sourcetypeid); + targettyptype = get_typtype(targettypeid); + + /* No pseudo-types allowed */ + if (sourcetyptype == TYPTYPE_PSEUDO) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("source data type %s is a pseudo-type", + TypeNameToString(stmt->sourcetype)))); + + if (targettyptype == TYPTYPE_PSEUDO) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("target data type %s is a pseudo-type", + TypeNameToString(stmt->targettype)))); + + /* Permission check */ + if (!pg_type_ownercheck(sourcetypeid, GetUserId()) + && !pg_type_ownercheck(targettypeid, GetUserId())) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be owner of type %s or type %s", + format_type_be(sourcetypeid), + format_type_be(targettypeid)))); + + aclresult = pg_type_aclcheck(sourcetypeid, GetUserId(), ACL_USAGE); + if (aclresult != ACLCHECK_OK) + aclcheck_error_type(aclresult, sourcetypeid); + + aclresult = pg_type_aclcheck(targettypeid, GetUserId(), ACL_USAGE); + if (aclresult != ACLCHECK_OK) + aclcheck_error_type(aclresult, targettypeid); + + /* Domains are allowed for historical reasons, but we warn */ + if (sourcetyptype == TYPTYPE_DOMAIN) + ereport(WARNING, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cast will be ignored because the source data type is a domain"))); + + else if 
(targettyptype == TYPTYPE_DOMAIN) + ereport(WARNING, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cast will be ignored because the target data type is a domain"))); + + /* Determine the cast method */ + if (stmt->func != NULL) + castmethod = COERCION_METHOD_FUNCTION; + else if (stmt->inout) + castmethod = COERCION_METHOD_INOUT; + else + castmethod = COERCION_METHOD_BINARY; + + if (castmethod == COERCION_METHOD_FUNCTION) + { + Form_pg_proc procstruct; + + funcid = LookupFuncWithArgs(OBJECT_FUNCTION, stmt->func, false); + + tuple = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for function %u", funcid); + + procstruct = (Form_pg_proc) GETSTRUCT(tuple); + nargs = procstruct->pronargs; + if (nargs < 1 || nargs > 3) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("cast function must take one to three arguments"))); + if (!IsBinaryCoercible(sourcetypeid, procstruct->proargtypes.values[0])) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("argument of cast function must match or be binary-coercible from source data type"))); + if (nargs > 1 && procstruct->proargtypes.values[1] != INT4OID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("second argument of cast function must be type %s", + "integer"))); + if (nargs > 2 && procstruct->proargtypes.values[2] != BOOLOID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("third argument of cast function must be type %s", + "boolean"))); + if (!IsBinaryCoercible(procstruct->prorettype, targettypeid)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("return data type of cast function must match or be binary-coercible to target data type"))); + + /* + * Restricting the volatility of a cast function may or may not be a + * good idea in the abstract, but it definitely breaks many old + * user-defined types. 
Disable this check --- tgl 2/1/03 + */ +#ifdef NOT_USED + if (procstruct->provolatile == PROVOLATILE_VOLATILE) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("cast function must not be volatile"))); +#endif + if (procstruct->prokind != PROKIND_FUNCTION) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("cast function must be a normal function"))); + if (procstruct->proretset) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("cast function must not return a set"))); + + ReleaseSysCache(tuple); + } + else + { + funcid = InvalidOid; + nargs = 0; + } + + if (castmethod == COERCION_METHOD_BINARY) + { + int16 typ1len; + int16 typ2len; + bool typ1byval; + bool typ2byval; + char typ1align; + char typ2align; + + /* + * Must be superuser to create binary-compatible casts, since + * erroneous casts can easily crash the backend. + */ + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to create a cast WITHOUT FUNCTION"))); + + /* + * Also, insist that the types match as to size, alignment, and + * pass-by-value attributes; this provides at least a crude check that + * they have similar representations. A pair of types that fail this + * test should certainly not be equated. + */ + get_typlenbyvalalign(sourcetypeid, &typ1len, &typ1byval, &typ1align); + get_typlenbyvalalign(targettypeid, &typ2len, &typ2byval, &typ2align); + if (typ1len != typ2len || + typ1byval != typ2byval || + typ1align != typ2align) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("source and target data types are not physically compatible"))); + + /* + * We know that composite, enum and array types are never binary- + * compatible with each other. They all have OIDs embedded in them. + * + * Theoretically you could build a user-defined base type that is + * binary-compatible with a composite, enum, or array type. 
But we + * disallow that too, as in practice such a cast is surely a mistake. + * You can always work around that by writing a cast function. + */ + if (sourcetyptype == TYPTYPE_COMPOSITE || + targettyptype == TYPTYPE_COMPOSITE) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("composite data types are not binary-compatible"))); + + if (sourcetyptype == TYPTYPE_ENUM || + targettyptype == TYPTYPE_ENUM) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("enum data types are not binary-compatible"))); + + if (OidIsValid(get_element_type(sourcetypeid)) || + OidIsValid(get_element_type(targettypeid))) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("array data types are not binary-compatible"))); + + /* + * We also disallow creating binary-compatibility casts involving + * domains. Casting from a domain to its base type is already + * allowed, and casting the other way ought to go through domain + * coercion to permit constraint checking. Again, if you're intent on + * having your own semantics for that, create a no-op cast function. + * + * NOTE: if we were to relax this, the above checks for composites + * etc. would have to be modified to look through domains to their + * base types. + */ + if (sourcetyptype == TYPTYPE_DOMAIN || + targettyptype == TYPTYPE_DOMAIN) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("domain data types must not be marked binary-compatible"))); + } + + /* + * Allow source and target types to be same only for length coercion + * functions. We assume a multi-arg function does length coercion. 
+ */ + if (sourcetypeid == targettypeid && nargs < 2) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("source data type and target data type are the same"))); + + /* convert CoercionContext enum to char value for castcontext */ + switch (stmt->context) + { + case COERCION_IMPLICIT: + castcontext = COERCION_CODE_IMPLICIT; + break; + case COERCION_ASSIGNMENT: + castcontext = COERCION_CODE_ASSIGNMENT; + break; + /* COERCION_PLPGSQL is intentionally not covered here */ + case COERCION_EXPLICIT: + castcontext = COERCION_CODE_EXPLICIT; + break; + default: + elog(ERROR, "unrecognized CoercionContext: %d", stmt->context); + castcontext = 0; /* keep compiler quiet */ + break; + } + + myself = CastCreate(sourcetypeid, targettypeid, funcid, castcontext, + castmethod, DEPENDENCY_NORMAL); + return myself; +} + + +static void +check_transform_function(Form_pg_proc procstruct) +{ + if (procstruct->provolatile == PROVOLATILE_VOLATILE) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("transform function must not be volatile"))); + if (procstruct->prokind != PROKIND_FUNCTION) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("transform function must be a normal function"))); + if (procstruct->proretset) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("transform function must not return a set"))); + if (procstruct->pronargs != 1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("transform function must take one argument"))); + if (procstruct->proargtypes.values[0] != INTERNALOID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("first argument of transform function must be type %s", + "internal"))); +} + + +/* + * CREATE TRANSFORM + */ +ObjectAddress +CreateTransform(CreateTransformStmt *stmt) +{ + Oid typeid; + char typtype; + Oid langid; + Oid fromsqlfuncid; + Oid tosqlfuncid; + AclResult aclresult; + Form_pg_proc procstruct; + Datum 
values[Natts_pg_transform]; + bool nulls[Natts_pg_transform]; + bool replaces[Natts_pg_transform]; + Oid transformid; + HeapTuple tuple; + HeapTuple newtuple; + Relation relation; + ObjectAddress myself, + referenced; + ObjectAddresses *addrs; + bool is_replace; + + /* + * Get the type + */ + typeid = typenameTypeId(NULL, stmt->type_name); + typtype = get_typtype(typeid); + + if (typtype == TYPTYPE_PSEUDO) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("data type %s is a pseudo-type", + TypeNameToString(stmt->type_name)))); + + if (typtype == TYPTYPE_DOMAIN) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("data type %s is a domain", + TypeNameToString(stmt->type_name)))); + + if (!pg_type_ownercheck(typeid, GetUserId())) + aclcheck_error_type(ACLCHECK_NOT_OWNER, typeid); + + aclresult = pg_type_aclcheck(typeid, GetUserId(), ACL_USAGE); + if (aclresult != ACLCHECK_OK) + aclcheck_error_type(aclresult, typeid); + + /* + * Get the language + */ + langid = get_language_oid(stmt->lang, false); + + aclresult = pg_language_aclcheck(langid, GetUserId(), ACL_USAGE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_LANGUAGE, stmt->lang); + + /* + * Get the functions + */ + if (stmt->fromsql) + { + fromsqlfuncid = LookupFuncWithArgs(OBJECT_FUNCTION, stmt->fromsql, false); + + if (!pg_proc_ownercheck(fromsqlfuncid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FUNCTION, NameListToString(stmt->fromsql->objname)); + + aclresult = pg_proc_aclcheck(fromsqlfuncid, GetUserId(), ACL_EXECUTE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_FUNCTION, NameListToString(stmt->fromsql->objname)); + + tuple = SearchSysCache1(PROCOID, ObjectIdGetDatum(fromsqlfuncid)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for function %u", fromsqlfuncid); + procstruct = (Form_pg_proc) GETSTRUCT(tuple); + if (procstruct->prorettype != INTERNALOID) + ereport(ERROR, + 
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("return data type of FROM SQL function must be %s", + "internal"))); + check_transform_function(procstruct); + ReleaseSysCache(tuple); + } + else + fromsqlfuncid = InvalidOid; + + if (stmt->tosql) + { + tosqlfuncid = LookupFuncWithArgs(OBJECT_FUNCTION, stmt->tosql, false); + + if (!pg_proc_ownercheck(tosqlfuncid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FUNCTION, NameListToString(stmt->tosql->objname)); + + aclresult = pg_proc_aclcheck(tosqlfuncid, GetUserId(), ACL_EXECUTE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_FUNCTION, NameListToString(stmt->tosql->objname)); + + tuple = SearchSysCache1(PROCOID, ObjectIdGetDatum(tosqlfuncid)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for function %u", tosqlfuncid); + procstruct = (Form_pg_proc) GETSTRUCT(tuple); + if (procstruct->prorettype != typeid) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("return data type of TO SQL function must be the transform data type"))); + check_transform_function(procstruct); + ReleaseSysCache(tuple); + } + else + tosqlfuncid = InvalidOid; + + /* + * Ready to go + */ + values[Anum_pg_transform_trftype - 1] = ObjectIdGetDatum(typeid); + values[Anum_pg_transform_trflang - 1] = ObjectIdGetDatum(langid); + values[Anum_pg_transform_trffromsql - 1] = ObjectIdGetDatum(fromsqlfuncid); + values[Anum_pg_transform_trftosql - 1] = ObjectIdGetDatum(tosqlfuncid); + + MemSet(nulls, false, sizeof(nulls)); + + relation = table_open(TransformRelationId, RowExclusiveLock); + + tuple = SearchSysCache2(TRFTYPELANG, + ObjectIdGetDatum(typeid), + ObjectIdGetDatum(langid)); + if (HeapTupleIsValid(tuple)) + { + Form_pg_transform form = (Form_pg_transform) GETSTRUCT(tuple); + + if (!stmt->replace) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("transform for type %s language \"%s\" already exists", + format_type_be(typeid), + stmt->lang))); + + 
MemSet(replaces, false, sizeof(replaces)); + replaces[Anum_pg_transform_trffromsql - 1] = true; + replaces[Anum_pg_transform_trftosql - 1] = true; + + newtuple = heap_modify_tuple(tuple, RelationGetDescr(relation), values, nulls, replaces); + CatalogTupleUpdate(relation, &newtuple->t_self, newtuple); + + transformid = form->oid; + ReleaseSysCache(tuple); + is_replace = true; + } + else + { + transformid = GetNewOidWithIndex(relation, TransformOidIndexId, + Anum_pg_transform_oid); + values[Anum_pg_transform_oid - 1] = ObjectIdGetDatum(transformid); + newtuple = heap_form_tuple(RelationGetDescr(relation), values, nulls); + CatalogTupleInsert(relation, newtuple); + is_replace = false; + } + + if (is_replace) + deleteDependencyRecordsFor(TransformRelationId, transformid, true); + + addrs = new_object_addresses(); + + /* make dependency entries */ + ObjectAddressSet(myself, TransformRelationId, transformid); + + /* dependency on language */ + ObjectAddressSet(referenced, LanguageRelationId, langid); + add_exact_object_address(&referenced, addrs); + + /* dependency on type */ + ObjectAddressSet(referenced, TypeRelationId, typeid); + add_exact_object_address(&referenced, addrs); + + /* dependencies on functions */ + if (OidIsValid(fromsqlfuncid)) + { + ObjectAddressSet(referenced, ProcedureRelationId, fromsqlfuncid); + add_exact_object_address(&referenced, addrs); + } + if (OidIsValid(tosqlfuncid)) + { + ObjectAddressSet(referenced, ProcedureRelationId, tosqlfuncid); + add_exact_object_address(&referenced, addrs); + } + + record_object_address_dependencies(&myself, addrs, DEPENDENCY_NORMAL); + free_object_addresses(addrs); + + /* dependency on extension */ + recordDependencyOnCurrentExtension(&myself, is_replace); + + /* Post creation hook for new transform */ + InvokeObjectPostCreateHook(TransformRelationId, transformid, 0); + + heap_freetuple(newtuple); + + table_close(relation, RowExclusiveLock); + + return myself; +} + + +/* + * get_transform_oid - given type OID and 
language OID, look up a transform OID + * + * If missing_ok is false, throw an error if the transform is not found. If + * true, just return InvalidOid. + */ +Oid +get_transform_oid(Oid type_id, Oid lang_id, bool missing_ok) +{ + Oid oid; + + oid = GetSysCacheOid2(TRFTYPELANG, Anum_pg_transform_oid, + ObjectIdGetDatum(type_id), + ObjectIdGetDatum(lang_id)); + if (!OidIsValid(oid) && !missing_ok) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("transform for type %s language \"%s\" does not exist", + format_type_be(type_id), + get_language_name(lang_id, false)))); + return oid; +} + + +/* + * Subroutine for ALTER FUNCTION/AGGREGATE SET SCHEMA/RENAME + * + * Is there a function with the given name and signature already in the given + * namespace? If so, raise an appropriate error message. + */ +void +IsThereFunctionInNamespace(const char *proname, int pronargs, + oidvector *proargtypes, Oid nspOid) +{ + /* check for duplicate name (more friendly than unique-index failure) */ + if (SearchSysCacheExists3(PROCNAMEARGSNSP, + CStringGetDatum(proname), + PointerGetDatum(proargtypes), + ObjectIdGetDatum(nspOid))) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_FUNCTION), + errmsg("function %s already exists in schema \"%s\"", + funcname_signature_string(proname, pronargs, + NIL, proargtypes->values), + get_namespace_name(nspOid)))); +} + +/* + * ExecuteDoStmt + * Execute inline procedural-language code + * + * See at ExecuteCallStmt() about the atomic argument. 
+ */ +void +ExecuteDoStmt(ParseState *pstate, DoStmt *stmt, bool atomic) +{ + InlineCodeBlock *codeblock = makeNode(InlineCodeBlock); + ListCell *arg; + DefElem *as_item = NULL; + DefElem *language_item = NULL; + char *language; + Oid laninline; + HeapTuple languageTuple; + Form_pg_language languageStruct; + + /* Process options we got from gram.y */ + foreach(arg, stmt->args) + { + DefElem *defel = (DefElem *) lfirst(arg); + + if (strcmp(defel->defname, "as") == 0) + { + if (as_item) + errorConflictingDefElem(defel, pstate); + as_item = defel; + } + else if (strcmp(defel->defname, "language") == 0) + { + if (language_item) + errorConflictingDefElem(defel, pstate); + language_item = defel; + } + else + elog(ERROR, "option \"%s\" not recognized", + defel->defname); + } + + if (as_item) + codeblock->source_text = strVal(as_item->arg); + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("no inline code specified"))); + + /* if LANGUAGE option wasn't specified, use the default */ + if (language_item) + language = strVal(language_item->arg); + else + language = "plpgsql"; + + /* Look up the language and validate permissions */ + languageTuple = SearchSysCache1(LANGNAME, PointerGetDatum(language)); + if (!HeapTupleIsValid(languageTuple)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("language \"%s\" does not exist", language), + (extension_file_exists(language) ? 
+ errhint("Use CREATE EXTENSION to load the language into the database.") : 0))); + + languageStruct = (Form_pg_language) GETSTRUCT(languageTuple); + codeblock->langOid = languageStruct->oid; + codeblock->langIsTrusted = languageStruct->lanpltrusted; + codeblock->atomic = atomic; + + if (languageStruct->lanpltrusted) + { + /* if trusted language, need USAGE privilege */ + AclResult aclresult; + + aclresult = pg_language_aclcheck(codeblock->langOid, GetUserId(), + ACL_USAGE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_LANGUAGE, + NameStr(languageStruct->lanname)); + } + else + { + /* if untrusted language, must be superuser */ + if (!superuser()) + aclcheck_error(ACLCHECK_NO_PRIV, OBJECT_LANGUAGE, + NameStr(languageStruct->lanname)); + } + + /* get the handler function's OID */ + laninline = languageStruct->laninline; + if (!OidIsValid(laninline)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("language \"%s\" does not support inline code execution", + NameStr(languageStruct->lanname)))); + + ReleaseSysCache(languageTuple); + + /* execute the inline handler */ + OidFunctionCall1(laninline, PointerGetDatum(codeblock)); +} + +/* + * Execute CALL statement + * + * Inside a top-level CALL statement, transaction-terminating commands such as + * COMMIT or a PL-specific equivalent are allowed. The terminology in the SQL + * standard is that CALL establishes a non-atomic execution context. Most + * other commands establish an atomic execution context, in which transaction + * control actions are not allowed. If there are nested executions of CALL, + * we want to track the execution context recursively, so that the nested + * CALLs can also do transaction control. Note, however, that for example in + * CALL -> SELECT -> CALL, the second call cannot do transaction control, + * because the SELECT in between establishes an atomic execution context. 
+ * + * So when ExecuteCallStmt() is called from the top level, we pass in atomic = + * false (recall that that means transactions = yes). We then create a + * CallContext node with content atomic = false, which is passed in the + * fcinfo->context field to the procedure invocation. The language + * implementation should then take appropriate measures to allow or prevent + * transaction commands based on that information, e.g., call + * SPI_connect_ext(SPI_OPT_NONATOMIC). The language should also pass on the + * atomic flag to any nested invocations to CALL. + * + * The expression data structures and execution context that we create + * within this function are children of the portalContext of the Portal + * that the CALL utility statement runs in. Therefore, any pass-by-ref + * values that we're passing to the procedure will survive transaction + * commits that might occur inside the procedure. + */ +void +ExecuteCallStmt(CallStmt *stmt, ParamListInfo params, bool atomic, DestReceiver *dest) +{ + LOCAL_FCINFO(fcinfo, FUNC_MAX_ARGS); + ListCell *lc; + FuncExpr *fexpr; + int nargs; + int i; + AclResult aclresult; + FmgrInfo flinfo; + CallContext *callcontext; + EState *estate; + ExprContext *econtext; + HeapTuple tp; + PgStat_FunctionCallUsage fcusage; + Datum retval; + + fexpr = stmt->funcexpr; + Assert(fexpr); + Assert(IsA(fexpr, FuncExpr)); + + aclresult = pg_proc_aclcheck(fexpr->funcid, GetUserId(), ACL_EXECUTE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_PROCEDURE, get_func_name(fexpr->funcid)); + + /* Prep the context object we'll pass to the procedure */ + callcontext = makeNode(CallContext); + callcontext->atomic = atomic; + + tp = SearchSysCache1(PROCOID, ObjectIdGetDatum(fexpr->funcid)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for function %u", fexpr->funcid); + + /* + * If proconfig is set we can't allow transaction commands because of the + * way the GUC stacking works: The transaction boundary would have 
to pop + * the proconfig setting off the stack. That restriction could be lifted + * by redesigning the GUC nesting mechanism a bit. + */ + if (!heap_attisnull(tp, Anum_pg_proc_proconfig, NULL)) + callcontext->atomic = true; + + /* + * In security definer procedures, we can't allow transaction commands. + * StartTransaction() insists that the security context stack is empty, + * and AbortTransaction() resets the security context. This could be + * reorganized, but right now it doesn't work. + */ + if (((Form_pg_proc) GETSTRUCT(tp))->prosecdef) + callcontext->atomic = true; + + ReleaseSysCache(tp); + + /* safety check; see ExecInitFunc() */ + nargs = list_length(fexpr->args); + if (nargs > FUNC_MAX_ARGS) + ereport(ERROR, + (errcode(ERRCODE_TOO_MANY_ARGUMENTS), + errmsg_plural("cannot pass more than %d argument to a procedure", + "cannot pass more than %d arguments to a procedure", + FUNC_MAX_ARGS, + FUNC_MAX_ARGS))); + + /* Initialize function call structure */ + InvokeFunctionExecuteHook(fexpr->funcid); + fmgr_info(fexpr->funcid, &flinfo); + fmgr_info_set_expr((Node *) fexpr, &flinfo); + InitFunctionCallInfoData(*fcinfo, &flinfo, nargs, fexpr->inputcollid, + (Node *) callcontext, NULL); + + /* + * Evaluate procedure arguments inside a suitable execution context. Note + * we can't free this context till the procedure returns. + */ + estate = CreateExecutorState(); + estate->es_param_list_info = params; + econtext = CreateExprContext(estate); + + /* + * If we're called in non-atomic context, we also have to ensure that the + * argument expressions run with an up-to-date snapshot. Our caller will + * have provided a current snapshot in atomic contexts, but not in + * non-atomic contexts, because the possibility of a COMMIT/ROLLBACK + * destroying the snapshot makes higher-level management too complicated. 
+ */ + if (!atomic) + PushActiveSnapshot(GetTransactionSnapshot()); + + i = 0; + foreach(lc, fexpr->args) + { + ExprState *exprstate; + Datum val; + bool isnull; + + exprstate = ExecPrepareExpr(lfirst(lc), estate); + + val = ExecEvalExprSwitchContext(exprstate, econtext, &isnull); + + fcinfo->args[i].value = val; + fcinfo->args[i].isnull = isnull; + + i++; + } + + /* Get rid of temporary snapshot for arguments, if we made one */ + if (!atomic) + PopActiveSnapshot(); + + /* Here we actually call the procedure */ + pgstat_init_function_usage(fcinfo, &fcusage); + retval = FunctionCallInvoke(fcinfo); + pgstat_end_function_usage(&fcusage, true); + + /* Handle the procedure's outputs */ + if (fexpr->funcresulttype == VOIDOID) + { + /* do nothing */ + } + else if (fexpr->funcresulttype == RECORDOID) + { + /* send tuple to client */ + HeapTupleHeader td; + Oid tupType; + int32 tupTypmod; + TupleDesc retdesc; + HeapTupleData rettupdata; + TupOutputState *tstate; + TupleTableSlot *slot; + + if (fcinfo->isnull) + elog(ERROR, "procedure returned null record"); + + /* + * Ensure there's an active snapshot whilst we execute whatever's + * involved here. Note that this is *not* sufficient to make the + * world safe for TOAST pointers to be included in the returned data: + * the referenced data could have gone away while we didn't hold a + * snapshot. Hence, it's incumbent on PLs that can do COMMIT/ROLLBACK + * to not return TOAST pointers, unless those pointers were fetched + * after the last COMMIT/ROLLBACK in the procedure. + * + * XXX that is a really nasty, hard-to-test requirement. Is there a + * way to remove it? 
+ */ + EnsurePortalSnapshotExists(); + + td = DatumGetHeapTupleHeader(retval); + tupType = HeapTupleHeaderGetTypeId(td); + tupTypmod = HeapTupleHeaderGetTypMod(td); + retdesc = lookup_rowtype_tupdesc(tupType, tupTypmod); + + tstate = begin_tup_output_tupdesc(dest, retdesc, + &TTSOpsHeapTuple); + + rettupdata.t_len = HeapTupleHeaderGetDatumLength(td); + ItemPointerSetInvalid(&(rettupdata.t_self)); + rettupdata.t_tableOid = InvalidOid; + rettupdata.t_data = td; + + slot = ExecStoreHeapTuple(&rettupdata, tstate->slot, false); + tstate->dest->receiveSlot(slot, tstate->dest); + + end_tup_output(tstate); + + ReleaseTupleDesc(retdesc); + } + else + elog(ERROR, "unexpected result type for procedure: %u", + fexpr->funcresulttype); + + FreeExecutorState(estate); +} + +/* + * Construct the tuple descriptor for a CALL statement return + */ +TupleDesc +CallStmtResultDesc(CallStmt *stmt) +{ + FuncExpr *fexpr; + HeapTuple tuple; + TupleDesc tupdesc; + + fexpr = stmt->funcexpr; + + tuple = SearchSysCache1(PROCOID, ObjectIdGetDatum(fexpr->funcid)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for procedure %u", fexpr->funcid); + + tupdesc = build_function_result_tupdesc_t(tuple); + + ReleaseSysCache(tuple); + + return tupdesc; +} diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c new file mode 100644 index 0000000..d3f7b09 --- /dev/null +++ b/src/backend/commands/indexcmds.c @@ -0,0 +1,4355 @@ +/*------------------------------------------------------------------------- + * + * indexcmds.c + * POSTGRES define and remove index code. 
+ * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/indexcmds.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/amapi.h" +#include "access/heapam.h" +#include "access/htup_details.h" +#include "access/reloptions.h" +#include "access/sysattr.h" +#include "access/tableam.h" +#include "access/xact.h" +#include "catalog/catalog.h" +#include "catalog/index.h" +#include "catalog/indexing.h" +#include "catalog/pg_am.h" +#include "catalog/pg_constraint.h" +#include "catalog/pg_inherits.h" +#include "catalog/pg_opclass.h" +#include "catalog/pg_opfamily.h" +#include "catalog/pg_tablespace.h" +#include "catalog/pg_type.h" +#include "commands/comment.h" +#include "commands/dbcommands.h" +#include "commands/defrem.h" +#include "commands/event_trigger.h" +#include "commands/progress.h" +#include "commands/tablecmds.h" +#include "commands/tablespace.h" +#include "mb/pg_wchar.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/optimizer.h" +#include "parser/parse_coerce.h" +#include "parser/parse_func.h" +#include "parser/parse_oper.h" +#include "partitioning/partdesc.h" +#include "pgstat.h" +#include "rewrite/rewriteManip.h" +#include "storage/lmgr.h" +#include "storage/proc.h" +#include "storage/procarray.h" +#include "storage/sinvaladt.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" +#include "utils/inval.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/partcache.h" +#include "utils/pg_rusage.h" +#include "utils/regproc.h" +#include "utils/snapmgr.h" +#include "utils/syscache.h" + + +/* non-export function prototypes */ +static bool CompareOpclassOptions(Datum *opts1, Datum *opts2, int natts); +static void CheckPredicate(Expr 
*predicate); +static void ComputeIndexAttrs(IndexInfo *indexInfo, + Oid *typeOidP, + Oid *collationOidP, + Oid *classOidP, + int16 *colOptionP, + List *attList, + List *exclusionOpNames, + Oid relId, + const char *accessMethodName, Oid accessMethodId, + bool amcanorder, + bool isconstraint, + Oid ddl_userid, + int ddl_sec_context, + int *ddl_save_nestlevel); +static char *ChooseIndexName(const char *tabname, Oid namespaceId, + List *colnames, List *exclusionOpNames, + bool primary, bool isconstraint); +static char *ChooseIndexNameAddition(List *colnames); +static List *ChooseIndexColumnNames(List *indexElems); +static void ReindexIndex(RangeVar *indexRelation, ReindexParams *params, + bool isTopLevel); +static void RangeVarCallbackForReindexIndex(const RangeVar *relation, + Oid relId, Oid oldRelId, void *arg); +static Oid ReindexTable(RangeVar *relation, ReindexParams *params, + bool isTopLevel); +static void ReindexMultipleTables(const char *objectName, + ReindexObjectType objectKind, ReindexParams *params); +static void reindex_error_callback(void *args); +static void ReindexPartitions(Oid relid, ReindexParams *params, + bool isTopLevel); +static void ReindexMultipleInternal(List *relids, + ReindexParams *params); +static bool ReindexRelationConcurrently(Oid relationOid, + ReindexParams *params); +static void update_relispartition(Oid relationId, bool newval); +static inline void set_indexsafe_procflags(void); + +/* + * callback argument type for RangeVarCallbackForReindexIndex() + */ +struct ReindexIndexCallbackState +{ + ReindexParams params; /* options from statement */ + Oid locked_table_oid; /* tracks previously locked table */ +}; + +/* + * callback arguments for reindex_error_callback() + */ +typedef struct ReindexErrorInfo +{ + char *relname; + char *relnamespace; + char relkind; +} ReindexErrorInfo; + +/* + * CheckIndexCompatible + * Determine whether an existing index definition is compatible with a + * prospective index definition, such that the 
existing index storage
 * could become the storage of the new index, avoiding a rebuild.
 *
 * 'oldId': the OID of the existing index
 * 'accessMethodName': name of the AM to use.
 * 'attributeList': a list of IndexElem specifying columns and expressions
 *		to index on.
 * 'exclusionOpNames': list of names of exclusion-constraint operators,
 *		or NIL if not an exclusion constraint.
 *
 * This is tailored to the needs of ALTER TABLE ALTER TYPE, which recreates
 * any indexes that depended on a changing column from their pg_get_indexdef
 * or pg_get_constraintdef definitions.  We omit some of the sanity checks of
 * DefineIndex.  We assume that the old and new indexes have the same number
 * of columns and that if one has an expression column or predicate, both do.
 * Errors arising from the attribute list still apply.
 *
 * Most column type changes that can skip a table rewrite do not invalidate
 * indexes.  We acknowledge this when all operator classes, collations and
 * exclusion operators match.  Though we could further permit intra-opfamily
 * changes for btree and hash indexes, that adds subtle complexity with no
 * concrete benefit for core types.  Note, that INCLUDE columns aren't
 * checked by this function, for them it's enough that table rewrite is
 * skipped.
 *
 * When a comparison or exclusion operator has a polymorphic input type, the
 * actual input types must also match.  This defends against the possibility
 * that operators could vary behavior in response to get_fn_expr_argtype().
 * At present, this hazard is theoretical: check_exclusion_constraint() and
 * all core index access methods decline to set fn_expr for such calls.
 *
 * We do not yet implement a test to verify compatibility of expression
 * columns or predicates, so assume any such index is incompatible.
 */
bool
CheckIndexCompatible(Oid oldId,
					 const char *accessMethodName,
					 List *attributeList,
					 List *exclusionOpNames)
{
	bool		isconstraint;
	Oid		   *typeObjectId;
	Oid		   *collationObjectId;
	Oid		   *classObjectId;
	Oid			accessMethodId;
	Oid			relationId;
	HeapTuple	tuple;
	Form_pg_index indexForm;
	Form_pg_am	accessMethodForm;
	IndexAmRoutine *amRoutine;
	bool		amcanorder;
	int16	   *coloptions;
	IndexInfo  *indexInfo;
	int			numberOfAttributes;
	int			old_natts;
	bool		isnull;
	bool		ret = true;		/* stays true only if every check passes */
	oidvector  *old_indclass;
	oidvector  *old_indcollation;
	Relation	irel;
	int			i;
	Datum		d;

	/* Caller should already have the relation locked in some way. */
	relationId = IndexGetRelation(oldId, false);

	/*
	 * We can pretend isconstraint = false unconditionally.  It only serves to
	 * decide the text of an error message that should never happen for us.
	 */
	isconstraint = false;

	numberOfAttributes = list_length(attributeList);
	Assert(numberOfAttributes > 0);
	Assert(numberOfAttributes <= INDEX_MAX_KEYS);

	/* look up the access method */
	tuple = SearchSysCache1(AMNAME, PointerGetDatum(accessMethodName));
	if (!HeapTupleIsValid(tuple))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("access method \"%s\" does not exist",
						accessMethodName)));
	accessMethodForm = (Form_pg_am) GETSTRUCT(tuple);
	accessMethodId = accessMethodForm->oid;
	amRoutine = GetIndexAmRoutine(accessMethodForm->amhandler);
	ReleaseSysCache(tuple);

	amcanorder = amRoutine->amcanorder;

	/*
	 * Compute the operator classes, collations, and exclusion operators for
	 * the new index, so we can test whether it's compatible with the existing
	 * one.  Note that ComputeIndexAttrs might fail here, but that's OK:
	 * DefineIndex would have failed later.  Our attributeList contains only
	 * key attributes, thus we're filling ii_NumIndexAttrs and
	 * ii_NumIndexKeyAttrs with same value.
	 */
	indexInfo = makeIndexInfo(numberOfAttributes, numberOfAttributes,
							  accessMethodId, NIL, NIL, false, false, false, false);
	typeObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid));
	collationObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid));
	classObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid));
	coloptions = (int16 *) palloc(numberOfAttributes * sizeof(int16));
	ComputeIndexAttrs(indexInfo,
					  typeObjectId, collationObjectId, classObjectId,
					  coloptions, attributeList,
					  exclusionOpNames, relationId,
					  accessMethodName, accessMethodId,
					  amcanorder, isconstraint, InvalidOid, 0, NULL);


	/* Get the soon-obsolete pg_index tuple. */
	tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(oldId));
	if (!HeapTupleIsValid(tuple))
		elog(ERROR, "cache lookup failed for index %u", oldId);
	indexForm = (Form_pg_index) GETSTRUCT(tuple);

	/*
	 * We don't assess expressions or predicates; assume incompatibility.
	 * Also, if the index is invalid for any reason, treat it as incompatible.
	 */
	if (!(heap_attisnull(tuple, Anum_pg_index_indpred, NULL) &&
		  heap_attisnull(tuple, Anum_pg_index_indexprs, NULL) &&
		  indexForm->indisvalid))
	{
		ReleaseSysCache(tuple);
		return false;
	}

	/* Any change in operator class or collation breaks compatibility. */
	old_natts = indexForm->indnkeyatts;
	Assert(old_natts == numberOfAttributes);

	/* indcollation/indclass are fixed-width columns, so never NULL */
	d = SysCacheGetAttr(INDEXRELID, tuple, Anum_pg_index_indcollation, &isnull);
	Assert(!isnull);
	old_indcollation = (oidvector *) DatumGetPointer(d);

	d = SysCacheGetAttr(INDEXRELID, tuple, Anum_pg_index_indclass, &isnull);
	Assert(!isnull);
	old_indclass = (oidvector *) DatumGetPointer(d);

	ret = (memcmp(old_indclass->values, classObjectId,
				  old_natts * sizeof(Oid)) == 0 &&
		   memcmp(old_indcollation->values, collationObjectId,
				  old_natts * sizeof(Oid)) == 0);

	ReleaseSysCache(tuple);

	if (!ret)
		return false;

	/* For polymorphic opcintype, column type changes break compatibility. */
	irel = index_open(oldId, AccessShareLock);	/* caller probably has a lock */
	for (i = 0; i < old_natts; i++)
	{
		if (IsPolymorphicType(get_opclass_input_type(classObjectId[i])) &&
			TupleDescAttr(irel->rd_att, i)->atttypid != typeObjectId[i])
		{
			ret = false;
			break;
		}
	}

	/* Any change in opclass options break compatibility. */
	if (ret)
	{
		Datum	   *opclassOptions = RelationGetIndexRawAttOptions(irel);

		ret = CompareOpclassOptions(opclassOptions,
									indexInfo->ii_OpclassOptions, old_natts);

		if (opclassOptions)
			pfree(opclassOptions);
	}

	/* Any change in exclusion operator selections breaks compatibility. */
	if (ret && indexInfo->ii_ExclusionOps != NULL)
	{
		Oid		   *old_operators,
				   *old_procs;
		uint16	   *old_strats;

		RelationGetExclusionInfo(irel, &old_operators, &old_procs, &old_strats);
		ret = memcmp(old_operators, indexInfo->ii_ExclusionOps,
					 old_natts * sizeof(Oid)) == 0;

		/* Require an exact input type match for polymorphic operators. */
		if (ret)
		{
			for (i = 0; i < old_natts && ret; i++)
			{
				Oid			left,
							right;

				op_input_types(indexInfo->ii_ExclusionOps[i], &left, &right);
				if ((IsPolymorphicType(left) || IsPolymorphicType(right)) &&
					TupleDescAttr(irel->rd_att, i)->atttypid != typeObjectId[i])
				{
					ret = false;
					break;
				}
			}
		}
	}

	/* NoLock: keep the AccessShareLock we took until transaction end */
	index_close(irel, NoLock);
	return ret;
}

/*
 * CompareOpclassOptions
 *
 * Compare per-column opclass options which are represented by arrays of text[]
 * datums.  Both elements of arrays and array themselves can be NULL.
 * Returns true iff the two option sets are equivalent column-by-column.
 */
static bool
CompareOpclassOptions(Datum *opts1, Datum *opts2, int natts)
{
	int			i;

	/* Both arrays absent: trivially equal. */
	if (!opts1 && !opts2)
		return true;

	for (i = 0; i < natts; i++)
	{
		/* A missing array is treated as all-NULL elements. */
		Datum		opt1 = opts1 ? opts1[i] : (Datum) 0;
		Datum		opt2 = opts2 ? opts2[i] : (Datum) 0;

		if (opt1 == (Datum) 0)
		{
			if (opt2 == (Datum) 0)
				continue;
			else
				return false;
		}
		else if (opt2 == (Datum) 0)
			return false;

		/* Compare non-NULL text[] datums. */
		if (!DatumGetBool(DirectFunctionCall2(array_eq, opt1, opt2)))
			return false;
	}

	return true;
}

/*
 * WaitForOlderSnapshots
 *
 * Wait for transactions that might have an older snapshot than the given xmin
 * limit, because it might not contain tuples deleted just before it has
 * been taken.  Obtain a list of VXIDs of such transactions, and wait for them
 * individually.  This is used when building an index concurrently.
 *
 * We can exclude any running transactions that have xmin > the xmin given;
 * their oldest snapshot must be newer than our xmin limit.
 * We can also exclude any transactions that have xmin = zero, since they
 * evidently have no live snapshot at all (and any one they might be in
 * process of taking is certainly newer than ours).  Transactions in other
 * DBs can be ignored too, since they'll never even be able to see the
 * index being worked on.
 *
 * We can also exclude autovacuum processes and processes running manual
 * lazy VACUUMs, because they won't be fazed by missing index entries
 * either.  (Manual ANALYZEs, however, can't be excluded because they
 * might be within transactions that are going to do arbitrary operations
 * later.)  Processes running CREATE INDEX CONCURRENTLY or REINDEX CONCURRENTLY
 * on indexes that are neither expressional nor partial are also safe to
 * ignore, since we know that those processes won't examine any data
 * outside the table they're indexing.
 *
 * Also, GetCurrentVirtualXIDs never reports our own vxid, so we need not
 * check for that.
 *
 * If a process goes idle-in-transaction with xmin zero, we do not need to
 * wait for it anymore, per the above argument.  We do not have the
 * infrastructure right now to stop waiting if that happens, but we can at
 * least avoid the folly of waiting when it is idle at the time we would
 * begin to wait.  We do this by repeatedly rechecking the output of
 * GetCurrentVirtualXIDs.  If, during any iteration, a particular vxid
 * doesn't show up in the output, we know we can forget about it.
 */
void
WaitForOlderSnapshots(TransactionId limitXmin, bool progress)
{
	int			n_old_snapshots;
	int			i;
	VirtualTransactionId *old_snapshots;

	/*
	 * Collect the initial set of VXIDs we may have to wait for; flags
	 * exclude autovacuum, lazy VACUUM, and safe-IC backends per the header
	 * comment above.
	 */
	old_snapshots = GetCurrentVirtualXIDs(limitXmin, true, false,
										  PROC_IS_AUTOVACUUM | PROC_IN_VACUUM
										  | PROC_IN_SAFE_IC,
										  &n_old_snapshots);
	if (progress)
		pgstat_progress_update_param(PROGRESS_WAITFOR_TOTAL, n_old_snapshots);

	for (i = 0; i < n_old_snapshots; i++)
	{
		if (!VirtualTransactionIdIsValid(old_snapshots[i]))
			continue;			/* found uninteresting in previous cycle */

		if (i > 0)
		{
			/* see if anything's changed ... */
			VirtualTransactionId *newer_snapshots;
			int			n_newer_snapshots;
			int			j;
			int			k;

			newer_snapshots = GetCurrentVirtualXIDs(limitXmin,
													true, false,
													PROC_IS_AUTOVACUUM | PROC_IN_VACUUM
													| PROC_IN_SAFE_IC,
													&n_newer_snapshots);

			/*
			 * Invalidate any not-yet-waited-for entry (j >= i) that no
			 * longer appears in the fresh list: that backend has since
			 * dropped its old snapshot, so we need not wait for it.
			 */
			for (j = i; j < n_old_snapshots; j++)
			{
				if (!VirtualTransactionIdIsValid(old_snapshots[j]))
					continue;	/* found uninteresting in previous cycle */
				for (k = 0; k < n_newer_snapshots; k++)
				{
					if (VirtualTransactionIdEquals(old_snapshots[j],
												   newer_snapshots[k]))
						break;
				}
				if (k >= n_newer_snapshots) /* not there anymore */
					SetInvalidVirtualTransactionId(old_snapshots[j]);
			}
			pfree(newer_snapshots);
		}

		if (VirtualTransactionIdIsValid(old_snapshots[i]))
		{
			/* If requested, publish who we're going to wait for. */
			if (progress)
			{
				PGPROC	   *holder = BackendIdGetProc(old_snapshots[i].backendId);

				if (holder)
					pgstat_progress_update_param(PROGRESS_WAITFOR_CURRENT_PID,
												 holder->pid);
			}
			/* Block until that virtual transaction ends. */
			VirtualXactLock(old_snapshots[i], true);
		}

		if (progress)
			pgstat_progress_update_param(PROGRESS_WAITFOR_DONE, i + 1);
	}
}


/*
 * DefineIndex
 *		Creates a new index.
 *
 * This function manages the current userid according to the needs of pg_dump.
 * Recreating old-database catalog entries in new-database is fine, regardless
 * of which users would have permission to recreate those entries now.  That's
 * just preservation of state.  Running opaque expressions, like calling a
 * function named in a catalog entry or evaluating a pg_node_tree in a catalog
 * entry, as anyone other than the object owner, is not fine.  To adhere to
 * those principles and to remain fail-safe, use the table owner userid for
 * most ACL checks.  Use the original userid for ACL checks reached without
 * traversing opaque expressions.  (pg_dump can predict such ACL checks from
 * catalogs.)  Overall, this is a mess.
Future DDL development should + * consider offering one DDL command for catalog setup and a separate DDL + * command for steps that run opaque expressions. + * + * 'relationId': the OID of the heap relation on which the index is to be + * created + * 'stmt': IndexStmt describing the properties of the new index. + * 'indexRelationId': normally InvalidOid, but during bootstrap can be + * nonzero to specify a preselected OID for the index. + * 'parentIndexId': the OID of the parent index; InvalidOid if not the child + * of a partitioned index. + * 'parentConstraintId': the OID of the parent constraint; InvalidOid if not + * the child of a constraint (only used when recursing) + * 'is_alter_table': this is due to an ALTER rather than a CREATE operation. + * 'check_rights': check for CREATE rights in namespace and tablespace. (This + * should be true except when ALTER is deleting/recreating an index.) + * 'check_not_in_use': check for table not already in use in current session. + * This should be true unless caller is holding the table open, in which + * case the caller had better have checked it earlier. + * 'skip_build': make the catalog entries but don't create the index files + * 'quiet': suppress the NOTICE chatter ordinarily provided for constraints. + * + * Returns the object address of the created index. 
+ */ +ObjectAddress +DefineIndex(Oid relationId, + IndexStmt *stmt, + Oid indexRelationId, + Oid parentIndexId, + Oid parentConstraintId, + bool is_alter_table, + bool check_rights, + bool check_not_in_use, + bool skip_build, + bool quiet) +{ + bool concurrent; + char *indexRelationName; + char *accessMethodName; + Oid *typeObjectId; + Oid *collationObjectId; + Oid *classObjectId; + Oid accessMethodId; + Oid namespaceId; + Oid tablespaceId; + Oid createdConstraintId = InvalidOid; + List *indexColNames; + List *allIndexParams; + Relation rel; + HeapTuple tuple; + Form_pg_am accessMethodForm; + IndexAmRoutine *amRoutine; + bool amcanorder; + amoptions_function amoptions; + bool partitioned; + bool safe_index; + Datum reloptions; + int16 *coloptions; + IndexInfo *indexInfo; + bits16 flags; + bits16 constr_flags; + int numberOfAttributes; + int numberOfKeyAttributes; + TransactionId limitXmin; + ObjectAddress address; + LockRelId heaprelid; + LOCKTAG heaplocktag; + LOCKMODE lockmode; + Snapshot snapshot; + Oid root_save_userid; + int root_save_sec_context; + int root_save_nestlevel; + int i; + + root_save_nestlevel = NewGUCNestLevel(); + + /* + * Some callers need us to run with an empty default_tablespace; this is a + * necessary hack to be able to reproduce catalog state accurately when + * recreating indexes after table-rewriting ALTER TABLE. + */ + if (stmt->reset_default_tblspc) + (void) set_config_option("default_tablespace", "", + PGC_USERSET, PGC_S_SESSION, + GUC_ACTION_SAVE, true, 0, false); + + /* + * Force non-concurrent build on temporary relations, even if CONCURRENTLY + * was requested. Other backends can't access a temporary relation, so + * there's no harm in grabbing a stronger lock, and a non-concurrent DROP + * is more efficient. Do this before any use of the concurrent option is + * done. 
+ */ + if (stmt->concurrent && get_rel_persistence(relationId) != RELPERSISTENCE_TEMP) + concurrent = true; + else + concurrent = false; + + /* + * Start progress report. If we're building a partition, this was already + * done. + */ + if (!OidIsValid(parentIndexId)) + { + pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX, + relationId); + pgstat_progress_update_param(PROGRESS_CREATEIDX_COMMAND, + concurrent ? + PROGRESS_CREATEIDX_COMMAND_CREATE_CONCURRENTLY : + PROGRESS_CREATEIDX_COMMAND_CREATE); + } + + /* + * No index OID to report yet + */ + pgstat_progress_update_param(PROGRESS_CREATEIDX_INDEX_OID, + InvalidOid); + + /* + * count key attributes in index + */ + numberOfKeyAttributes = list_length(stmt->indexParams); + + /* + * Calculate the new list of index columns including both key columns and + * INCLUDE columns. Later we can determine which of these are key + * columns, and which are just part of the INCLUDE list by checking the + * list position. A list item in a position less than ii_NumIndexKeyAttrs + * is part of the key columns, and anything equal to and over is part of + * the INCLUDE columns. + */ + allIndexParams = list_concat_copy(stmt->indexParams, + stmt->indexIncludingParams); + numberOfAttributes = list_length(allIndexParams); + + if (numberOfKeyAttributes <= 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("must specify at least one column"))); + if (numberOfAttributes > INDEX_MAX_KEYS) + ereport(ERROR, + (errcode(ERRCODE_TOO_MANY_COLUMNS), + errmsg("cannot use more than %d columns in an index", + INDEX_MAX_KEYS))); + + /* + * Only SELECT ... FOR UPDATE/SHARE are allowed while doing a standard + * index build; but for concurrent builds we allow INSERT/UPDATE/DELETE + * (but not VACUUM). 
+ * + * NB: Caller is responsible for making sure that relationId refers to the + * relation on which the index should be built; except in bootstrap mode, + * this will typically require the caller to have already locked the + * relation. To avoid lock upgrade hazards, that lock should be at least + * as strong as the one we take here. + * + * NB: If the lock strength here ever changes, code that is run by + * parallel workers under the control of certain particular ambuild + * functions will need to be updated, too. + */ + lockmode = concurrent ? ShareUpdateExclusiveLock : ShareLock; + rel = table_open(relationId, lockmode); + + /* + * Switch to the table owner's userid, so that any index functions are run + * as that user. Also lock down security-restricted operations. We + * already arranged to make GUC variable changes local to this command. + */ + GetUserIdAndSecContext(&root_save_userid, &root_save_sec_context); + SetUserIdAndSecContext(rel->rd_rel->relowner, + root_save_sec_context | SECURITY_RESTRICTED_OPERATION); + + namespaceId = RelationGetNamespace(rel); + + /* Ensure that it makes sense to index this kind of relation */ + switch (rel->rd_rel->relkind) + { + case RELKIND_RELATION: + case RELKIND_MATVIEW: + case RELKIND_PARTITIONED_TABLE: + /* OK */ + break; + default: + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot create index on relation \"%s\"", + RelationGetRelationName(rel)), + errdetail_relkind_not_supported(rel->rd_rel->relkind))); + break; + } + + /* + * Establish behavior for partitioned tables, and verify sanity of + * parameters. + * + * We do not build an actual index in this case; we only create a few + * catalog entries. The actual indexes are built by recursing for each + * partition. + */ + partitioned = rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE; + if (partitioned) + { + /* + * Note: we check 'stmt->concurrent' rather than 'concurrent', so that + * the error is thrown also for temporary tables. 
Seems better to be + * consistent, even though we could do it on temporary table because + * we're not actually doing it concurrently. + */ + if (stmt->concurrent) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot create index on partitioned table \"%s\" concurrently", + RelationGetRelationName(rel)))); + if (stmt->excludeOpNames) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot create exclusion constraints on partitioned table \"%s\"", + RelationGetRelationName(rel)))); + } + + /* + * Don't try to CREATE INDEX on temp tables of other backends. + */ + if (RELATION_IS_OTHER_TEMP(rel)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot create indexes on temporary tables of other sessions"))); + + /* + * Unless our caller vouches for having checked this already, insist that + * the table not be in use by our own session, either. Otherwise we might + * fail to make entries in the new index (for instance, if an INSERT or + * UPDATE is in progress and has already made its list of target indexes). + */ + if (check_not_in_use) + CheckTableNotInUse(rel, "CREATE INDEX"); + + /* + * Verify we (still) have CREATE rights in the rel's namespace. + * (Presumably we did when the rel was created, but maybe not anymore.) + * Skip check if caller doesn't want it. Also skip check if + * bootstrapping, since permissions machinery may not be working yet. + */ + if (check_rights && !IsBootstrapProcessingMode()) + { + AclResult aclresult; + + aclresult = pg_namespace_aclcheck(namespaceId, root_save_userid, + ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_SCHEMA, + get_namespace_name(namespaceId)); + } + + /* + * Select tablespace to use. If not specified, use default tablespace + * (which may in turn default to database's default). 
+ */ + if (stmt->tableSpace) + { + tablespaceId = get_tablespace_oid(stmt->tableSpace, false); + if (partitioned && tablespaceId == MyDatabaseTableSpace) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot specify default tablespace for partitioned relations"))); + } + else + { + tablespaceId = GetDefaultTablespace(rel->rd_rel->relpersistence, + partitioned); + /* note InvalidOid is OK in this case */ + } + + /* Check tablespace permissions */ + if (check_rights && + OidIsValid(tablespaceId) && tablespaceId != MyDatabaseTableSpace) + { + AclResult aclresult; + + aclresult = pg_tablespace_aclcheck(tablespaceId, root_save_userid, + ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_TABLESPACE, + get_tablespace_name(tablespaceId)); + } + + /* + * Force shared indexes into the pg_global tablespace. This is a bit of a + * hack but seems simpler than marking them in the BKI commands. On the + * other hand, if it's not shared, don't allow it to be placed there. + */ + if (rel->rd_rel->relisshared) + tablespaceId = GLOBALTABLESPACE_OID; + else if (tablespaceId == GLOBALTABLESPACE_OID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("only shared relations can be placed in pg_global tablespace"))); + + /* + * Choose the index column names. 
+ */ + indexColNames = ChooseIndexColumnNames(allIndexParams); + + /* + * Select name for index if caller didn't specify + */ + indexRelationName = stmt->idxname; + if (indexRelationName == NULL) + indexRelationName = ChooseIndexName(RelationGetRelationName(rel), + namespaceId, + indexColNames, + stmt->excludeOpNames, + stmt->primary, + stmt->isconstraint); + + /* + * look up the access method, verify it can handle the requested features + */ + accessMethodName = stmt->accessMethod; + tuple = SearchSysCache1(AMNAME, PointerGetDatum(accessMethodName)); + if (!HeapTupleIsValid(tuple)) + { + /* + * Hack to provide more-or-less-transparent updating of old RTREE + * indexes to GiST: if RTREE is requested and not found, use GIST. + */ + if (strcmp(accessMethodName, "rtree") == 0) + { + ereport(NOTICE, + (errmsg("substituting access method \"gist\" for obsolete method \"rtree\""))); + accessMethodName = "gist"; + tuple = SearchSysCache1(AMNAME, PointerGetDatum(accessMethodName)); + } + + if (!HeapTupleIsValid(tuple)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("access method \"%s\" does not exist", + accessMethodName))); + } + accessMethodForm = (Form_pg_am) GETSTRUCT(tuple); + accessMethodId = accessMethodForm->oid; + amRoutine = GetIndexAmRoutine(accessMethodForm->amhandler); + + pgstat_progress_update_param(PROGRESS_CREATEIDX_ACCESS_METHOD_OID, + accessMethodId); + + if (stmt->unique && !amRoutine->amcanunique) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("access method \"%s\" does not support unique indexes", + accessMethodName))); + if (stmt->indexIncludingParams != NIL && !amRoutine->amcaninclude) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("access method \"%s\" does not support included columns", + accessMethodName))); + if (numberOfKeyAttributes > 1 && !amRoutine->amcanmulticol) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("access method \"%s\" does not support multicolumn 
indexes", + accessMethodName))); + if (stmt->excludeOpNames && amRoutine->amgettuple == NULL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("access method \"%s\" does not support exclusion constraints", + accessMethodName))); + + amcanorder = amRoutine->amcanorder; + amoptions = amRoutine->amoptions; + + pfree(amRoutine); + ReleaseSysCache(tuple); + + /* + * Validate predicate, if given + */ + if (stmt->whereClause) + CheckPredicate((Expr *) stmt->whereClause); + + /* + * Parse AM-specific options, convert to text array form, validate. + */ + reloptions = transformRelOptions((Datum) 0, stmt->options, + NULL, NULL, false, false); + + (void) index_reloptions(amoptions, reloptions, true); + + /* + * Prepare arguments for index_create, primarily an IndexInfo structure. + * Note that predicates must be in implicit-AND format. In a concurrent + * build, mark it not-ready-for-inserts. + */ + indexInfo = makeIndexInfo(numberOfAttributes, + numberOfKeyAttributes, + accessMethodId, + NIL, /* expressions, NIL for now */ + make_ands_implicit((Expr *) stmt->whereClause), + stmt->unique, + stmt->nulls_not_distinct, + !concurrent, + concurrent); + + typeObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid)); + collationObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid)); + classObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid)); + coloptions = (int16 *) palloc(numberOfAttributes * sizeof(int16)); + ComputeIndexAttrs(indexInfo, + typeObjectId, collationObjectId, classObjectId, + coloptions, allIndexParams, + stmt->excludeOpNames, relationId, + accessMethodName, accessMethodId, + amcanorder, stmt->isconstraint, root_save_userid, + root_save_sec_context, &root_save_nestlevel); + + /* + * Extra checks when creating a PRIMARY KEY index. 
+ */ + if (stmt->primary) + index_check_primary_key(rel, indexInfo, is_alter_table, stmt); + + /* + * If this table is partitioned and we're creating a unique index or a + * primary key, make sure that the partition key is a subset of the + * index's columns. Otherwise it would be possible to violate uniqueness + * by putting values that ought to be unique in different partitions. + * + * We could lift this limitation if we had global indexes, but those have + * their own problems, so this is a useful feature combination. + */ + if (partitioned && (stmt->unique || stmt->primary)) + { + PartitionKey key = RelationGetPartitionKey(rel); + const char *constraint_type; + int i; + + if (stmt->primary) + constraint_type = "PRIMARY KEY"; + else if (stmt->unique) + constraint_type = "UNIQUE"; + else if (stmt->excludeOpNames != NIL) + constraint_type = "EXCLUDE"; + else + { + elog(ERROR, "unknown constraint type"); + constraint_type = NULL; /* keep compiler quiet */ + } + + /* + * Verify that all the columns in the partition key appear in the + * unique key definition, with the same notion of equality. + */ + for (i = 0; i < key->partnatts; i++) + { + bool found = false; + int eq_strategy; + Oid ptkey_eqop; + int j; + + /* + * Identify the equality operator associated with this partkey + * column. For list and range partitioning, partkeys use btree + * operator classes; hash partitioning uses hash operator classes. + * (Keep this in sync with ComputePartitionAttrs!) 
+ */ + if (key->strategy == PARTITION_STRATEGY_HASH) + eq_strategy = HTEqualStrategyNumber; + else + eq_strategy = BTEqualStrategyNumber; + + ptkey_eqop = get_opfamily_member(key->partopfamily[i], + key->partopcintype[i], + key->partopcintype[i], + eq_strategy); + if (!OidIsValid(ptkey_eqop)) + elog(ERROR, "missing operator %d(%u,%u) in partition opfamily %u", + eq_strategy, key->partopcintype[i], key->partopcintype[i], + key->partopfamily[i]); + + /* + * We'll need to be able to identify the equality operators + * associated with index columns, too. We know what to do with + * btree opclasses; if there are ever any other index types that + * support unique indexes, this logic will need extension. + */ + if (accessMethodId == BTREE_AM_OID) + eq_strategy = BTEqualStrategyNumber; + else + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot match partition key to an index using access method \"%s\"", + accessMethodName))); + + /* + * It may be possible to support UNIQUE constraints when partition + * keys are expressions, but is it worth it? Give up for now. + */ + if (key->partattrs[i] == 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("unsupported %s constraint with partition key definition", + constraint_type), + errdetail("%s constraints cannot be used when partition keys include expressions.", + constraint_type))); + + /* Search the index column(s) for a match */ + for (j = 0; j < indexInfo->ii_NumIndexKeyAttrs; j++) + { + if (key->partattrs[i] == indexInfo->ii_IndexAttrNumbers[j]) + { + /* Matched the column, now what about the equality op? 
*/ + Oid idx_opfamily; + Oid idx_opcintype; + + if (get_opclass_opfamily_and_input_type(classObjectId[j], + &idx_opfamily, + &idx_opcintype)) + { + Oid idx_eqop; + + idx_eqop = get_opfamily_member(idx_opfamily, + idx_opcintype, + idx_opcintype, + eq_strategy); + if (ptkey_eqop == idx_eqop) + { + found = true; + break; + } + } + } + } + + if (!found) + { + Form_pg_attribute att; + + att = TupleDescAttr(RelationGetDescr(rel), + key->partattrs[i] - 1); + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("unique constraint on partitioned table must include all partitioning columns"), + errdetail("%s constraint on table \"%s\" lacks column \"%s\" which is part of the partition key.", + constraint_type, RelationGetRelationName(rel), + NameStr(att->attname)))); + } + } + } + + + /* + * We disallow indexes on system columns. They would not necessarily get + * updated correctly, and they don't seem useful anyway. + */ + for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++) + { + AttrNumber attno = indexInfo->ii_IndexAttrNumbers[i]; + + if (attno < 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("index creation on system columns is not supported"))); + } + + /* + * Also check for system columns used in expressions or predicates. + */ + if (indexInfo->ii_Expressions || indexInfo->ii_Predicate) + { + Bitmapset *indexattrs = NULL; + + pull_varattnos((Node *) indexInfo->ii_Expressions, 1, &indexattrs); + pull_varattnos((Node *) indexInfo->ii_Predicate, 1, &indexattrs); + + for (i = FirstLowInvalidHeapAttributeNumber + 1; i < 0; i++) + { + if (bms_is_member(i - FirstLowInvalidHeapAttributeNumber, + indexattrs)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("index creation on system columns is not supported"))); + } + } + + /* Is index safe for others to ignore? 
See set_indexsafe_procflags() */ + safe_index = indexInfo->ii_Expressions == NIL && + indexInfo->ii_Predicate == NIL; + + /* + * Report index creation if appropriate (delay this till after most of the + * error checks) + */ + if (stmt->isconstraint && !quiet) + { + const char *constraint_type; + + if (stmt->primary) + constraint_type = "PRIMARY KEY"; + else if (stmt->unique) + constraint_type = "UNIQUE"; + else if (stmt->excludeOpNames != NIL) + constraint_type = "EXCLUDE"; + else + { + elog(ERROR, "unknown constraint type"); + constraint_type = NULL; /* keep compiler quiet */ + } + + ereport(DEBUG1, + (errmsg_internal("%s %s will create implicit index \"%s\" for table \"%s\"", + is_alter_table ? "ALTER TABLE / ADD" : "CREATE TABLE /", + constraint_type, + indexRelationName, RelationGetRelationName(rel)))); + } + + /* + * A valid stmt->oldNode implies that we already have a built form of the + * index. The caller should also decline any index build. + */ + Assert(!OidIsValid(stmt->oldNode) || (skip_build && !concurrent)); + + /* + * Make the catalog entries for the index, including constraints. This + * step also actually builds the index, except if caller requested not to + * or in concurrent mode, in which case it'll be done later, or doing a + * partitioned index (because those don't have storage). + */ + flags = constr_flags = 0; + if (stmt->isconstraint) + flags |= INDEX_CREATE_ADD_CONSTRAINT; + if (skip_build || concurrent || partitioned) + flags |= INDEX_CREATE_SKIP_BUILD; + if (stmt->if_not_exists) + flags |= INDEX_CREATE_IF_NOT_EXISTS; + if (concurrent) + flags |= INDEX_CREATE_CONCURRENT; + if (partitioned) + flags |= INDEX_CREATE_PARTITIONED; + if (stmt->primary) + flags |= INDEX_CREATE_IS_PRIMARY; + + /* + * If the table is partitioned, and recursion was declined but partitions + * exist, mark the index as invalid. 
+ */ + if (partitioned && stmt->relation && !stmt->relation->inh) + { + PartitionDesc pd = RelationGetPartitionDesc(rel, true); + + if (pd->nparts != 0) + flags |= INDEX_CREATE_INVALID; + } + + if (stmt->deferrable) + constr_flags |= INDEX_CONSTR_CREATE_DEFERRABLE; + if (stmt->initdeferred) + constr_flags |= INDEX_CONSTR_CREATE_INIT_DEFERRED; + + indexRelationId = + index_create(rel, indexRelationName, indexRelationId, parentIndexId, + parentConstraintId, + stmt->oldNode, indexInfo, indexColNames, + accessMethodId, tablespaceId, + collationObjectId, classObjectId, + coloptions, reloptions, + flags, constr_flags, + allowSystemTableMods, !check_rights, + &createdConstraintId); + + ObjectAddressSet(address, RelationRelationId, indexRelationId); + + if (!OidIsValid(indexRelationId)) + { + /* + * Roll back any GUC changes executed by index functions. Also revert + * to original default_tablespace if we changed it above. + */ + AtEOXact_GUC(false, root_save_nestlevel); + + /* Restore userid and security context */ + SetUserIdAndSecContext(root_save_userid, root_save_sec_context); + + table_close(rel, NoLock); + + /* If this is the top-level index, we're done */ + if (!OidIsValid(parentIndexId)) + pgstat_progress_end_command(); + + return address; + } + + /* + * Roll back any GUC changes executed by index functions, and keep + * subsequent changes local to this command. This is essential if some + * index function changed a behavior-affecting GUC, e.g. search_path. + */ + AtEOXact_GUC(false, root_save_nestlevel); + root_save_nestlevel = NewGUCNestLevel(); + + /* Add any requested comment */ + if (stmt->idxcomment != NULL) + CreateComments(indexRelationId, RelationRelationId, 0, + stmt->idxcomment); + + if (partitioned) + { + PartitionDesc partdesc; + + /* + * Unless caller specified to skip this step (via ONLY), process each + * partition to make sure they all contain a corresponding index. + * + * If we're called internally (no stmt->relation), recurse always. 
+ */ + partdesc = RelationGetPartitionDesc(rel, true); + if ((!stmt->relation || stmt->relation->inh) && partdesc->nparts > 0) + { + int nparts = partdesc->nparts; + Oid *part_oids = palloc(sizeof(Oid) * nparts); + bool invalidate_parent = false; + Relation parentIndex; + TupleDesc parentDesc; + + pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_TOTAL, + nparts); + + /* Make a local copy of partdesc->oids[], just for safety */ + memcpy(part_oids, partdesc->oids, sizeof(Oid) * nparts); + + /* + * We'll need an IndexInfo describing the parent index. The one + * built above is almost good enough, but not quite, because (for + * example) its predicate expression if any hasn't been through + * expression preprocessing. The most reliable way to get an + * IndexInfo that will match those for child indexes is to build + * it the same way, using BuildIndexInfo(). + */ + parentIndex = index_open(indexRelationId, lockmode); + indexInfo = BuildIndexInfo(parentIndex); + + parentDesc = RelationGetDescr(rel); + + /* + * For each partition, scan all existing indexes; if one matches + * our index definition and is not already attached to some other + * parent index, attach it to the one we just created. + * + * If none matches, build a new index by calling ourselves + * recursively with the same options (except for the index name). + */ + for (i = 0; i < nparts; i++) + { + Oid childRelid = part_oids[i]; + Relation childrel; + Oid child_save_userid; + int child_save_sec_context; + int child_save_nestlevel; + List *childidxs; + ListCell *cell; + AttrMap *attmap; + bool found = false; + + childrel = table_open(childRelid, lockmode); + + GetUserIdAndSecContext(&child_save_userid, + &child_save_sec_context); + SetUserIdAndSecContext(childrel->rd_rel->relowner, + child_save_sec_context | SECURITY_RESTRICTED_OPERATION); + child_save_nestlevel = NewGUCNestLevel(); + + /* + * Don't try to create indexes on foreign tables, though. 
Skip + * those if a regular index, or fail if trying to create a + * constraint index. + */ + if (childrel->rd_rel->relkind == RELKIND_FOREIGN_TABLE) + { + if (stmt->unique || stmt->primary) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot create unique index on partitioned table \"%s\"", + RelationGetRelationName(rel)), + errdetail("Table \"%s\" contains partitions that are foreign tables.", + RelationGetRelationName(rel)))); + + AtEOXact_GUC(false, child_save_nestlevel); + SetUserIdAndSecContext(child_save_userid, + child_save_sec_context); + table_close(childrel, lockmode); + continue; + } + + childidxs = RelationGetIndexList(childrel); + attmap = + build_attrmap_by_name(RelationGetDescr(childrel), + parentDesc); + + foreach(cell, childidxs) + { + Oid cldidxid = lfirst_oid(cell); + Relation cldidx; + IndexInfo *cldIdxInfo; + + /* this index is already partition of another one */ + if (has_superclass(cldidxid)) + continue; + + cldidx = index_open(cldidxid, lockmode); + cldIdxInfo = BuildIndexInfo(cldidx); + if (CompareIndexInfo(cldIdxInfo, indexInfo, + cldidx->rd_indcollation, + parentIndex->rd_indcollation, + cldidx->rd_opfamily, + parentIndex->rd_opfamily, + attmap)) + { + Oid cldConstrOid = InvalidOid; + + /* + * Found a match. + * + * If this index is being created in the parent + * because of a constraint, then the child needs to + * have a constraint also, so look for one. If there + * is no such constraint, this index is no good, so + * keep looking. + */ + if (createdConstraintId != InvalidOid) + { + cldConstrOid = + get_relation_idx_constraint_oid(childRelid, + cldidxid); + if (cldConstrOid == InvalidOid) + { + index_close(cldidx, lockmode); + continue; + } + } + + /* Attach index to parent and we're done. 
*/ + IndexSetParentIndex(cldidx, indexRelationId); + if (createdConstraintId != InvalidOid) + ConstraintSetParentConstraint(cldConstrOid, + createdConstraintId, + childRelid); + + if (!cldidx->rd_index->indisvalid) + invalidate_parent = true; + + found = true; + /* keep lock till commit */ + index_close(cldidx, NoLock); + break; + } + + index_close(cldidx, lockmode); + } + + list_free(childidxs); + AtEOXact_GUC(false, child_save_nestlevel); + SetUserIdAndSecContext(child_save_userid, + child_save_sec_context); + table_close(childrel, NoLock); + + /* + * If no matching index was found, create our own. + */ + if (!found) + { + IndexStmt *childStmt = copyObject(stmt); + bool found_whole_row; + ListCell *lc; + ObjectAddress childAddr; + + /* + * We can't use the same index name for the child index, + * so clear idxname to let the recursive invocation choose + * a new name. Likewise, the existing target relation + * field is wrong, and if indexOid or oldNode are set, + * they mustn't be applied to the child either. + */ + childStmt->idxname = NULL; + childStmt->relation = NULL; + childStmt->indexOid = InvalidOid; + childStmt->oldNode = InvalidOid; + childStmt->oldCreateSubid = InvalidSubTransactionId; + childStmt->oldFirstRelfilenodeSubid = InvalidSubTransactionId; + + /* + * Adjust any Vars (both in expressions and in the index's + * WHERE clause) to match the partition's column numbering + * in case it's different from the parent's. + */ + foreach(lc, childStmt->indexParams) + { + IndexElem *ielem = lfirst(lc); + + /* + * If the index parameter is an expression, we must + * translate it to contain child Vars. 
+ */ + if (ielem->expr) + { + ielem->expr = + map_variable_attnos((Node *) ielem->expr, + 1, 0, attmap, + InvalidOid, + &found_whole_row); + if (found_whole_row) + elog(ERROR, "cannot convert whole-row table reference"); + } + } + childStmt->whereClause = + map_variable_attnos(stmt->whereClause, 1, 0, + attmap, + InvalidOid, &found_whole_row); + if (found_whole_row) + elog(ERROR, "cannot convert whole-row table reference"); + + /* + * Recurse as the starting user ID. Callee will use that + * for permission checks, then switch again. + */ + Assert(GetUserId() == child_save_userid); + SetUserIdAndSecContext(root_save_userid, + root_save_sec_context); + childAddr = + DefineIndex(childRelid, childStmt, + InvalidOid, /* no predefined OID */ + indexRelationId, /* this is our child */ + createdConstraintId, + is_alter_table, check_rights, + check_not_in_use, + skip_build, quiet); + SetUserIdAndSecContext(child_save_userid, + child_save_sec_context); + + /* + * Check if the index just created is valid or not, as it + * could be possible that it has been switched as invalid + * when recursing across multiple partition levels. + */ + if (!get_index_isvalid(childAddr.objectId)) + invalidate_parent = true; + } + + pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, + i + 1); + free_attrmap(attmap); + } + + index_close(parentIndex, lockmode); + + /* + * The pg_index row we inserted for this index was marked + * indisvalid=true. But if we attached an existing index that is + * invalid, this is incorrect, so update our row to invalid too. 
+ */ + if (invalidate_parent) + { + Relation pg_index = table_open(IndexRelationId, RowExclusiveLock); + HeapTuple tup, + newtup; + + tup = SearchSysCache1(INDEXRELID, + ObjectIdGetDatum(indexRelationId)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for index %u", + indexRelationId); + newtup = heap_copytuple(tup); + ((Form_pg_index) GETSTRUCT(newtup))->indisvalid = false; + CatalogTupleUpdate(pg_index, &tup->t_self, newtup); + ReleaseSysCache(tup); + table_close(pg_index, RowExclusiveLock); + heap_freetuple(newtup); + + /* + * CCI here to make this update visible, in case this recurses + * across multiple partition levels. + */ + CommandCounterIncrement(); + } + } + + /* + * Indexes on partitioned tables are not themselves built, so we're + * done here. + */ + AtEOXact_GUC(false, root_save_nestlevel); + SetUserIdAndSecContext(root_save_userid, root_save_sec_context); + table_close(rel, NoLock); + if (!OidIsValid(parentIndexId)) + pgstat_progress_end_command(); + return address; + } + + AtEOXact_GUC(false, root_save_nestlevel); + SetUserIdAndSecContext(root_save_userid, root_save_sec_context); + + if (!concurrent) + { + /* Close the heap and we're done, in the non-concurrent case */ + table_close(rel, NoLock); + + /* If this is the top-level index, we're done. */ + if (!OidIsValid(parentIndexId)) + pgstat_progress_end_command(); + + return address; + } + + /* save lockrelid and locktag for below, then close rel */ + heaprelid = rel->rd_lockInfo.lockRelId; + SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId); + table_close(rel, NoLock); + + /* + * For a concurrent build, it's important to make the catalog entries + * visible to other transactions before we start to build the index. That + * will prevent them from making incompatible HOT updates. The new index + * will be marked not indisready and not indisvalid, so that no one else + * tries to either insert into it or use it for queries. 
+ * + * We must commit our current transaction so that the index becomes + * visible; then start another. Note that all the data structures we just + * built are lost in the commit. The only data we keep past here are the + * relation IDs. + * + * Before committing, get a session-level lock on the table, to ensure + * that neither it nor the index can be dropped before we finish. This + * cannot block, even if someone else is waiting for access, because we + * already have the same lock within our transaction. + * + * Note: we don't currently bother with a session lock on the index, + * because there are no operations that could change its state while we + * hold lock on the parent table. This might need to change later. + */ + LockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock); + + PopActiveSnapshot(); + CommitTransactionCommand(); + StartTransactionCommand(); + + /* Tell concurrent index builds to ignore us, if index qualifies */ + if (safe_index) + set_indexsafe_procflags(); + + /* + * The index is now visible, so we can report the OID. While on it, + * include the report for the beginning of phase 2. + */ + { + const int progress_cols[] = { + PROGRESS_CREATEIDX_INDEX_OID, + PROGRESS_CREATEIDX_PHASE + }; + const int64 progress_vals[] = { + indexRelationId, + PROGRESS_CREATEIDX_PHASE_WAIT_1 + }; + + pgstat_progress_update_multi_param(2, progress_cols, progress_vals); + } + + /* + * Phase 2 of concurrent index build (see comments for validate_index() + * for an overview of how this works) + * + * Now we must wait until no running transaction could have the table open + * with the old list of indexes. Use ShareLock to consider running + * transactions that hold locks that permit writing to the table. Note we + * do not need to worry about xacts that open the table for writing after + * this point; they will see the new index when they open it. 
+ * + * Note: the reason we use actual lock acquisition here, rather than just + * checking the ProcArray and sleeping, is that deadlock is possible if + * one of the transactions in question is blocked trying to acquire an + * exclusive lock on our table. The lock code will detect deadlock and + * error out properly. + */ + WaitForLockers(heaplocktag, ShareLock, true); + + /* + * At this moment we are sure that there are no transactions with the + * table open for write that don't have this new index in their list of + * indexes. We have waited out all the existing transactions and any new + * transaction will have the new index in its list, but the index is still + * marked as "not-ready-for-inserts". The index is consulted while + * deciding HOT-safety though. This arrangement ensures that no new HOT + * chains can be created where the new tuple and the old tuple in the + * chain have different index keys. + * + * We now take a new snapshot, and build the index using all tuples that + * are visible in this snapshot. We can be sure that any HOT updates to + * these tuples will be compatible with the index, since any updates made + * by transactions that didn't know about the index are now committed or + * rolled back. Thus, each visible tuple is either the end of its + * HOT-chain or the extension of the chain is HOT-safe for this index. + */ + + /* Set ActiveSnapshot since functions in the indexes may need it */ + PushActiveSnapshot(GetTransactionSnapshot()); + + /* Perform concurrent build of index */ + index_concurrently_build(relationId, indexRelationId); + + /* we can do away with our snapshot */ + PopActiveSnapshot(); + + /* + * Commit this transaction to make the indisready update visible. 
+ */ + CommitTransactionCommand(); + StartTransactionCommand(); + + /* Tell concurrent index builds to ignore us, if index qualifies */ + if (safe_index) + set_indexsafe_procflags(); + + /* + * Phase 3 of concurrent index build + * + * We once again wait until no transaction can have the table open with + * the index marked as read-only for updates. + */ + pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE, + PROGRESS_CREATEIDX_PHASE_WAIT_2); + WaitForLockers(heaplocktag, ShareLock, true); + + /* + * Now take the "reference snapshot" that will be used by validate_index() + * to filter candidate tuples. Beware! There might still be snapshots in + * use that treat some transaction as in-progress that our reference + * snapshot treats as committed. If such a recently-committed transaction + * deleted tuples in the table, we will not include them in the index; yet + * those transactions which see the deleting one as still-in-progress will + * expect such tuples to be there once we mark the index as valid. + * + * We solve this by waiting for all endangered transactions to exit before + * we mark the index as valid. + * + * We also set ActiveSnapshot to this snap, since functions in indexes may + * need a snapshot. + */ + snapshot = RegisterSnapshot(GetTransactionSnapshot()); + PushActiveSnapshot(snapshot); + + /* + * Scan the index and the heap, insert any missing index entries. + */ + validate_index(relationId, indexRelationId, snapshot); + + /* + * Drop the reference snapshot. We must do this before waiting out other + * snapshot holders, else we will deadlock against other processes also + * doing CREATE INDEX CONCURRENTLY, which would see our snapshot as one + * they must wait for. But first, save the snapshot's xmin to use as + * limitXmin for GetCurrentVirtualXIDs(). 
+ */ + limitXmin = snapshot->xmin; + + PopActiveSnapshot(); + UnregisterSnapshot(snapshot); + + /* + * The snapshot subsystem could still contain registered snapshots that + * are holding back our process's advertised xmin; in particular, if + * default_transaction_isolation = serializable, there is a transaction + * snapshot that is still active. The CatalogSnapshot is likewise a + * hazard. To ensure no deadlocks, we must commit and start yet another + * transaction, and do our wait before any snapshot has been taken in it. + */ + CommitTransactionCommand(); + StartTransactionCommand(); + + /* Tell concurrent index builds to ignore us, if index qualifies */ + if (safe_index) + set_indexsafe_procflags(); + + /* We should now definitely not be advertising any xmin. */ + Assert(MyProc->xmin == InvalidTransactionId); + + /* + * The index is now valid in the sense that it contains all currently + * interesting tuples. But since it might not contain tuples deleted just + * before the reference snap was taken, we have to wait out any + * transactions that might have older snapshots. + */ + pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE, + PROGRESS_CREATEIDX_PHASE_WAIT_3); + WaitForOlderSnapshots(limitXmin, true); + + /* + * Index can now be marked valid -- update its pg_index entry + */ + index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID); + + /* + * The pg_index update will cause backends (including this one) to update + * relcache entries for the index itself, but we should also send a + * relcache inval on the parent table to force replanning of cached plans. + * Otherwise existing sessions might fail to use the new index where it + * would be useful. (Note that our earlier commits did not create reasons + * to replan; so relcache flush on the index itself was sufficient.) + */ + CacheInvalidateRelcacheByRelid(heaprelid.relId); + + /* + * Last thing to do is release the session-level lock on the parent table. 
 */
	UnlockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);

	pgstat_progress_end_command();

	return address;
}


/*
 * CheckMutability
 *		Test whether given expression is mutable
 *
 * Returns true if the expression (after planning) may still contain
 * mutable functions, i.e. it is not safe to rely on it producing the
 * same result for the same input every time.
 */
static bool
CheckMutability(Expr *expr)
{
	/*
	 * First run the expression through the planner.  This has a couple of
	 * important consequences.  First, function default arguments will get
	 * inserted, which may affect volatility (consider "default now()").
	 * Second, inline-able functions will get inlined, which may allow us to
	 * conclude that the function is really less volatile than it's marked. As
	 * an example, polymorphic functions must be marked with the most volatile
	 * behavior that they have for any input type, but once we inline the
	 * function we may be able to conclude that it's not so volatile for the
	 * particular input type we're dealing with.
	 *
	 * We assume here that expression_planner() won't scribble on its input.
	 */
	expr = expression_planner(expr);

	/* Now we can search for non-immutable functions */
	return contain_mutable_functions((Node *) expr);
}


/*
 * CheckPredicate
 *		Checks that the given partial-index predicate is valid.
 *
 * Raises an error (does not return) if the predicate is unacceptable.
 *
 * This used to also constrain the form of the predicate to forms that
 * indxpath.c could do something with.  However, that seems overly
 * restrictive.  One useful application of partial indexes is to apply
 * a UNIQUE constraint across a subset of a table, and in that scenario
 * any evaluable predicate will work.  So accept any predicate here
 * (except ones requiring a plan), and let indxpath.c fend for itself.
 */
static void
CheckPredicate(Expr *predicate)
{
	/*
	 * transformExpr() should have already rejected subqueries, aggregates,
	 * and window functions, based on the EXPR_KIND_ for a predicate.
	 */

	/*
	 * A predicate using mutable functions is probably wrong, for the same
	 * reasons that we don't allow an index expression to use one.
 */
	if (CheckMutability(predicate))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("functions in index predicate must be marked IMMUTABLE")));
}

/*
 * Compute per-index-column information, including indexed column numbers
 * or index expressions, opclasses and their options. Note, all output vectors
 * should be allocated for all columns, including "including" ones.
 *
 * The exclusion-constraint arrays (ii_ExclusionOps et al.) are filled in
 * only when exclusionOpNames is non-NIL.
 *
 * If the caller switched to the table owner, ddl_userid is the role for ACL
 * checks reached without traversing opaque expressions.  Otherwise, it's
 * InvalidOid, and other ddl_* arguments are undefined.
 */
static void
ComputeIndexAttrs(IndexInfo *indexInfo,
				  Oid *typeOidP,
				  Oid *collationOidP,
				  Oid *classOidP,
				  int16 *colOptionP,
				  List *attList,	/* list of IndexElem's */
				  List *exclusionOpNames,
				  Oid relId,
				  const char *accessMethodName,
				  Oid accessMethodId,
				  bool amcanorder,
				  bool isconstraint,
				  Oid ddl_userid,
				  int ddl_sec_context,
				  int *ddl_save_nestlevel)
{
	ListCell   *nextExclOp;
	ListCell   *lc;
	int			attn;			/* index of the column being processed */
	int			nkeycols = indexInfo->ii_NumIndexKeyAttrs;
	Oid			save_userid;
	int			save_sec_context;

	/* Allocate space for exclusion operator info, if needed */
	if (exclusionOpNames)
	{
		Assert(list_length(exclusionOpNames) == nkeycols);
		indexInfo->ii_ExclusionOps = (Oid *) palloc(sizeof(Oid) * nkeycols);
		indexInfo->ii_ExclusionProcs = (Oid *) palloc(sizeof(Oid) * nkeycols);
		indexInfo->ii_ExclusionStrats = (uint16 *) palloc(sizeof(uint16) * nkeycols);
		nextExclOp = list_head(exclusionOpNames);
	}
	else
		nextExclOp = NULL;

	/* Remember the caller's identity so we can switch back after each hop */
	if (OidIsValid(ddl_userid))
		GetUserIdAndSecContext(&save_userid, &save_sec_context);

	/*
	 * process attributeList
	 */
	attn = 0;
	foreach(lc, attList)
	{
		IndexElem  *attribute = (IndexElem *) lfirst(lc);
		Oid			atttype;
		Oid			attcollation;

		/*
		 * Process the column-or-expression to be indexed.
 */
		if (attribute->name != NULL)
		{
			/* Simple index attribute */
			HeapTuple	atttuple;
			Form_pg_attribute attform;

			Assert(attribute->expr == NULL);
			atttuple = SearchSysCacheAttName(relId, attribute->name);
			if (!HeapTupleIsValid(atttuple))
			{
				/* difference in error message spellings is historical */
				if (isconstraint)
					ereport(ERROR,
							(errcode(ERRCODE_UNDEFINED_COLUMN),
							 errmsg("column \"%s\" named in key does not exist",
									attribute->name)));
				else
					ereport(ERROR,
							(errcode(ERRCODE_UNDEFINED_COLUMN),
							 errmsg("column \"%s\" does not exist",
									attribute->name)));
			}
			attform = (Form_pg_attribute) GETSTRUCT(atttuple);
			indexInfo->ii_IndexAttrNumbers[attn] = attform->attnum;
			atttype = attform->atttypid;
			attcollation = attform->attcollation;
			ReleaseSysCache(atttuple);
		}
		else
		{
			/* Index expression */
			Node	   *expr = attribute->expr;

			Assert(expr != NULL);

			if (attn >= nkeycols)
				ereport(ERROR,
						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						 errmsg("expressions are not supported in included columns")));
			atttype = exprType(expr);
			attcollation = exprCollation(expr);

			/*
			 * Strip any top-level COLLATE clause.  This ensures that we treat
			 * "x COLLATE y" and "(x COLLATE y)" alike.
			 */
			while (IsA(expr, CollateExpr))
				expr = (Node *) ((CollateExpr *) expr)->arg;

			if (IsA(expr, Var) &&
				((Var *) expr)->varattno != InvalidAttrNumber)
			{
				/*
				 * User wrote "(column)" or "(column COLLATE something)".
				 * Treat it like simple attribute anyway.
				 */
				indexInfo->ii_IndexAttrNumbers[attn] = ((Var *) expr)->varattno;
			}
			else
			{
				indexInfo->ii_IndexAttrNumbers[attn] = 0;	/* marks expression */
				indexInfo->ii_Expressions = lappend(indexInfo->ii_Expressions,
													expr);

				/*
				 * transformExpr() should have already rejected subqueries,
				 * aggregates, and window functions, based on the EXPR_KIND_
				 * for an index expression.
 */

				/*
				 * An expression using mutable functions is probably wrong,
				 * since if you aren't going to get the same result for the
				 * same data every time, it's not clear what the index entries
				 * mean at all.
				 */
				if (CheckMutability((Expr *) expr))
					ereport(ERROR,
							(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
							 errmsg("functions in index expression must be marked IMMUTABLE")));
			}
		}

		typeOidP[attn] = atttype;

		/*
		 * Included columns have no collation, no opclass and no ordering
		 * options.
		 */
		if (attn >= nkeycols)
		{
			if (attribute->collation)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
						 errmsg("including column does not support a collation")));
			if (attribute->opclass)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
						 errmsg("including column does not support an operator class")));
			if (attribute->ordering != SORTBY_DEFAULT)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
						 errmsg("including column does not support ASC/DESC options")));
			if (attribute->nulls_ordering != SORTBY_NULLS_DEFAULT)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
						 errmsg("including column does not support NULLS FIRST/LAST options")));

			classOidP[attn] = InvalidOid;
			colOptionP[attn] = 0;
			collationOidP[attn] = InvalidOid;
			attn++;

			continue;
		}

		/*
		 * Apply collation override if any.  Use of ddl_userid is necessary
		 * due to ACL checks therein, and it's safe because collations don't
		 * contain opaque expressions (or non-opaque expressions).
 */
		if (attribute->collation)
		{
			if (OidIsValid(ddl_userid))
			{
				AtEOXact_GUC(false, *ddl_save_nestlevel);
				SetUserIdAndSecContext(ddl_userid, ddl_sec_context);
			}
			attcollation = get_collation_oid(attribute->collation, false);
			if (OidIsValid(ddl_userid))
			{
				SetUserIdAndSecContext(save_userid, save_sec_context);
				*ddl_save_nestlevel = NewGUCNestLevel();
			}
		}

		/*
		 * Check we have a collation iff it's a collatable type.  The only
		 * expected failures here are (1) COLLATE applied to a noncollatable
		 * type, or (2) index expression had an unresolved collation.  But we
		 * might as well code this to be a complete consistency check.
		 */
		if (type_is_collatable(atttype))
		{
			if (!OidIsValid(attcollation))
				ereport(ERROR,
						(errcode(ERRCODE_INDETERMINATE_COLLATION),
						 errmsg("could not determine which collation to use for index expression"),
						 errhint("Use the COLLATE clause to set the collation explicitly.")));
		}
		else
		{
			if (OidIsValid(attcollation))
				ereport(ERROR,
						(errcode(ERRCODE_DATATYPE_MISMATCH),
						 errmsg("collations are not supported by type %s",
								format_type_be(atttype))));
		}

		collationOidP[attn] = attcollation;

		/*
		 * Identify the opclass to use.  Use of ddl_userid is necessary due to
		 * ACL checks therein.  This is safe despite opclasses containing
		 * opaque expressions (specifically, functions), because only
		 * superusers can define opclasses.
		 */
		if (OidIsValid(ddl_userid))
		{
			AtEOXact_GUC(false, *ddl_save_nestlevel);
			SetUserIdAndSecContext(ddl_userid, ddl_sec_context);
		}
		classOidP[attn] = ResolveOpClass(attribute->opclass,
										 atttype,
										 accessMethodName,
										 accessMethodId);
		if (OidIsValid(ddl_userid))
		{
			SetUserIdAndSecContext(save_userid, save_sec_context);
			*ddl_save_nestlevel = NewGUCNestLevel();
		}

		/*
		 * Identify the exclusion operator, if any.
 */
		if (nextExclOp)
		{
			List	   *opname = (List *) lfirst(nextExclOp);
			Oid			opid;
			Oid			opfamily;
			int			strat;

			/*
			 * Find the operator --- it must accept the column datatype
			 * without runtime coercion (but binary compatibility is OK).
			 * Operators contain opaque expressions (specifically, functions).
			 * compatible_oper_opid() boils down to oper() and
			 * IsBinaryCoercible().  PostgreSQL would have security problems
			 * elsewhere if oper() started calling opaque expressions.
			 */
			if (OidIsValid(ddl_userid))
			{
				AtEOXact_GUC(false, *ddl_save_nestlevel);
				SetUserIdAndSecContext(ddl_userid, ddl_sec_context);
			}
			opid = compatible_oper_opid(opname, atttype, atttype, false);
			if (OidIsValid(ddl_userid))
			{
				SetUserIdAndSecContext(save_userid, save_sec_context);
				*ddl_save_nestlevel = NewGUCNestLevel();
			}

			/*
			 * Only allow commutative operators to be used in exclusion
			 * constraints. If X conflicts with Y, but Y does not conflict
			 * with X, bad things will happen.
			 */
			if (get_commutator(opid) != opid)
				ereport(ERROR,
						(errcode(ERRCODE_WRONG_OBJECT_TYPE),
						 errmsg("operator %s is not commutative",
								format_operator(opid)),
						 errdetail("Only commutative operators can be used in exclusion constraints.")));

			/*
			 * Operator must be a member of the right opfamily, too
			 */
			opfamily = get_opclass_family(classOidP[attn]);
			strat = get_op_opfamily_strategy(opid, opfamily);
			if (strat == 0)
			{
				HeapTuple	opftuple;
				Form_pg_opfamily opfform;

				/*
				 * attribute->opclass might not explicitly name the opfamily,
				 * so fetch the name of the selected opfamily for use in the
				 * error message.
 */
				opftuple = SearchSysCache1(OPFAMILYOID,
										   ObjectIdGetDatum(opfamily));
				if (!HeapTupleIsValid(opftuple))
					elog(ERROR, "cache lookup failed for opfamily %u",
						 opfamily);
				opfform = (Form_pg_opfamily) GETSTRUCT(opftuple);

				ereport(ERROR,
						(errcode(ERRCODE_WRONG_OBJECT_TYPE),
						 errmsg("operator %s is not a member of operator family \"%s\"",
								format_operator(opid),
								NameStr(opfform->opfname)),
						 errdetail("The exclusion operator must be related to the index operator class for the constraint.")));
			}

			indexInfo->ii_ExclusionOps[attn] = opid;
			indexInfo->ii_ExclusionProcs[attn] = get_opcode(opid);
			indexInfo->ii_ExclusionStrats[attn] = strat;
			nextExclOp = lnext(exclusionOpNames, nextExclOp);
		}

		/*
		 * Set up the per-column options (indoption field).  For now, this is
		 * zero for any un-ordered index, while ordered indexes have DESC and
		 * NULLS FIRST/LAST options.
		 */
		colOptionP[attn] = 0;
		if (amcanorder)
		{
			/* default ordering is ASC */
			if (attribute->ordering == SORTBY_DESC)
				colOptionP[attn] |= INDOPTION_DESC;
			/* default null ordering is LAST for ASC, FIRST for DESC */
			if (attribute->nulls_ordering == SORTBY_NULLS_DEFAULT)
			{
				if (attribute->ordering == SORTBY_DESC)
					colOptionP[attn] |= INDOPTION_NULLS_FIRST;
			}
			else if (attribute->nulls_ordering == SORTBY_NULLS_FIRST)
				colOptionP[attn] |= INDOPTION_NULLS_FIRST;
		}
		else
		{
			/* index AM does not support ordering */
			if (attribute->ordering != SORTBY_DEFAULT)
				ereport(ERROR,
						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						 errmsg("access method \"%s\" does not support ASC/DESC options",
								accessMethodName)));
			if (attribute->nulls_ordering != SORTBY_NULLS_DEFAULT)
				ereport(ERROR,
						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						 errmsg("access method \"%s\" does not support NULLS FIRST/LAST options",
								accessMethodName)));
		}

		/* Set up the per-column opclass options (attoptions field).
+ */
+		if (attribute->opclassopts)
+		{
+			Assert(attn < nkeycols);
+
+			if (!indexInfo->ii_OpclassOptions)
+				indexInfo->ii_OpclassOptions =
+					palloc0(sizeof(Datum) * indexInfo->ii_NumIndexAttrs);
+
+			indexInfo->ii_OpclassOptions[attn] =
+				transformRelOptions((Datum) 0, attribute->opclassopts,
+									NULL, NULL, false, false);
+		}
+
+		attn++;
+	}
+}
+
+/*
+ * Resolve possibly-defaulted operator class specification
+ *
+ * Note: This is used to resolve operator class specifications in index and
+ * partition key definitions.
+ */
+Oid
+ResolveOpClass(List *opclass, Oid attrType,
+			   const char *accessMethodName, Oid accessMethodId)
+{
+	char	   *schemaname;
+	char	   *opcname;
+	HeapTuple	tuple;
+	Form_pg_opclass opform;
+	Oid			opClassId,
+				opInputType;
+
+	if (opclass == NIL)
+	{
+		/* no operator class specified, so find the default */
+		opClassId = GetDefaultOpClass(attrType, accessMethodId);
+		if (!OidIsValid(opClassId))
+			ereport(ERROR,
+					(errcode(ERRCODE_UNDEFINED_OBJECT),
+					 errmsg("data type %s has no default operator class for access method \"%s\"",
+							format_type_be(attrType), accessMethodName),
+					 errhint("You must specify an operator class for the index or define a default operator class for the data type.")));
+		return opClassId;
+	}
+
+	/*
+	 * Specific opclass name given, so look up the opclass.
+	 */
+
+	/* deconstruct the name list */
+	DeconstructQualifiedName(opclass, &schemaname, &opcname);
+
+	if (schemaname)
+	{
+		/* Look in specific schema only */
+		Oid			namespaceId;
+
+		namespaceId = LookupExplicitNamespace(schemaname, false);
+		tuple = SearchSysCache3(CLAAMNAMENSP,
+								ObjectIdGetDatum(accessMethodId),
+								PointerGetDatum(opcname),
+								ObjectIdGetDatum(namespaceId));
+	}
+	else
+	{
+		/* Unqualified opclass name, so search the search path */
+		opClassId = OpclassnameGetOpcid(accessMethodId, opcname);
+		if (!OidIsValid(opClassId))
+			ereport(ERROR,
+					(errcode(ERRCODE_UNDEFINED_OBJECT),
+					 errmsg("operator class \"%s\" does not exist for access method \"%s\"",
+							opcname, accessMethodName)));
+		tuple = SearchSysCache1(CLAOID, ObjectIdGetDatum(opClassId));
+	}
+
+	/* covers the schema-qualified lookup failing to find a match, too */
+	if (!HeapTupleIsValid(tuple))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("operator class \"%s\" does not exist for access method \"%s\"",
+						NameListToString(opclass), accessMethodName)));
+
+	/*
+	 * Verify that the index operator class accepts this datatype.  Note we
+	 * will accept binary compatibility.
+	 */
+	opform = (Form_pg_opclass) GETSTRUCT(tuple);
+	opClassId = opform->oid;
+	opInputType = opform->opcintype;
+
+	if (!IsBinaryCoercible(attrType, opInputType))
+		ereport(ERROR,
+				(errcode(ERRCODE_DATATYPE_MISMATCH),
+				 errmsg("operator class \"%s\" does not accept data type %s",
+						NameListToString(opclass), format_type_be(attrType))));
+
+	ReleaseSysCache(tuple);
+
+	return opClassId;
+}
+
+/*
+ * GetDefaultOpClass
+ *
+ * Given the OIDs of a datatype and an access method, find the default
+ * operator class, if any.  Returns InvalidOid if there is none.
+ */
+Oid
+GetDefaultOpClass(Oid type_id, Oid am_id)
+{
+	Oid			result = InvalidOid;
+	int			nexact = 0;		/* number of exact-match default opclasses */
+	int			ncompatible = 0;	/* binary-compatible, non-preferred type */
+	int			ncompatiblepreferred = 0;	/* binary-compatible, preferred type */
+	Relation	rel;
+	ScanKeyData skey[1];
+	SysScanDesc scan;
+	HeapTuple	tup;
+	TYPCATEGORY tcategory;
+
+	/* If it's a domain, look at the base type instead */
+	type_id = getBaseType(type_id);
+
+	tcategory = TypeCategory(type_id);
+
+	/*
+	 * We scan through all the opclasses available for the access method,
+	 * looking for one that is marked default and matches the target type
+	 * (either exactly or binary-compatibly, but prefer an exact match).
+	 *
+	 * We could find more than one binary-compatible match.  If just one is
+	 * for a preferred type, use that one; otherwise we fail, forcing the user
+	 * to specify which one he wants.  (The preferred-type special case is a
+	 * kluge for varchar: it's binary-compatible to both text and bpchar, so
+	 * we need a tiebreaker.)  If we find more than one exact match, then
+	 * someone put bogus entries in pg_opclass.
+ */
+	rel = table_open(OperatorClassRelationId, AccessShareLock);
+
+	ScanKeyInit(&skey[0],
+				Anum_pg_opclass_opcmethod,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(am_id));
+
+	scan = systable_beginscan(rel, OpclassAmNameNspIndexId, true,
+							  NULL, 1, skey);
+
+	while (HeapTupleIsValid(tup = systable_getnext(scan)))
+	{
+		Form_pg_opclass opclass = (Form_pg_opclass) GETSTRUCT(tup);
+
+		/* ignore altogether if not a default opclass */
+		if (!opclass->opcdefault)
+			continue;
+		if (opclass->opcintype == type_id)
+		{
+			/* exact match: remember it, but keep counting duplicates */
+			nexact++;
+			result = opclass->oid;
+		}
+		else if (nexact == 0 &&
+				 IsBinaryCoercible(type_id, opclass->opcintype))
+		{
+			if (IsPreferredType(tcategory, opclass->opcintype))
+			{
+				ncompatiblepreferred++;
+				result = opclass->oid;
+			}
+			else if (ncompatiblepreferred == 0)
+			{
+				ncompatible++;
+				result = opclass->oid;
+			}
+		}
+	}
+
+	systable_endscan(scan);
+
+	table_close(rel, AccessShareLock);
+
+	/* raise error if pg_opclass contains inconsistent data */
+	if (nexact > 1)
+		ereport(ERROR,
+				(errcode(ERRCODE_DUPLICATE_OBJECT),
+				 errmsg("there are multiple default operator classes for data type %s",
+						format_type_be(type_id))));
+
+	if (nexact == 1 ||
+		ncompatiblepreferred == 1 ||
+		(ncompatiblepreferred == 0 && ncompatible == 1))
+		return result;
+
+	return InvalidOid;
+}
+
+/*
+ * makeObjectName()
+ *
+ * Create a name for an implicitly created index, sequence, constraint,
+ * extended statistics, etc.
+ *
+ * The parameters are typically: the original table name, the original field
+ * name, and a "type" string (such as "seq" or "pkey").  The field name
+ * and/or type can be NULL if not relevant.
+ *
+ * The result is a palloc'd string.
+ *
+ * The basic result we want is "name1_name2_label", omitting "_name2" or
+ * "_label" when those parameters are NULL.  However, we must generate
+ * a name with less than NAMEDATALEN characters!  So, we truncate one or
+ * both names if necessary to make a short-enough string.  The label part
+ * is never truncated (so it had better be reasonably short).
+ *
+ * The caller is responsible for checking uniqueness of the generated
+ * name and retrying as needed; retrying will be done by altering the
+ * "label" string (which is why we never truncate that part).
+ */
+char *
+makeObjectName(const char *name1, const char *name2, const char *label)
+{
+	char	   *name;
+	int			overhead = 0;	/* chars needed for label and underscores */
+	int			availchars;		/* chars available for name(s) */
+	int			name1chars;		/* chars allocated to name1 */
+	int			name2chars;		/* chars allocated to name2 */
+	int			ndx;
+
+	name1chars = strlen(name1);
+	if (name2)
+	{
+		name2chars = strlen(name2);
+		overhead++;				/* allow for separating underscore */
+	}
+	else
+		name2chars = 0;
+	if (label)
+		overhead += strlen(label) + 1;
+
+	availchars = NAMEDATALEN - 1 - overhead;
+	Assert(availchars > 0);		/* else caller chose a bad label */
+
+	/*
+	 * If we must truncate, preferentially truncate the longer name. This
+	 * logic could be expressed without a loop, but it's simple and obvious as
+	 * a loop.
+	 */
+	while (name1chars + name2chars > availchars)
+	{
+		if (name1chars > name2chars)
+			name1chars--;
+		else
+			name2chars--;
+	}
+
+	/* make sure we don't truncate in the middle of a multibyte character */
+	name1chars = pg_mbcliplen(name1, name1chars, name1chars);
+	if (name2)
+		name2chars = pg_mbcliplen(name2, name2chars, name2chars);
+
+	/* Now construct the string using the chosen lengths */
+	name = palloc(name1chars + name2chars + overhead + 1);
+	memcpy(name, name1, name1chars);
+	ndx = name1chars;
+	if (name2)
+	{
+		name[ndx++] = '_';
+		memcpy(name + ndx, name2, name2chars);
+		ndx += name2chars;
+	}
+	if (label)
+	{
+		name[ndx++] = '_';
+		strcpy(name + ndx, label);
+	}
+	else
+		name[ndx] = '\0';
+
+	return name;
+}
+
+/*
+ * Select a nonconflicting name for a new relation.  This is ordinarily
+ * used to choose index names (which is why it's here) but it can also
+ * be used for sequences, or any autogenerated relation kind.
+ *
+ * name1, name2, and label are used the same way as for makeObjectName(),
+ * except that the label can't be NULL; digits will be appended to the label
+ * if needed to create a name that is unique within the specified namespace.
+ *
+ * If isconstraint is true, we also avoid choosing a name matching any
+ * existing constraint in the same namespace.  (This is stricter than what
+ * Postgres itself requires, but the SQL standard says that constraint names
+ * should be unique within schemas, so we follow that for autogenerated
+ * constraint names.)
+ *
+ * Note: it is theoretically possible to get a collision anyway, if someone
+ * else chooses the same name concurrently.  This is fairly unlikely to be
+ * a problem in practice, especially if one is holding an exclusive lock on
+ * the relation identified by name1.  However, if choosing multiple names
+ * within a single command, you'd better create the new object and do
+ * CommandCounterIncrement before choosing the next one!
+ *
+ * Returns a palloc'd string.
+ */
+char *
+ChooseRelationName(const char *name1, const char *name2,
+				   const char *label, Oid namespaceid,
+				   bool isconstraint)
+{
+	int			pass = 0;
+	char	   *relname = NULL;
+	char		modlabel[NAMEDATALEN];
+
+	/* try the unmodified label first */
+	strlcpy(modlabel, label, sizeof(modlabel));
+
+	for (;;)
+	{
+		relname = makeObjectName(name1, name2, modlabel);
+
+		if (!OidIsValid(get_relname_relid(relname, namespaceid)))
+		{
+			if (!isconstraint ||
+				!ConstraintNameExists(relname, namespaceid))
+				break;
+		}
+
+		/* found a conflict, so try a new name component */
+		pfree(relname);
+		snprintf(modlabel, sizeof(modlabel), "%s%d", label, ++pass);
+	}
+
+	return relname;
+}
+
+/*
+ * Select the name to be used for an index.
+ *
+ * The argument list is pretty ad-hoc :-(
+ */
+static char *
+ChooseIndexName(const char *tabname, Oid namespaceId,
+				List *colnames, List *exclusionOpNames,
+				bool primary, bool isconstraint)
+{
+	char	   *indexname;
+
+	if (primary)
+	{
+		/* the primary key's name does not depend on the specific column(s) */
+		indexname = ChooseRelationName(tabname,
+									   NULL,
+									   "pkey",
+									   namespaceId,
+									   true);
+	}
+	else if (exclusionOpNames != NIL)
+	{
+		indexname = ChooseRelationName(tabname,
+									   ChooseIndexNameAddition(colnames),
+									   "excl",
+									   namespaceId,
+									   true);
+	}
+	else if (isconstraint)
+	{
+		indexname = ChooseRelationName(tabname,
+									   ChooseIndexNameAddition(colnames),
+									   "key",
+									   namespaceId,
+									   true);
+	}
+	else
+	{
+		indexname = ChooseRelationName(tabname,
+									   ChooseIndexNameAddition(colnames),
+									   "idx",
+									   namespaceId,
+									   false);
+	}
+
+	return indexname;
+}
+
+/*
+ * Generate "name2" for a new index given the list of column names for it
+ * (as produced by ChooseIndexColumnNames).  This will be passed to
+ * ChooseRelationName along with the parent table name and a suitable label.
+ *
+ * We know that less than NAMEDATALEN characters will actually be used,
+ * so we can truncate the result once we've generated that many.
+ *
+ * XXX See also ChooseForeignKeyConstraintNameAddition and
+ * ChooseExtendedStatisticNameAddition.
+ */
+static char *
+ChooseIndexNameAddition(List *colnames)
+{
+	char		buf[NAMEDATALEN * 2];
+	int			buflen = 0;
+	ListCell   *lc;
+
+	buf[0] = '\0';
+	foreach(lc, colnames)
+	{
+		const char *name = (const char *) lfirst(lc);
+
+		if (buflen > 0)
+			buf[buflen++] = '_';	/* insert _ between names */
+
+		/*
+		 * At this point we have buflen <= NAMEDATALEN.  name should be less
+		 * than NAMEDATALEN already, but use strlcpy for paranoia.
+		 */
+		strlcpy(buf + buflen, name, NAMEDATALEN);
+		buflen += strlen(buf + buflen);
+		if (buflen >= NAMEDATALEN)
+			break;
+	}
+	return pstrdup(buf);
+}
+
+/*
+ * Select the actual names to be used for the columns of an index, given the
+ * list of IndexElems for the columns.  This is mostly about ensuring the
+ * names are unique so we don't get a conflicting-attribute-names error.
+ *
+ * Returns a List of plain strings (char *, not String nodes).
+ */
+static List *
+ChooseIndexColumnNames(List *indexElems)
+{
+	List	   *result = NIL;
+	ListCell   *lc;
+
+	foreach(lc, indexElems)
+	{
+		IndexElem  *ielem = (IndexElem *) lfirst(lc);
+		const char *origname;
+		const char *curname;
+		int			i;
+		char		buf[NAMEDATALEN];
+
+		/* Get the preliminary name from the IndexElem */
+		if (ielem->indexcolname)
+			origname = ielem->indexcolname; /* caller-specified name */
+		else if (ielem->name)
+			origname = ielem->name; /* simple column reference */
+		else
+			origname = "expr";	/* default name for expression */
+
+		/* If it conflicts with any previous column, tweak it */
+		curname = origname;
+		for (i = 1;; i++)
+		{
+			ListCell   *lc2;
+			char		nbuf[32];
+			int			nlen;
+
+			/* does curname match any name already chosen? */
+			foreach(lc2, result)
+			{
+				if (strcmp(curname, (char *) lfirst(lc2)) == 0)
+					break;
+			}
+			if (lc2 == NULL)
+				break;			/* found nonconflicting name */
+
+			sprintf(nbuf, "%d", i);
+
+			/* Ensure generated names are shorter than NAMEDATALEN */
+			nlen = pg_mbcliplen(origname, strlen(origname),
+								NAMEDATALEN - 1 - strlen(nbuf));
+			memcpy(buf, origname, nlen);
+			strcpy(buf + nlen, nbuf);
+			curname = buf;
+		}
+
+		/* And attach to the result list */
+		result = lappend(result, pstrdup(curname));
+	}
+	return result;
+}
+
+/*
+ * ExecReindex
+ *
+ * Primary entry point for manual REINDEX commands.  This is mainly a
+ * preparation wrapper for the real operations that will happen in
+ * each subroutine of REINDEX.
+ */
+void
+ExecReindex(ParseState *pstate, ReindexStmt *stmt, bool isTopLevel)
+{
+	ReindexParams params = {0};
+	ListCell   *lc;
+	bool		concurrently = false;
+	bool		verbose = false;
+	char	   *tablespacename = NULL;
+
+	/* Parse option list */
+	foreach(lc, stmt->params)
+	{
+		DefElem    *opt = (DefElem *) lfirst(lc);
+
+		if (strcmp(opt->defname, "verbose") == 0)
+			verbose = defGetBoolean(opt);
+		else if (strcmp(opt->defname, "concurrently") == 0)
+			concurrently = defGetBoolean(opt);
+		else if (strcmp(opt->defname, "tablespace") == 0)
+			tablespacename = defGetString(opt);
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("unrecognized REINDEX option \"%s\"",
+							opt->defname),
+					 parser_errposition(pstate, opt->location)));
+	}
+
+	if (concurrently)
+		PreventInTransactionBlock(isTopLevel,
+								  "REINDEX CONCURRENTLY");
+
+	params.options =
+		(verbose ? REINDEXOPT_VERBOSE : 0) |
+		(concurrently ? REINDEXOPT_CONCURRENTLY : 0);
+
+	/*
+	 * Assign the tablespace OID to move indexes to, with InvalidOid to do
+	 * nothing.
+	 */
+	if (tablespacename != NULL)
+	{
+		params.tablespaceOid = get_tablespace_oid(tablespacename, false);
+
+		/* Check permissions except when moving to database's default */
+		if (OidIsValid(params.tablespaceOid) &&
+			params.tablespaceOid != MyDatabaseTableSpace)
+		{
+			AclResult	aclresult;
+
+			aclresult = pg_tablespace_aclcheck(params.tablespaceOid,
+											   GetUserId(), ACL_CREATE);
+			if (aclresult != ACLCHECK_OK)
+				aclcheck_error(aclresult, OBJECT_TABLESPACE,
+							   get_tablespace_name(params.tablespaceOid));
+		}
+	}
+	else
+		params.tablespaceOid = InvalidOid;
+
+	switch (stmt->kind)
+	{
+		case REINDEX_OBJECT_INDEX:
+			ReindexIndex(stmt->relation, &params, isTopLevel);
+			break;
+		case REINDEX_OBJECT_TABLE:
+			ReindexTable(stmt->relation, &params, isTopLevel);
+			break;
+		case REINDEX_OBJECT_SCHEMA:
+		case REINDEX_OBJECT_SYSTEM:
+		case REINDEX_OBJECT_DATABASE:
+
+			/*
+			 * This cannot run inside a user transaction block; if we were
+			 * inside a transaction, then its commit- and
+			 * start-transaction-command calls would not have the intended
+			 * effect!
+			 */
+			PreventInTransactionBlock(isTopLevel,
+									  (stmt->kind == REINDEX_OBJECT_SCHEMA) ? "REINDEX SCHEMA" :
+									  (stmt->kind == REINDEX_OBJECT_SYSTEM) ? "REINDEX SYSTEM" :
+									  "REINDEX DATABASE");
+			ReindexMultipleTables(stmt->name, stmt->kind, &params);
+			break;
+		default:
+			elog(ERROR, "unrecognized object type: %d",
+				 (int) stmt->kind);
+			break;
+	}
+}
+
+/*
+ * ReindexIndex
+ *		Recreate a specific index.
+ */
+static void
+ReindexIndex(RangeVar *indexRelation, ReindexParams *params, bool isTopLevel)
+{
+	struct ReindexIndexCallbackState state;
+	Oid			indOid;
+	char		persistence;
+	char		relkind;
+
+	/*
+	 * Find and lock index, and check permissions on table; use callback to
+	 * obtain lock on table first, to avoid deadlock hazard.  The lock level
+	 * used here must match the index lock obtained in reindex_index().
+	 *
+	 * If it's a temporary index, we will perform a non-concurrent reindex,
+	 * even if CONCURRENTLY was requested.  In that case, reindex_index() will
+	 * upgrade the lock, but that's OK, because other sessions can't hold
+	 * locks on our temporary table.
+	 */
+	state.params = *params;
+	state.locked_table_oid = InvalidOid;
+	indOid = RangeVarGetRelidExtended(indexRelation,
+									  (params->options & REINDEXOPT_CONCURRENTLY) != 0 ?
+									  ShareUpdateExclusiveLock : AccessExclusiveLock,
+									  0,
+									  RangeVarCallbackForReindexIndex,
+									  &state);
+
+	/*
+	 * Obtain the current persistence and kind of the existing index.  We
+	 * already hold a lock on the index.
+	 */
+	persistence = get_rel_persistence(indOid);
+	relkind = get_rel_relkind(indOid);
+
+	if (relkind == RELKIND_PARTITIONED_INDEX)
+		ReindexPartitions(indOid, params, isTopLevel);
+	else if ((params->options & REINDEXOPT_CONCURRENTLY) != 0 &&
+			 persistence != RELPERSISTENCE_TEMP)
+		ReindexRelationConcurrently(indOid, params);
+	else
+	{
+		ReindexParams newparams = *params;
+
+		newparams.options |= REINDEXOPT_REPORT_PROGRESS;
+		reindex_index(indOid, false, persistence, &newparams);
+	}
+}
+
+/*
+ * Check permissions on table before acquiring relation lock; also lock
+ * the heap before the RangeVarGetRelidExtended takes the index lock, to avoid
+ * deadlocks.
+ */
+static void
+RangeVarCallbackForReindexIndex(const RangeVar *relation,
+								Oid relId, Oid oldRelId, void *arg)
+{
+	char		relkind;
+	struct ReindexIndexCallbackState *state = arg;
+	LOCKMODE	table_lockmode;
+
+	/*
+	 * Lock level here should match table lock in reindex_index() for
+	 * non-concurrent case and table locks used by index_concurrently_*() for
+	 * concurrent case.
+	 */
+	table_lockmode = (state->params.options & REINDEXOPT_CONCURRENTLY) != 0 ?
+		ShareUpdateExclusiveLock : ShareLock;
+
+	/*
+	 * If we previously locked some other index's heap, and the name we're
+	 * looking up no longer refers to that relation, release the now-useless
+	 * lock.
+	 *
+	 * NOTE(review): if the earlier pass found the index's table already
+	 * dropped, locked_table_oid can still be InvalidOid here even though
+	 * oldRelId is valid — presumably harmless, but confirm against lmgr
+	 * behavior for unlocking a lock that was never taken.
+	 */
+	if (relId != oldRelId && OidIsValid(oldRelId))
+	{
+		UnlockRelationOid(state->locked_table_oid, table_lockmode);
+		state->locked_table_oid = InvalidOid;
+	}
+
+	/* If the relation does not exist, there's nothing more to do. */
+	if (!OidIsValid(relId))
+		return;
+
+	/*
+	 * If the relation does exist, check whether it's an index.  But note that
+	 * the relation might have been dropped between the time we did the name
+	 * lookup and now.  In that case, there's nothing to do.
+	 */
+	relkind = get_rel_relkind(relId);
+	if (!relkind)
+		return;
+	if (relkind != RELKIND_INDEX &&
+		relkind != RELKIND_PARTITIONED_INDEX)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("\"%s\" is not an index", relation->relname)));
+
+	/* Check permissions */
+	if (!pg_class_ownercheck(relId, GetUserId()))
+		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_INDEX, relation->relname);
+
+	/* Lock heap before index to avoid deadlock. */
+	if (relId != oldRelId)
+	{
+		Oid			table_oid = IndexGetRelation(relId, true);
+
+		/*
+		 * If the OID isn't valid, it means the index was concurrently
+		 * dropped, which is not a problem for us; just return normally.
+		 */
+		if (OidIsValid(table_oid))
+		{
+			LockRelationOid(table_oid, table_lockmode);
+			state->locked_table_oid = table_oid;
+		}
+	}
+}
+
+/*
+ * ReindexTable
+ *		Recreate all indexes of a table (and of its toast table, if any)
+ */
+static Oid
+ReindexTable(RangeVar *relation, ReindexParams *params, bool isTopLevel)
+{
+	Oid			heapOid;
+	bool		result;
+
+	/*
+	 * The lock level used here should match reindex_relation().
+	 *
+	 * If it's a temporary table, we will perform a non-concurrent reindex,
+	 * even if CONCURRENTLY was requested.  In that case, reindex_relation()
+	 * will upgrade the lock, but that's OK, because other sessions can't hold
+	 * locks on our temporary table.
+	 */
+	heapOid = RangeVarGetRelidExtended(relation,
+									   (params->options & REINDEXOPT_CONCURRENTLY) != 0 ?
+									   ShareUpdateExclusiveLock : ShareLock,
+									   0,
+									   RangeVarCallbackOwnsTable, NULL);
+
+	if (get_rel_relkind(heapOid) == RELKIND_PARTITIONED_TABLE)
+		ReindexPartitions(heapOid, params, isTopLevel);
+	else if ((params->options & REINDEXOPT_CONCURRENTLY) != 0 &&
+			 get_rel_persistence(heapOid) != RELPERSISTENCE_TEMP)
+	{
+		result = ReindexRelationConcurrently(heapOid, params);
+
+		if (!result)
+			ereport(NOTICE,
+					(errmsg("table \"%s\" has no indexes that can be reindexed concurrently",
+							relation->relname)));
+	}
+	else
+	{
+		ReindexParams newparams = *params;
+
+		newparams.options |= REINDEXOPT_REPORT_PROGRESS;
+		result = reindex_relation(heapOid,
+								  REINDEX_REL_PROCESS_TOAST |
+								  REINDEX_REL_CHECK_CONSTRAINTS,
+								  &newparams);
+		if (!result)
+			ereport(NOTICE,
+					(errmsg("table \"%s\" has no indexes to reindex",
+							relation->relname)));
+	}
+
+	return heapOid;
+}
+
+/*
+ * ReindexMultipleTables
+ *		Recreate indexes of tables selected by objectName/objectKind.
+ *
+ * To reduce the probability of deadlocks, each table is reindexed in a
+ * separate transaction, so we can release the lock on it right away.
+ * That means this must not be called within a user transaction block!
+ */
+static void
+ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind,
+					  ReindexParams *params)
+{
+	Oid			objectOid;
+	Relation	relationRelation;
+	TableScanDesc scan;
+	ScanKeyData scan_keys[1];
+	HeapTuple	tuple;
+	MemoryContext private_context;
+	MemoryContext old;
+	List	   *relids = NIL;
+	int			num_keys;
+	bool		concurrent_warning = false; /* emit each WARNING only once */
+	bool		tablespace_warning = false; /* ditto */
+
+	AssertArg(objectName);
+	Assert(objectKind == REINDEX_OBJECT_SCHEMA ||
+		   objectKind == REINDEX_OBJECT_SYSTEM ||
+		   objectKind == REINDEX_OBJECT_DATABASE);
+
+	if (objectKind == REINDEX_OBJECT_SYSTEM &&
+		(params->options & REINDEXOPT_CONCURRENTLY) != 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot reindex system catalogs concurrently")));
+
+	/*
+	 * Get OID of object to reindex, being the database currently being used
+	 * by session for a database or for system catalogs, or the schema defined
+	 * by caller.  At the same time do permission checks that need different
+	 * processing depending on the object type.
+	 */
+	if (objectKind == REINDEX_OBJECT_SCHEMA)
+	{
+		objectOid = get_namespace_oid(objectName, false);
+
+		if (!pg_namespace_ownercheck(objectOid, GetUserId()))
+			aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_SCHEMA,
+						   objectName);
+	}
+	else
+	{
+		objectOid = MyDatabaseId;
+
+		if (strcmp(objectName, get_database_name(objectOid)) != 0)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("can only reindex the currently open database")));
+		if (!pg_database_ownercheck(objectOid, GetUserId()))
+			aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE,
+						   objectName);
+	}
+
+	/*
+	 * Create a memory context that will survive forced transaction commits we
+	 * do below.  Since it is a child of PortalContext, it will go away
+	 * eventually even if we suffer an error; there's no need for special
+	 * abort cleanup logic.
+	 */
+	private_context = AllocSetContextCreate(PortalContext,
+											"ReindexMultipleTables",
+											ALLOCSET_SMALL_SIZES);
+
+	/*
+	 * Define the search keys to find the objects to reindex.  For a schema, we
+	 * select target relations using relnamespace, something not necessary for
+	 * a database-wide operation.
+	 */
+	if (objectKind == REINDEX_OBJECT_SCHEMA)
+	{
+		num_keys = 1;
+		ScanKeyInit(&scan_keys[0],
+					Anum_pg_class_relnamespace,
+					BTEqualStrategyNumber, F_OIDEQ,
+					ObjectIdGetDatum(objectOid));
+	}
+	else
+		num_keys = 0;
+
+	/*
+	 * Scan pg_class to build a list of the relations we need to reindex.
+	 *
+	 * We only consider plain relations and materialized views here (toast
+	 * rels will be processed indirectly by reindex_relation).
+	 */
+	relationRelation = table_open(RelationRelationId, AccessShareLock);
+	scan = table_beginscan_catalog(relationRelation, num_keys, scan_keys);
+	while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+	{
+		Form_pg_class classtuple = (Form_pg_class) GETSTRUCT(tuple);
+		Oid			relid = classtuple->oid;
+
+		/*
+		 * Only regular tables and matviews can have indexes, so ignore any
+		 * other kind of relation.
+		 *
+		 * Partitioned tables/indexes are skipped but matching leaf partitions
+		 * are processed.
+		 */
+		if (classtuple->relkind != RELKIND_RELATION &&
+			classtuple->relkind != RELKIND_MATVIEW)
+			continue;
+
+		/* Skip temp tables of other backends; we can't reindex them at all */
+		if (classtuple->relpersistence == RELPERSISTENCE_TEMP &&
+			!isTempNamespace(classtuple->relnamespace))
+			continue;
+
+		/* Check user/system classification, and optionally skip */
+		if (objectKind == REINDEX_OBJECT_SYSTEM &&
+			!IsSystemClass(relid, classtuple))
+			continue;
+
+		/*
+		 * The table can be reindexed if the user is superuser, the table
+		 * owner, or the database/schema owner (but in the latter case, only
+		 * if it's not a shared relation).  pg_class_ownercheck includes the
+		 * superuser case, and depending on objectKind we already know that
+		 * the user has permission to run REINDEX on this database or schema
+		 * per the permission checks at the beginning of this routine.
+		 */
+		if (classtuple->relisshared &&
+			!pg_class_ownercheck(relid, GetUserId()))
+			continue;
+
+		/*
+		 * Skip system tables, since index_create() would reject indexing them
+		 * concurrently (and it would likely fail if we tried).
+		 */
+		if ((params->options & REINDEXOPT_CONCURRENTLY) != 0 &&
+			IsCatalogRelationOid(relid))
+		{
+			if (!concurrent_warning)
+				ereport(WARNING,
+						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+						 errmsg("cannot reindex system catalogs concurrently, skipping all")));
+			concurrent_warning = true;
+			continue;
+		}
+
+		/*
+		 * If a new tablespace is set, check if this relation has to be
+		 * skipped.
+		 */
+		if (OidIsValid(params->tablespaceOid))
+		{
+			bool		skip_rel = false;
+
+			/*
+			 * Mapped relations cannot be moved to different tablespaces (in
+			 * particular this eliminates all shared catalogs.).
+			 */
+			if (RELKIND_HAS_STORAGE(classtuple->relkind) &&
+				!OidIsValid(classtuple->relfilenode))
+				skip_rel = true;
+
+			/*
+			 * A system relation is always skipped, even with
+			 * allow_system_table_mods enabled.
+			 */
+			if (IsSystemClass(relid, classtuple))
+				skip_rel = true;
+
+			if (skip_rel)
+			{
+				if (!tablespace_warning)
+					ereport(WARNING,
+							(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+							 errmsg("cannot move system relations, skipping all")));
+				tablespace_warning = true;
+				continue;
+			}
+		}
+
+		/* Save the list of relation OIDs in private context */
+		old = MemoryContextSwitchTo(private_context);
+
+		/*
+		 * We always want to reindex pg_class first if it's selected to be
+		 * reindexed.  This ensures that if there is any corruption in
+		 * pg_class' indexes, they will be fixed before we process any other
+		 * tables.  This is critical because reindexing itself will try to
+		 * update pg_class.
+		 */
+		if (relid == RelationRelationId)
+			relids = lcons_oid(relid, relids);
+		else
+			relids = lappend_oid(relids, relid);
+
+		MemoryContextSwitchTo(old);
+	}
+	table_endscan(scan);
+	table_close(relationRelation, AccessShareLock);
+
+	/*
+	 * Process each relation listed in a separate transaction.  Note that this
+	 * commits and then starts a new transaction immediately.
+	 */
+	ReindexMultipleInternal(relids, params);
+
+	MemoryContextDelete(private_context);
+}
+
+/*
+ * Error callback specific to ReindexPartitions().
+ */
+static void
+reindex_error_callback(void *arg)
+{
+	ReindexErrorInfo *errinfo = (ReindexErrorInfo *) arg;
+
+	Assert(RELKIND_HAS_PARTITIONS(errinfo->relkind));
+
+	if (errinfo->relkind == RELKIND_PARTITIONED_TABLE)
+		errcontext("while reindexing partitioned table \"%s.%s\"",
+				   errinfo->relnamespace, errinfo->relname);
+	else if (errinfo->relkind == RELKIND_PARTITIONED_INDEX)
+		errcontext("while reindexing partitioned index \"%s.%s\"",
+				   errinfo->relnamespace, errinfo->relname);
+}
+
+/*
+ * ReindexPartitions
+ *
+ * Reindex a set of partitions, per the partitioned index or table given
+ * by the caller.
+ */
+static void
+ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
+{
+	List	   *partitions = NIL;
+	char		relkind = get_rel_relkind(relid);
+	char	   *relname = get_rel_name(relid);
+	char	   *relnamespace = get_namespace_name(get_rel_namespace(relid));
+	MemoryContext reindex_context;
+	List	   *inhoids;
+	ListCell   *lc;
+	ErrorContextCallback errcallback;
+	ReindexErrorInfo errinfo;
+
+	Assert(RELKIND_HAS_PARTITIONS(relkind));
+
+	/*
+	 * Check if this runs in a transaction block, with an error callback to
+	 * provide more context under which a problem happens.
+ */
+	errinfo.relname = pstrdup(relname);
+	errinfo.relnamespace = pstrdup(relnamespace);
+	errinfo.relkind = relkind;
+	errcallback.callback = reindex_error_callback;
+	errcallback.arg = (void *) &errinfo;
+	errcallback.previous = error_context_stack;
+	error_context_stack = &errcallback;
+
+	PreventInTransactionBlock(isTopLevel,
+							  relkind == RELKIND_PARTITIONED_TABLE ?
+							  "REINDEX TABLE" : "REINDEX INDEX");
+
+	/* Pop the error context stack */
+	error_context_stack = errcallback.previous;
+
+	/*
+	 * Create special memory context for cross-transaction storage.
+	 *
+	 * Since it is a child of PortalContext, it will go away eventually even
+	 * if we suffer an error so there is no need for special abort cleanup
+	 * logic.
+	 */
+	reindex_context = AllocSetContextCreate(PortalContext, "Reindex",
+											ALLOCSET_DEFAULT_SIZES);
+
+	/* ShareLock is enough to prevent schema modifications */
+	inhoids = find_all_inheritors(relid, ShareLock, NULL);
+
+	/*
+	 * The list of relations to reindex are the physical partitions of the
+	 * tree so discard any partitioned table or index.
+	 */
+	foreach(lc, inhoids)
+	{
+		Oid			partoid = lfirst_oid(lc);
+		char		partkind = get_rel_relkind(partoid);
+		MemoryContext old_context;
+
+		/*
+		 * This discards partitioned tables, partitioned indexes and foreign
+		 * tables.
+		 */
+		if (!RELKIND_HAS_STORAGE(partkind))
+			continue;
+
+		Assert(partkind == RELKIND_INDEX ||
+			   partkind == RELKIND_RELATION);
+
+		/* Save partition OID */
+		old_context = MemoryContextSwitchTo(reindex_context);
+		partitions = lappend_oid(partitions, partoid);
+		MemoryContextSwitchTo(old_context);
+	}
+
+	/*
+	 * Process each partition listed in a separate transaction.  Note that
+	 * this commits and then starts a new transaction immediately.
+	 */
+	ReindexMultipleInternal(partitions, params);
+
+	/*
+	 * Clean up working storage --- note we must do this after
+	 * StartTransactionCommand, else we might be trying to delete the active
+	 * context!
+	 */
+	MemoryContextDelete(reindex_context);
+}
+
+/*
+ * ReindexMultipleInternal
+ *
+ * Reindex a list of relations, each one being processed in its own
+ * transaction.  This commits the existing transaction immediately,
+ * and starts a new transaction when finished.
+ */
+static void
+ReindexMultipleInternal(List *relids, ReindexParams *params)
+{
+	ListCell   *l;
+
+	/* exit the caller's transaction; each relation gets its own below */
+	PopActiveSnapshot();
+	CommitTransactionCommand();
+
+	foreach(l, relids)
+	{
+		Oid			relid = lfirst_oid(l);
+		char		relkind;
+		char		relpersistence;
+
+		StartTransactionCommand();
+
+		/* functions in indexes may want a snapshot set */
+		PushActiveSnapshot(GetTransactionSnapshot());
+
+		/* check if the relation still exists */
+		if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(relid)))
+		{
+			PopActiveSnapshot();
+			CommitTransactionCommand();
+			continue;
+		}
+
+		/*
+		 * Check permissions except when moving to database's default if a new
+		 * tablespace is chosen.  Note that this check also happens in
+		 * ExecReindex(), but we do an extra check here as this runs across
+		 * multiple transactions.
+		 */
+		if (OidIsValid(params->tablespaceOid) &&
+			params->tablespaceOid != MyDatabaseTableSpace)
+		{
+			AclResult	aclresult;
+
+			aclresult = pg_tablespace_aclcheck(params->tablespaceOid,
+											   GetUserId(), ACL_CREATE);
+			if (aclresult != ACLCHECK_OK)
+				aclcheck_error(aclresult, OBJECT_TABLESPACE,
+							   get_tablespace_name(params->tablespaceOid));
+		}
+
+		relkind = get_rel_relkind(relid);
+		relpersistence = get_rel_persistence(relid);
+
+		/*
+		 * Partitioned tables and indexes can never be processed directly, and
+		 * a list of their leaves should be built first.
+		 */
+		Assert(!RELKIND_HAS_PARTITIONS(relkind));
+
+		if ((params->options & REINDEXOPT_CONCURRENTLY) != 0 &&
+			relpersistence != RELPERSISTENCE_TEMP)
+		{
+			ReindexParams newparams = *params;
+
+			newparams.options |= REINDEXOPT_MISSING_OK;
+			(void) ReindexRelationConcurrently(relid, &newparams);
+			/* ReindexRelationConcurrently() does the verbose output */
+		}
+		else if (relkind == RELKIND_INDEX)
+		{
+			ReindexParams newparams = *params;
+
+			newparams.options |=
+				REINDEXOPT_REPORT_PROGRESS | REINDEXOPT_MISSING_OK;
+			reindex_index(relid, false, relpersistence, &newparams);
+			PopActiveSnapshot();
+			/* reindex_index() does the verbose output */
+		}
+		else
+		{
+			bool		result;
+			ReindexParams newparams = *params;
+
+			newparams.options |=
+				REINDEXOPT_REPORT_PROGRESS | REINDEXOPT_MISSING_OK;
+			result = reindex_relation(relid,
+									  REINDEX_REL_PROCESS_TOAST |
+									  REINDEX_REL_CHECK_CONSTRAINTS,
+									  &newparams);
+
+			if (result && (params->options & REINDEXOPT_VERBOSE) != 0)
+				ereport(INFO,
+						(errmsg("table \"%s.%s\" was reindexed",
+								get_namespace_name(get_rel_namespace(relid)),
+								get_rel_name(relid))));
+
+			PopActiveSnapshot();
+		}
+
+		CommitTransactionCommand();
+	}
+
+	StartTransactionCommand();
+}
+
+
+/*
+ * ReindexRelationConcurrently - process REINDEX CONCURRENTLY for given
+ * relation OID
+ *
+ * 'relationOid' can either belong to an index, a table or a materialized
+ * view.  For tables and materialized views, all its indexes will be rebuilt,
+ * excluding invalid indexes and any indexes used in exclusion constraints,
+ * but including its associated toast table indexes.  For indexes, the index
+ * itself will be rebuilt.
+ *
+ * The locks taken on parent tables and involved indexes are kept until the
+ * transaction is committed, at which point a session lock is taken on each
+ * relation.  Both of these protect against concurrent schema changes.
+ * + * Returns true if any indexes have been rebuilt (including toast table's + * indexes, when relevant), otherwise returns false. + * + * NOTE: This cannot be used on temporary relations. A concurrent build would + * cause issues with ON COMMIT actions triggered by the transactions of the + * concurrent build. Temporary relations are not subject to concurrent + * concerns, so there's no need for the more complicated concurrent build, + * anyway, and a non-concurrent reindex is more efficient. + */ +static bool +ReindexRelationConcurrently(Oid relationOid, ReindexParams *params) +{ + typedef struct ReindexIndexInfo + { + Oid indexId; + Oid tableId; + Oid amId; + bool safe; /* for set_indexsafe_procflags */ + } ReindexIndexInfo; + List *heapRelationIds = NIL; + List *indexIds = NIL; + List *newIndexIds = NIL; + List *relationLocks = NIL; + List *lockTags = NIL; + ListCell *lc, + *lc2; + MemoryContext private_context; + MemoryContext oldcontext; + char relkind; + char *relationName = NULL; + char *relationNamespace = NULL; + PGRUsage ru0; + const int progress_index[] = { + PROGRESS_CREATEIDX_COMMAND, + PROGRESS_CREATEIDX_PHASE, + PROGRESS_CREATEIDX_INDEX_OID, + PROGRESS_CREATEIDX_ACCESS_METHOD_OID + }; + int64 progress_vals[4]; + + /* + * Create a memory context that will survive forced transaction commits we + * do below. Since it is a child of PortalContext, it will go away + * eventually even if we suffer an error; there's no need for special + * abort cleanup logic. 
+ */ + private_context = AllocSetContextCreate(PortalContext, + "ReindexConcurrent", + ALLOCSET_SMALL_SIZES); + + if ((params->options & REINDEXOPT_VERBOSE) != 0) + { + /* Save data needed by REINDEX VERBOSE in private context */ + oldcontext = MemoryContextSwitchTo(private_context); + + relationName = get_rel_name(relationOid); + relationNamespace = get_namespace_name(get_rel_namespace(relationOid)); + + pg_rusage_init(&ru0); + + MemoryContextSwitchTo(oldcontext); + } + + relkind = get_rel_relkind(relationOid); + + /* + * Extract the list of indexes that are going to be rebuilt based on the + * relation Oid given by caller. + */ + switch (relkind) + { + case RELKIND_RELATION: + case RELKIND_MATVIEW: + case RELKIND_TOASTVALUE: + { + /* + * In the case of a relation, find all its indexes including + * toast indexes. + */ + Relation heapRelation; + + /* Save the list of relation OIDs in private context */ + oldcontext = MemoryContextSwitchTo(private_context); + + /* Track this relation for session locks */ + heapRelationIds = lappend_oid(heapRelationIds, relationOid); + + MemoryContextSwitchTo(oldcontext); + + if (IsCatalogRelationOid(relationOid)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot reindex system catalogs concurrently"))); + + /* Open relation to get its indexes */ + if ((params->options & REINDEXOPT_MISSING_OK) != 0) + { + heapRelation = try_table_open(relationOid, + ShareUpdateExclusiveLock); + /* leave if relation does not exist */ + if (!heapRelation) + break; + } + else + heapRelation = table_open(relationOid, + ShareUpdateExclusiveLock); + + if (OidIsValid(params->tablespaceOid) && + IsSystemRelation(heapRelation)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot move system relation \"%s\"", + RelationGetRelationName(heapRelation)))); + + /* Add all the valid indexes of relation to list */ + foreach(lc, RelationGetIndexList(heapRelation)) + { + Oid cellOid = lfirst_oid(lc); + Relation 
indexRelation = index_open(cellOid, + ShareUpdateExclusiveLock); + + if (!indexRelation->rd_index->indisvalid) + ereport(WARNING, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot reindex invalid index \"%s.%s\" concurrently, skipping", + get_namespace_name(get_rel_namespace(cellOid)), + get_rel_name(cellOid)))); + else if (indexRelation->rd_index->indisexclusion) + ereport(WARNING, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot reindex exclusion constraint index \"%s.%s\" concurrently, skipping", + get_namespace_name(get_rel_namespace(cellOid)), + get_rel_name(cellOid)))); + else + { + ReindexIndexInfo *idx; + + /* Save the list of relation OIDs in private context */ + oldcontext = MemoryContextSwitchTo(private_context); + + idx = palloc(sizeof(ReindexIndexInfo)); + idx->indexId = cellOid; + /* other fields set later */ + + indexIds = lappend(indexIds, idx); + + MemoryContextSwitchTo(oldcontext); + } + + index_close(indexRelation, NoLock); + } + + /* Also add the toast indexes */ + if (OidIsValid(heapRelation->rd_rel->reltoastrelid)) + { + Oid toastOid = heapRelation->rd_rel->reltoastrelid; + Relation toastRelation = table_open(toastOid, + ShareUpdateExclusiveLock); + + /* Save the list of relation OIDs in private context */ + oldcontext = MemoryContextSwitchTo(private_context); + + /* Track this relation for session locks */ + heapRelationIds = lappend_oid(heapRelationIds, toastOid); + + MemoryContextSwitchTo(oldcontext); + + foreach(lc2, RelationGetIndexList(toastRelation)) + { + Oid cellOid = lfirst_oid(lc2); + Relation indexRelation = index_open(cellOid, + ShareUpdateExclusiveLock); + + if (!indexRelation->rd_index->indisvalid) + ereport(WARNING, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("cannot reindex invalid index \"%s.%s\" concurrently, skipping", + get_namespace_name(get_rel_namespace(cellOid)), + get_rel_name(cellOid)))); + else + { + ReindexIndexInfo *idx; + + /* + * Save the list of relation OIDs in private + * context + */ + 
oldcontext = MemoryContextSwitchTo(private_context); + + idx = palloc(sizeof(ReindexIndexInfo)); + idx->indexId = cellOid; + indexIds = lappend(indexIds, idx); + /* other fields set later */ + + MemoryContextSwitchTo(oldcontext); + } + + index_close(indexRelation, NoLock); + } + + table_close(toastRelation, NoLock); + } + + table_close(heapRelation, NoLock); + break; + } + case RELKIND_INDEX: + { + Oid heapId = IndexGetRelation(relationOid, + (params->options & REINDEXOPT_MISSING_OK) != 0); + Relation heapRelation; + ReindexIndexInfo *idx; + + /* if relation is missing, leave */ + if (!OidIsValid(heapId)) + break; + + if (IsCatalogRelationOid(heapId)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot reindex system catalogs concurrently"))); + + /* + * Don't allow reindex for an invalid index on TOAST table, as + * if rebuilt it would not be possible to drop it. Match + * error message in reindex_index(). + */ + if (IsToastNamespace(get_rel_namespace(relationOid)) && + !get_index_isvalid(relationOid)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot reindex invalid index on TOAST table"))); + + /* + * Check if parent relation can be locked and if it exists, + * this needs to be done at this stage as the list of indexes + * to rebuild is not complete yet, and REINDEXOPT_MISSING_OK + * should not be used once all the session locks are taken. 
+ */ + if ((params->options & REINDEXOPT_MISSING_OK) != 0) + { + heapRelation = try_table_open(heapId, + ShareUpdateExclusiveLock); + /* leave if relation does not exist */ + if (!heapRelation) + break; + } + else + heapRelation = table_open(heapId, + ShareUpdateExclusiveLock); + + if (OidIsValid(params->tablespaceOid) && + IsSystemRelation(heapRelation)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot move system relation \"%s\"", + get_rel_name(relationOid)))); + + table_close(heapRelation, NoLock); + + /* Save the list of relation OIDs in private context */ + oldcontext = MemoryContextSwitchTo(private_context); + + /* Track the heap relation of this index for session locks */ + heapRelationIds = list_make1_oid(heapId); + + /* + * Save the list of relation OIDs in private context. Note + * that invalid indexes are allowed here. + */ + idx = palloc(sizeof(ReindexIndexInfo)); + idx->indexId = relationOid; + indexIds = lappend(indexIds, idx); + /* other fields set later */ + + MemoryContextSwitchTo(oldcontext); + break; + } + + case RELKIND_PARTITIONED_TABLE: + case RELKIND_PARTITIONED_INDEX: + default: + /* Return error if type of relation is not supported */ + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot reindex this type of relation concurrently"))); + break; + } + + /* + * Definitely no indexes, so leave. Any checks based on + * REINDEXOPT_MISSING_OK should be done only while the list of indexes to + * work on is built as the session locks taken before this transaction + * commits will make sure that they cannot be dropped by a concurrent + * session until this operation completes. 
+ */ + if (indexIds == NIL) + { + PopActiveSnapshot(); + return false; + } + + /* It's not a shared catalog, so refuse to move it to shared tablespace */ + if (params->tablespaceOid == GLOBALTABLESPACE_OID) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot move non-shared relation to tablespace \"%s\"", + get_tablespace_name(params->tablespaceOid)))); + + Assert(heapRelationIds != NIL); + + /*----- + * Now we have all the indexes we want to process in indexIds. + * + * The phases now are: + * + * 1. create new indexes in the catalog + * 2. build new indexes + * 3. let new indexes catch up with tuples inserted in the meantime + * 4. swap index names + * 5. mark old indexes as dead + * 6. drop old indexes + * + * We process each phase for all indexes before moving to the next phase, + * for efficiency. + */ + + /* + * Phase 1 of REINDEX CONCURRENTLY + * + * Create a new index with the same properties as the old one, but it is + * only registered in catalogs and will be built later. Then get session + * locks on all involved tables. See analogous code in DefineIndex() for + * more detailed comments. + */ + + foreach(lc, indexIds) + { + char *concurrentName; + ReindexIndexInfo *idx = lfirst(lc); + ReindexIndexInfo *newidx; + Oid newIndexId; + Relation indexRel; + Relation heapRel; + Oid save_userid; + int save_sec_context; + int save_nestlevel; + Relation newIndexRel; + LockRelId *lockrelid; + Oid tablespaceid; + + indexRel = index_open(idx->indexId, ShareUpdateExclusiveLock); + heapRel = table_open(indexRel->rd_index->indrelid, + ShareUpdateExclusiveLock); + + /* + * Switch to the table owner's userid, so that any index functions are + * run as that user. Also lock down security-restricted operations + * and arrange to make GUC variable changes local to this command. 
+ */ + GetUserIdAndSecContext(&save_userid, &save_sec_context); + SetUserIdAndSecContext(heapRel->rd_rel->relowner, + save_sec_context | SECURITY_RESTRICTED_OPERATION); + save_nestlevel = NewGUCNestLevel(); + + /* determine safety of this index for set_indexsafe_procflags */ + idx->safe = (indexRel->rd_indexprs == NIL && + indexRel->rd_indpred == NIL); + idx->tableId = RelationGetRelid(heapRel); + idx->amId = indexRel->rd_rel->relam; + + /* This function shouldn't be called for temporary relations. */ + if (indexRel->rd_rel->relpersistence == RELPERSISTENCE_TEMP) + elog(ERROR, "cannot reindex a temporary table concurrently"); + + pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX, + idx->tableId); + + progress_vals[0] = PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY; + progress_vals[1] = 0; /* initializing */ + progress_vals[2] = idx->indexId; + progress_vals[3] = idx->amId; + pgstat_progress_update_multi_param(4, progress_index, progress_vals); + + /* Choose a temporary relation name for the new index */ + concurrentName = ChooseRelationName(get_rel_name(idx->indexId), + NULL, + "ccnew", + get_rel_namespace(indexRel->rd_index->indrelid), + false); + + /* Choose the new tablespace, indexes of toast tables are not moved */ + if (OidIsValid(params->tablespaceOid) && + heapRel->rd_rel->relkind != RELKIND_TOASTVALUE) + tablespaceid = params->tablespaceOid; + else + tablespaceid = indexRel->rd_rel->reltablespace; + + /* Create new index definition based on given index */ + newIndexId = index_concurrently_create_copy(heapRel, + idx->indexId, + tablespaceid, + concurrentName); + + /* + * Now open the relation of the new index, a session-level lock is + * also needed on it. 
+ */ + newIndexRel = index_open(newIndexId, ShareUpdateExclusiveLock); + + /* + * Save the list of OIDs and locks in private context + */ + oldcontext = MemoryContextSwitchTo(private_context); + + newidx = palloc(sizeof(ReindexIndexInfo)); + newidx->indexId = newIndexId; + newidx->safe = idx->safe; + newidx->tableId = idx->tableId; + newidx->amId = idx->amId; + + newIndexIds = lappend(newIndexIds, newidx); + + /* + * Save lockrelid to protect each relation from drop then close + * relations. The lockrelid on parent relation is not taken here to + * avoid multiple locks taken on the same relation, instead we rely on + * parentRelationIds built earlier. + */ + lockrelid = palloc(sizeof(*lockrelid)); + *lockrelid = indexRel->rd_lockInfo.lockRelId; + relationLocks = lappend(relationLocks, lockrelid); + lockrelid = palloc(sizeof(*lockrelid)); + *lockrelid = newIndexRel->rd_lockInfo.lockRelId; + relationLocks = lappend(relationLocks, lockrelid); + + MemoryContextSwitchTo(oldcontext); + + index_close(indexRel, NoLock); + index_close(newIndexRel, NoLock); + + /* Roll back any GUC changes executed by index functions */ + AtEOXact_GUC(false, save_nestlevel); + + /* Restore userid and security context */ + SetUserIdAndSecContext(save_userid, save_sec_context); + + table_close(heapRel, NoLock); + } + + /* + * Save the heap lock for following visibility checks with other backends + * might conflict with this session. 
+ */ + foreach(lc, heapRelationIds) + { + Relation heapRelation = table_open(lfirst_oid(lc), ShareUpdateExclusiveLock); + LockRelId *lockrelid; + LOCKTAG *heaplocktag; + + /* Save the list of locks in private context */ + oldcontext = MemoryContextSwitchTo(private_context); + + /* Add lockrelid of heap relation to the list of locked relations */ + lockrelid = palloc(sizeof(*lockrelid)); + *lockrelid = heapRelation->rd_lockInfo.lockRelId; + relationLocks = lappend(relationLocks, lockrelid); + + heaplocktag = (LOCKTAG *) palloc(sizeof(LOCKTAG)); + + /* Save the LOCKTAG for this parent relation for the wait phase */ + SET_LOCKTAG_RELATION(*heaplocktag, lockrelid->dbId, lockrelid->relId); + lockTags = lappend(lockTags, heaplocktag); + + MemoryContextSwitchTo(oldcontext); + + /* Close heap relation */ + table_close(heapRelation, NoLock); + } + + /* Get a session-level lock on each table. */ + foreach(lc, relationLocks) + { + LockRelId *lockrelid = (LockRelId *) lfirst(lc); + + LockRelationIdForSession(lockrelid, ShareUpdateExclusiveLock); + } + + PopActiveSnapshot(); + CommitTransactionCommand(); + StartTransactionCommand(); + + /* + * Because we don't take a snapshot in this transaction, there's no need + * to set the PROC_IN_SAFE_IC flag here. + */ + + /* + * Phase 2 of REINDEX CONCURRENTLY + * + * Build the new indexes in a separate transaction for each index to avoid + * having open transactions for an unnecessary long time. But before + * doing that, wait until no running transactions could have the table of + * the index open with the old list of indexes. See "phase 2" in + * DefineIndex() for more details. 
+ */ + + pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE, + PROGRESS_CREATEIDX_PHASE_WAIT_1); + WaitForLockersMultiple(lockTags, ShareLock, true); + CommitTransactionCommand(); + + foreach(lc, newIndexIds) + { + ReindexIndexInfo *newidx = lfirst(lc); + + /* Start new transaction for this index's concurrent build */ + StartTransactionCommand(); + + /* + * Check for user-requested abort. This is inside a transaction so as + * xact.c does not issue a useless WARNING, and ensures that + * session-level locks are cleaned up on abort. + */ + CHECK_FOR_INTERRUPTS(); + + /* Tell concurrent indexing to ignore us, if index qualifies */ + if (newidx->safe) + set_indexsafe_procflags(); + + /* Set ActiveSnapshot since functions in the indexes may need it */ + PushActiveSnapshot(GetTransactionSnapshot()); + + /* + * Update progress for the index to build, with the correct parent + * table involved. + */ + pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX, newidx->tableId); + progress_vals[0] = PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY; + progress_vals[1] = PROGRESS_CREATEIDX_PHASE_BUILD; + progress_vals[2] = newidx->indexId; + progress_vals[3] = newidx->amId; + pgstat_progress_update_multi_param(4, progress_index, progress_vals); + + /* Perform concurrent build of new index */ + index_concurrently_build(newidx->tableId, newidx->indexId); + + PopActiveSnapshot(); + CommitTransactionCommand(); + } + + StartTransactionCommand(); + + /* + * Because we don't take a snapshot or Xid in this transaction, there's no + * need to set the PROC_IN_SAFE_IC flag here. + */ + + /* + * Phase 3 of REINDEX CONCURRENTLY + * + * During this phase the old indexes catch up with any new tuples that + * were created during the previous phase. See "phase 3" in DefineIndex() + * for more details. 
+ */ + + pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE, + PROGRESS_CREATEIDX_PHASE_WAIT_2); + WaitForLockersMultiple(lockTags, ShareLock, true); + CommitTransactionCommand(); + + foreach(lc, newIndexIds) + { + ReindexIndexInfo *newidx = lfirst(lc); + TransactionId limitXmin; + Snapshot snapshot; + + StartTransactionCommand(); + + /* + * Check for user-requested abort. This is inside a transaction so as + * xact.c does not issue a useless WARNING, and ensures that + * session-level locks are cleaned up on abort. + */ + CHECK_FOR_INTERRUPTS(); + + /* Tell concurrent indexing to ignore us, if index qualifies */ + if (newidx->safe) + set_indexsafe_procflags(); + + /* + * Take the "reference snapshot" that will be used by validate_index() + * to filter candidate tuples. + */ + snapshot = RegisterSnapshot(GetTransactionSnapshot()); + PushActiveSnapshot(snapshot); + + /* + * Update progress for the index to build, with the correct parent + * table involved. + */ + pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX, + newidx->tableId); + progress_vals[0] = PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY; + progress_vals[1] = PROGRESS_CREATEIDX_PHASE_VALIDATE_IDXSCAN; + progress_vals[2] = newidx->indexId; + progress_vals[3] = newidx->amId; + pgstat_progress_update_multi_param(4, progress_index, progress_vals); + + validate_index(newidx->tableId, newidx->indexId, snapshot); + + /* + * We can now do away with our active snapshot, we still need to save + * the xmin limit to wait for older snapshots. + */ + limitXmin = snapshot->xmin; + + PopActiveSnapshot(); + UnregisterSnapshot(snapshot); + + /* + * To ensure no deadlocks, we must commit and start yet another + * transaction, and do our wait before any snapshot has been taken in + * it. + */ + CommitTransactionCommand(); + StartTransactionCommand(); + + /* + * The index is now valid in the sense that it contains all currently + * interesting tuples. 
But since it might not contain tuples deleted + * just before the reference snap was taken, we have to wait out any + * transactions that might have older snapshots. + * + * Because we don't take a snapshot or Xid in this transaction, + * there's no need to set the PROC_IN_SAFE_IC flag here. + */ + pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE, + PROGRESS_CREATEIDX_PHASE_WAIT_3); + WaitForOlderSnapshots(limitXmin, true); + + CommitTransactionCommand(); + } + + /* + * Phase 4 of REINDEX CONCURRENTLY + * + * Now that the new indexes have been validated, swap each new index with + * its corresponding old index. + * + * We mark the new indexes as valid and the old indexes as not valid at + * the same time to make sure we only get constraint violations from the + * indexes with the correct names. + */ + + StartTransactionCommand(); + + /* + * Because this transaction only does catalog manipulations and doesn't do + * any index operations, we can set the PROC_IN_SAFE_IC flag here + * unconditionally. + */ + set_indexsafe_procflags(); + + forboth(lc, indexIds, lc2, newIndexIds) + { + ReindexIndexInfo *oldidx = lfirst(lc); + ReindexIndexInfo *newidx = lfirst(lc2); + char *oldName; + + /* + * Check for user-requested abort. This is inside a transaction so as + * xact.c does not issue a useless WARNING, and ensures that + * session-level locks are cleaned up on abort. + */ + CHECK_FOR_INTERRUPTS(); + + /* Choose a relation name for old index */ + oldName = ChooseRelationName(get_rel_name(oldidx->indexId), + NULL, + "ccold", + get_rel_namespace(oldidx->tableId), + false); + + /* + * Swap old index with the new one. This also marks the new one as + * valid and the old one as not valid. + */ + index_concurrently_swap(newidx->indexId, oldidx->indexId, oldName); + + /* + * Invalidate the relcache for the table, so that after this commit + * all sessions will refresh any cached plans that might reference the + * index. 
+ */ + CacheInvalidateRelcacheByRelid(oldidx->tableId); + + /* + * CCI here so that subsequent iterations see the oldName in the + * catalog and can choose a nonconflicting name for their oldName. + * Otherwise, this could lead to conflicts if a table has two indexes + * whose names are equal for the first NAMEDATALEN-minus-a-few + * characters. + */ + CommandCounterIncrement(); + } + + /* Commit this transaction and make index swaps visible */ + CommitTransactionCommand(); + StartTransactionCommand(); + + /* + * While we could set PROC_IN_SAFE_IC if all indexes qualified, there's no + * real need for that, because we only acquire an Xid after the wait is + * done, and that lasts for a very short period. + */ + + /* + * Phase 5 of REINDEX CONCURRENTLY + * + * Mark the old indexes as dead. First we must wait until no running + * transaction could be using the index for a query. See also + * index_drop() for more details. + */ + + pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE, + PROGRESS_CREATEIDX_PHASE_WAIT_4); + WaitForLockersMultiple(lockTags, AccessExclusiveLock, true); + + foreach(lc, indexIds) + { + ReindexIndexInfo *oldidx = lfirst(lc); + + /* + * Check for user-requested abort. This is inside a transaction so as + * xact.c does not issue a useless WARNING, and ensures that + * session-level locks are cleaned up on abort. + */ + CHECK_FOR_INTERRUPTS(); + + index_concurrently_set_dead(oldidx->tableId, oldidx->indexId); + } + + /* Commit this transaction to make the updates visible. */ + CommitTransactionCommand(); + StartTransactionCommand(); + + /* + * While we could set PROC_IN_SAFE_IC if all indexes qualified, there's no + * real need for that, because we only acquire an Xid after the wait is + * done, and that lasts for a very short period. + */ + + /* + * Phase 6 of REINDEX CONCURRENTLY + * + * Drop the old indexes. 
+ */ + + pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE, + PROGRESS_CREATEIDX_PHASE_WAIT_5); + WaitForLockersMultiple(lockTags, AccessExclusiveLock, true); + + PushActiveSnapshot(GetTransactionSnapshot()); + + { + ObjectAddresses *objects = new_object_addresses(); + + foreach(lc, indexIds) + { + ReindexIndexInfo *idx = lfirst(lc); + ObjectAddress object; + + object.classId = RelationRelationId; + object.objectId = idx->indexId; + object.objectSubId = 0; + + add_exact_object_address(&object, objects); + } + + /* + * Use PERFORM_DELETION_CONCURRENT_LOCK so that index_drop() uses the + * right lock level. + */ + performMultipleDeletions(objects, DROP_RESTRICT, + PERFORM_DELETION_CONCURRENT_LOCK | PERFORM_DELETION_INTERNAL); + } + + PopActiveSnapshot(); + CommitTransactionCommand(); + + /* + * Finally, release the session-level lock on the table. + */ + foreach(lc, relationLocks) + { + LockRelId *lockrelid = (LockRelId *) lfirst(lc); + + UnlockRelationIdForSession(lockrelid, ShareUpdateExclusiveLock); + } + + /* Start a new transaction to finish process properly */ + StartTransactionCommand(); + + /* Log what we did */ + if ((params->options & REINDEXOPT_VERBOSE) != 0) + { + if (relkind == RELKIND_INDEX) + ereport(INFO, + (errmsg("index \"%s.%s\" was reindexed", + relationNamespace, relationName), + errdetail("%s.", + pg_rusage_show(&ru0)))); + else + { + foreach(lc, newIndexIds) + { + ReindexIndexInfo *idx = lfirst(lc); + Oid indOid = idx->indexId; + + ereport(INFO, + (errmsg("index \"%s.%s\" was reindexed", + get_namespace_name(get_rel_namespace(indOid)), + get_rel_name(indOid)))); + /* Don't show rusage here, since it's not per index. 
*/ + } + + ereport(INFO, + (errmsg("table \"%s.%s\" was reindexed", + relationNamespace, relationName), + errdetail("%s.", + pg_rusage_show(&ru0)))); + } + } + + MemoryContextDelete(private_context); + + pgstat_progress_end_command(); + + return true; +} + +/* + * Insert or delete an appropriate pg_inherits tuple to make the given index + * be a partition of the indicated parent index. + * + * This also corrects the pg_depend information for the affected index. + */ +void +IndexSetParentIndex(Relation partitionIdx, Oid parentOid) +{ + Relation pg_inherits; + ScanKeyData key[2]; + SysScanDesc scan; + Oid partRelid = RelationGetRelid(partitionIdx); + HeapTuple tuple; + bool fix_dependencies; + + /* Make sure this is an index */ + Assert(partitionIdx->rd_rel->relkind == RELKIND_INDEX || + partitionIdx->rd_rel->relkind == RELKIND_PARTITIONED_INDEX); + + /* + * Scan pg_inherits for rows linking our index to some parent. + */ + pg_inherits = relation_open(InheritsRelationId, RowExclusiveLock); + ScanKeyInit(&key[0], + Anum_pg_inherits_inhrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(partRelid)); + ScanKeyInit(&key[1], + Anum_pg_inherits_inhseqno, + BTEqualStrategyNumber, F_INT4EQ, + Int32GetDatum(1)); + scan = systable_beginscan(pg_inherits, InheritsRelidSeqnoIndexId, true, + NULL, 2, key); + tuple = systable_getnext(scan); + + if (!HeapTupleIsValid(tuple)) + { + if (parentOid == InvalidOid) + { + /* + * No pg_inherits row, and no parent wanted: nothing to do in this + * case. + */ + fix_dependencies = false; + } + else + { + StoreSingleInheritance(partRelid, parentOid, 1); + fix_dependencies = true; + } + } + else + { + Form_pg_inherits inhForm = (Form_pg_inherits) GETSTRUCT(tuple); + + if (parentOid == InvalidOid) + { + /* + * There exists a pg_inherits row, which we want to clear; do so. + */ + CatalogTupleDelete(pg_inherits, &tuple->t_self); + fix_dependencies = true; + } + else + { + /* + * A pg_inherits row exists. 
If it's the same we want, then we're + * good; if it differs, that amounts to a corrupt catalog and + * should not happen. + */ + if (inhForm->inhparent != parentOid) + { + /* unexpected: we should not get called in this case */ + elog(ERROR, "bogus pg_inherit row: inhrelid %u inhparent %u", + inhForm->inhrelid, inhForm->inhparent); + } + + /* already in the right state */ + fix_dependencies = false; + } + } + + /* done with pg_inherits */ + systable_endscan(scan); + relation_close(pg_inherits, RowExclusiveLock); + + /* set relhassubclass if an index partition has been added to the parent */ + if (OidIsValid(parentOid)) + SetRelationHasSubclass(parentOid, true); + + /* set relispartition correctly on the partition */ + update_relispartition(partRelid, OidIsValid(parentOid)); + + if (fix_dependencies) + { + /* + * Insert/delete pg_depend rows. If setting a parent, add PARTITION + * dependencies on the parent index and the table; if removing a + * parent, delete PARTITION dependencies. + */ + if (OidIsValid(parentOid)) + { + ObjectAddress partIdx; + ObjectAddress parentIdx; + ObjectAddress partitionTbl; + + ObjectAddressSet(partIdx, RelationRelationId, partRelid); + ObjectAddressSet(parentIdx, RelationRelationId, parentOid); + ObjectAddressSet(partitionTbl, RelationRelationId, + partitionIdx->rd_index->indrelid); + recordDependencyOn(&partIdx, &parentIdx, + DEPENDENCY_PARTITION_PRI); + recordDependencyOn(&partIdx, &partitionTbl, + DEPENDENCY_PARTITION_SEC); + } + else + { + deleteDependencyRecordsForClass(RelationRelationId, partRelid, + RelationRelationId, + DEPENDENCY_PARTITION_PRI); + deleteDependencyRecordsForClass(RelationRelationId, partRelid, + RelationRelationId, + DEPENDENCY_PARTITION_SEC); + } + + /* make our updates visible */ + CommandCounterIncrement(); + } +} + +/* + * Subroutine of IndexSetParentIndex to update the relispartition flag of the + * given index to the given value. 
+ */ +static void +update_relispartition(Oid relationId, bool newval) +{ + HeapTuple tup; + Relation classRel; + + classRel = table_open(RelationRelationId, RowExclusiveLock); + tup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relationId)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for relation %u", relationId); + Assert(((Form_pg_class) GETSTRUCT(tup))->relispartition != newval); + ((Form_pg_class) GETSTRUCT(tup))->relispartition = newval; + CatalogTupleUpdate(classRel, &tup->t_self, tup); + heap_freetuple(tup); + table_close(classRel, RowExclusiveLock); +} + +/* + * Set the PROC_IN_SAFE_IC flag in MyProc->statusFlags. + * + * When doing concurrent index builds, we can set this flag + * to tell other processes concurrently running CREATE + * INDEX CONCURRENTLY or REINDEX CONCURRENTLY to ignore us when + * doing their waits for concurrent snapshots. On one hand it + * avoids pointlessly waiting for a process that's not interesting + * anyway; but more importantly it avoids deadlocks in some cases. + * + * This can be done safely only for indexes that don't execute any + * expressions that could access other tables, so index must not be + * expressional nor partial. Caller is responsible for only calling + * this routine when that assumption holds true. + * + * (The flag is reset automatically at transaction end, so it must be + * set for each transaction.) + */ +static inline void +set_indexsafe_procflags(void) +{ + /* + * This should only be called before installing xid or xmin in MyProc; + * otherwise, concurrent processes could see an Xmin that moves backwards. 
+ */ + Assert(MyProc->xid == InvalidTransactionId && + MyProc->xmin == InvalidTransactionId); + + LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + MyProc->statusFlags |= PROC_IN_SAFE_IC; + ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags; + LWLockRelease(ProcArrayLock); +} diff --git a/src/backend/commands/lockcmds.c b/src/backend/commands/lockcmds.c new file mode 100644 index 0000000..b97b8b0 --- /dev/null +++ b/src/backend/commands/lockcmds.c @@ -0,0 +1,306 @@ +/*------------------------------------------------------------------------- + * + * lockcmds.c + * LOCK command support code + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/lockcmds.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/table.h" +#include "access/xact.h" +#include "catalog/namespace.h" +#include "catalog/pg_inherits.h" +#include "commands/lockcmds.h" +#include "miscadmin.h" +#include "nodes/nodeFuncs.h" +#include "parser/parse_clause.h" +#include "rewrite/rewriteHandler.h" +#include "storage/lmgr.h" +#include "utils/acl.h" +#include "utils/lsyscache.h" +#include "utils/syscache.h" + +static void LockTableRecurse(Oid reloid, LOCKMODE lockmode, bool nowait); +static AclResult LockTableAclCheck(Oid relid, LOCKMODE lockmode, Oid userid); +static void RangeVarCallbackForLockTable(const RangeVar *rv, Oid relid, + Oid oldrelid, void *arg); +static void LockViewRecurse(Oid reloid, LOCKMODE lockmode, bool nowait, + List *ancestor_views); + +/* + * LOCK TABLE + */ +void +LockTableCommand(LockStmt *lockstmt) +{ + ListCell *p; + + /* + * Iterate over the list and process the named relations one at a time + */ + foreach(p, lockstmt->relations) + { + RangeVar *rv = (RangeVar *) lfirst(p); + bool recurse = rv->inh; + Oid reloid; + + reloid = 
RangeVarGetRelidExtended(rv, lockstmt->mode, + lockstmt->nowait ? RVR_NOWAIT : 0, + RangeVarCallbackForLockTable, + (void *) &lockstmt->mode); + + if (get_rel_relkind(reloid) == RELKIND_VIEW) + LockViewRecurse(reloid, lockstmt->mode, lockstmt->nowait, NIL); + else if (recurse) + LockTableRecurse(reloid, lockstmt->mode, lockstmt->nowait); + } +} + +/* + * Before acquiring a table lock on the named table, check whether we have + * permission to do so. + */ +static void +RangeVarCallbackForLockTable(const RangeVar *rv, Oid relid, Oid oldrelid, + void *arg) +{ + LOCKMODE lockmode = *(LOCKMODE *) arg; + char relkind; + char relpersistence; + AclResult aclresult; + + if (!OidIsValid(relid)) + return; /* doesn't exist, so no permissions check */ + relkind = get_rel_relkind(relid); + if (!relkind) + return; /* woops, concurrently dropped; no permissions + * check */ + + /* Currently, we only allow plain tables or views to be locked */ + if (relkind != RELKIND_RELATION && relkind != RELKIND_PARTITIONED_TABLE && + relkind != RELKIND_VIEW) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot lock relation \"%s\"", + rv->relname), + errdetail_relkind_not_supported(relkind))); + + /* + * Make note if a temporary relation has been accessed in this + * transaction. + */ + relpersistence = get_rel_persistence(relid); + if (relpersistence == RELPERSISTENCE_TEMP) + MyXactFlags |= XACT_FLAGS_ACCESSEDTEMPNAMESPACE; + + /* Check permissions. */ + aclresult = LockTableAclCheck(relid, lockmode, GetUserId()); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, get_relkind_objtype(get_rel_relkind(relid)), rv->relname); +} + +/* + * Apply LOCK TABLE recursively over an inheritance tree + * + * This doesn't check permission to perform LOCK TABLE on the child tables, + * because getting here means that the user has permission to lock the + * parent which is enough. 
+ */ +static void +LockTableRecurse(Oid reloid, LOCKMODE lockmode, bool nowait) +{ + List *children; + ListCell *lc; + + children = find_all_inheritors(reloid, NoLock, NULL); + + foreach(lc, children) + { + Oid childreloid = lfirst_oid(lc); + + /* Parent already locked. */ + if (childreloid == reloid) + continue; + + if (!nowait) + LockRelationOid(childreloid, lockmode); + else if (!ConditionalLockRelationOid(childreloid, lockmode)) + { + /* try to throw error by name; relation could be deleted... */ + char *relname = get_rel_name(childreloid); + + if (!relname) + continue; /* child concurrently dropped, just skip it */ + ereport(ERROR, + (errcode(ERRCODE_LOCK_NOT_AVAILABLE), + errmsg("could not obtain lock on relation \"%s\"", + relname))); + } + + /* + * Even if we got the lock, child might have been concurrently + * dropped. If so, we can skip it. + */ + if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(childreloid))) + { + /* Release useless lock */ + UnlockRelationOid(childreloid, lockmode); + continue; + } + } +} + +/* + * Apply LOCK TABLE recursively over a view + * + * All tables and views appearing in the view definition query are locked + * recursively with the same lock mode. + */ + +typedef struct +{ + LOCKMODE lockmode; /* lock mode to use */ + bool nowait; /* no wait mode */ + Oid check_as_user; /* user for checking the privilege */ + Oid viewoid; /* OID of the view to be locked */ + List *ancestor_views; /* OIDs of ancestor views */ +} LockViewRecurse_context; + +static bool +LockViewRecurse_walker(Node *node, LockViewRecurse_context *context) +{ + if (node == NULL) + return false; + + if (IsA(node, Query)) + { + Query *query = (Query *) node; + ListCell *rtable; + + foreach(rtable, query->rtable) + { + RangeTblEntry *rte = lfirst(rtable); + AclResult aclresult; + + Oid relid = rte->relid; + char relkind = rte->relkind; + char *relname = get_rel_name(relid); + + /* + * The OLD and NEW placeholder entries in the view's rtable are + * skipped. 
+ */ + if (relid == context->viewoid && + (strcmp(rte->eref->aliasname, "old") == 0 || + strcmp(rte->eref->aliasname, "new") == 0)) + continue; + + /* Currently, we only allow plain tables or views to be locked. */ + if (relkind != RELKIND_RELATION && relkind != RELKIND_PARTITIONED_TABLE && + relkind != RELKIND_VIEW) + continue; + + /* + * We might be dealing with a self-referential view. If so, we + * can just stop recursing, since we already locked it. + */ + if (list_member_oid(context->ancestor_views, relid)) + continue; + + /* + * Check permissions as the specified user. This will either be + * the view owner or the current user. + */ + aclresult = LockTableAclCheck(relid, context->lockmode, + context->check_as_user); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, get_relkind_objtype(relkind), relname); + + /* We have enough rights to lock the relation; do so. */ + if (!context->nowait) + LockRelationOid(relid, context->lockmode); + else if (!ConditionalLockRelationOid(relid, context->lockmode)) + ereport(ERROR, + (errcode(ERRCODE_LOCK_NOT_AVAILABLE), + errmsg("could not obtain lock on relation \"%s\"", + relname))); + + if (relkind == RELKIND_VIEW) + LockViewRecurse(relid, context->lockmode, context->nowait, + context->ancestor_views); + else if (rte->inh) + LockTableRecurse(relid, context->lockmode, context->nowait); + } + + return query_tree_walker(query, + LockViewRecurse_walker, + context, + QTW_IGNORE_JOINALIASES); + } + + return expression_tree_walker(node, + LockViewRecurse_walker, + context); +} + +static void +LockViewRecurse(Oid reloid, LOCKMODE lockmode, bool nowait, + List *ancestor_views) +{ + LockViewRecurse_context context; + Relation view; + Query *viewquery; + + /* caller has already locked the view */ + view = table_open(reloid, NoLock); + viewquery = get_view_query(view); + + /* + * If the view has the security_invoker property set, check permissions as + * the current user. Otherwise, check permissions as the view owner. 
+ */ + context.lockmode = lockmode; + context.nowait = nowait; + if (RelationHasSecurityInvoker(view)) + context.check_as_user = GetUserId(); + else + context.check_as_user = view->rd_rel->relowner; + context.viewoid = reloid; + context.ancestor_views = lappend_oid(ancestor_views, reloid); + + LockViewRecurse_walker((Node *) viewquery, &context); + + context.ancestor_views = list_delete_last(context.ancestor_views); + + table_close(view, NoLock); +} + +/* + * Check whether the current user is permitted to lock this relation. + */ +static AclResult +LockTableAclCheck(Oid reloid, LOCKMODE lockmode, Oid userid) +{ + AclResult aclresult; + AclMode aclmask; + + /* Verify adequate privilege */ + if (lockmode == AccessShareLock) + aclmask = ACL_SELECT; + else if (lockmode == RowExclusiveLock) + aclmask = ACL_INSERT | ACL_UPDATE | ACL_DELETE | ACL_TRUNCATE; + else + aclmask = ACL_UPDATE | ACL_DELETE | ACL_TRUNCATE; + + aclresult = pg_class_aclcheck(reloid, userid, aclmask); + + return aclresult; +} diff --git a/src/backend/commands/matview.c b/src/backend/commands/matview.c new file mode 100644 index 0000000..d1ee106 --- /dev/null +++ b/src/backend/commands/matview.c @@ -0,0 +1,936 @@ +/*------------------------------------------------------------------------- + * + * matview.c + * materialized view support + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/matview.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/genam.h" +#include "access/heapam.h" +#include "access/htup_details.h" +#include "access/multixact.h" +#include "access/tableam.h" +#include "access/xact.h" +#include "access/xlog.h" +#include "catalog/catalog.h" +#include "catalog/indexing.h" +#include "catalog/namespace.h" +#include "catalog/pg_am.h" +#include "catalog/pg_opclass.h" 
+#include "catalog/pg_operator.h" +#include "commands/cluster.h" +#include "commands/matview.h" +#include "commands/tablecmds.h" +#include "commands/tablespace.h" +#include "executor/executor.h" +#include "executor/spi.h" +#include "miscadmin.h" +#include "parser/parse_relation.h" +#include "pgstat.h" +#include "rewrite/rewriteHandler.h" +#include "storage/lmgr.h" +#include "storage/smgr.h" +#include "tcop/tcopprot.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" +#include "utils/snapmgr.h" +#include "utils/syscache.h" + + +typedef struct +{ + DestReceiver pub; /* publicly-known function pointers */ + Oid transientoid; /* OID of new heap into which to store */ + /* These fields are filled by transientrel_startup: */ + Relation transientrel; /* relation to write to */ + CommandId output_cid; /* cmin to insert in output tuples */ + int ti_options; /* table_tuple_insert performance options */ + BulkInsertState bistate; /* bulk insert state */ +} DR_transientrel; + +static int matview_maintenance_depth = 0; + +static void transientrel_startup(DestReceiver *self, int operation, TupleDesc typeinfo); +static bool transientrel_receive(TupleTableSlot *slot, DestReceiver *self); +static void transientrel_shutdown(DestReceiver *self); +static void transientrel_destroy(DestReceiver *self); +static uint64 refresh_matview_datafill(DestReceiver *dest, Query *query, + const char *queryString); +static char *make_temptable_name_n(char *tempname, int n); +static void refresh_by_match_merge(Oid matviewOid, Oid tempOid, Oid relowner, + int save_sec_context); +static void refresh_by_heap_swap(Oid matviewOid, Oid OIDNewHeap, char relpersistence); +static bool is_usable_unique_index(Relation indexRel); +static void OpenMatViewIncrementalMaintenance(void); +static void CloseMatViewIncrementalMaintenance(void); + +/* + * SetMatViewPopulatedState + * Mark a materialized view as populated, or not. 
+ * + * NOTE: caller must be holding an appropriate lock on the relation. + */ +void +SetMatViewPopulatedState(Relation relation, bool newstate) +{ + Relation pgrel; + HeapTuple tuple; + + Assert(relation->rd_rel->relkind == RELKIND_MATVIEW); + + /* + * Update relation's pg_class entry. Crucial side-effect: other backends + * (and this one too!) are sent SI message to make them rebuild relcache + * entries. + */ + pgrel = table_open(RelationRelationId, RowExclusiveLock); + tuple = SearchSysCacheCopy1(RELOID, + ObjectIdGetDatum(RelationGetRelid(relation))); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for relation %u", + RelationGetRelid(relation)); + + ((Form_pg_class) GETSTRUCT(tuple))->relispopulated = newstate; + + CatalogTupleUpdate(pgrel, &tuple->t_self, tuple); + + heap_freetuple(tuple); + table_close(pgrel, RowExclusiveLock); + + /* + * Advance command counter to make the updated pg_class row locally + * visible. + */ + CommandCounterIncrement(); +} + +/* + * ExecRefreshMatView -- execute a REFRESH MATERIALIZED VIEW command + * + * This refreshes the materialized view by creating a new table and swapping + * the relfilenodes of the new table and the old materialized view, so the OID + * of the original materialized view is preserved. Thus we do not lose GRANT + * nor references to this materialized view. + * + * If WITH NO DATA was specified, this is effectively like a TRUNCATE; + * otherwise it is like a TRUNCATE followed by an INSERT using the SELECT + * statement associated with the materialized view. The statement node's + * skipData field shows whether the clause was used. + * + * Indexes are rebuilt too, via REINDEX. Since we are effectively bulk-loading + * the new heap, it's better to create the indexes afterwards than to fill them + * incrementally while we load. + * + * The matview's "populated" state is changed based on whether the contents + * reflect the result set of the materialized view's query. 
+ */ +ObjectAddress +ExecRefreshMatView(RefreshMatViewStmt *stmt, const char *queryString, + ParamListInfo params, QueryCompletion *qc) +{ + Oid matviewOid; + Relation matviewRel; + RewriteRule *rule; + List *actions; + Query *dataQuery; + Oid tableSpace; + Oid relowner; + Oid OIDNewHeap; + DestReceiver *dest; + uint64 processed = 0; + bool concurrent; + LOCKMODE lockmode; + char relpersistence; + Oid save_userid; + int save_sec_context; + int save_nestlevel; + ObjectAddress address; + + /* Determine strength of lock needed. */ + concurrent = stmt->concurrent; + lockmode = concurrent ? ExclusiveLock : AccessExclusiveLock; + + /* + * Get a lock until end of transaction. + */ + matviewOid = RangeVarGetRelidExtended(stmt->relation, + lockmode, 0, + RangeVarCallbackOwnsTable, NULL); + matviewRel = table_open(matviewOid, NoLock); + relowner = matviewRel->rd_rel->relowner; + + /* + * Switch to the owner's userid, so that any functions are run as that + * user. Also lock down security-restricted operations and arrange to + * make GUC variable changes local to this command. + */ + GetUserIdAndSecContext(&save_userid, &save_sec_context); + SetUserIdAndSecContext(relowner, + save_sec_context | SECURITY_RESTRICTED_OPERATION); + save_nestlevel = NewGUCNestLevel(); + + /* Make sure it is a materialized view. */ + if (matviewRel->rd_rel->relkind != RELKIND_MATVIEW) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("\"%s\" is not a materialized view", + RelationGetRelationName(matviewRel)))); + + /* Check that CONCURRENTLY is not specified if not populated. */ + if (concurrent && !RelationIsPopulated(matviewRel)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("CONCURRENTLY cannot be used when the materialized view is not populated"))); + + /* Check that conflicting options have not been specified. 
*/ + if (concurrent && stmt->skipData) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("%s and %s options cannot be used together", + "CONCURRENTLY", "WITH NO DATA"))); + + /* + * Check that everything is correct for a refresh. Problems at this point + * are internal errors, so elog is sufficient. + */ + if (matviewRel->rd_rel->relhasrules == false || + matviewRel->rd_rules->numLocks < 1) + elog(ERROR, + "materialized view \"%s\" is missing rewrite information", + RelationGetRelationName(matviewRel)); + + if (matviewRel->rd_rules->numLocks > 1) + elog(ERROR, + "materialized view \"%s\" has too many rules", + RelationGetRelationName(matviewRel)); + + rule = matviewRel->rd_rules->rules[0]; + if (rule->event != CMD_SELECT || !(rule->isInstead)) + elog(ERROR, + "the rule for materialized view \"%s\" is not a SELECT INSTEAD OF rule", + RelationGetRelationName(matviewRel)); + + actions = rule->actions; + if (list_length(actions) != 1) + elog(ERROR, + "the rule for materialized view \"%s\" is not a single action", + RelationGetRelationName(matviewRel)); + + /* + * Check that there is a unique index with no WHERE clause on one or more + * columns of the materialized view if CONCURRENTLY is specified. 
+ */ + if (concurrent) + { + List *indexoidlist = RelationGetIndexList(matviewRel); + ListCell *indexoidscan; + bool hasUniqueIndex = false; + + foreach(indexoidscan, indexoidlist) + { + Oid indexoid = lfirst_oid(indexoidscan); + Relation indexRel; + + indexRel = index_open(indexoid, AccessShareLock); + hasUniqueIndex = is_usable_unique_index(indexRel); + index_close(indexRel, AccessShareLock); + if (hasUniqueIndex) + break; + } + + list_free(indexoidlist); + + if (!hasUniqueIndex) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("cannot refresh materialized view \"%s\" concurrently", + quote_qualified_identifier(get_namespace_name(RelationGetNamespace(matviewRel)), + RelationGetRelationName(matviewRel))), + errhint("Create a unique index with no WHERE clause on one or more columns of the materialized view."))); + } + + /* + * The stored query was rewritten at the time of the MV definition, but + * has not been scribbled on by the planner. + */ + dataQuery = linitial_node(Query, actions); + + /* + * Check for active uses of the relation in the current transaction, such + * as open scans. + * + * NB: We count on this to protect us against problems with refreshing the + * data using TABLE_INSERT_FROZEN. + */ + CheckTableNotInUse(matviewRel, "REFRESH MATERIALIZED VIEW"); + + /* + * Tentatively mark the matview as populated or not (this will roll back + * if we fail later). + */ + SetMatViewPopulatedState(matviewRel, !stmt->skipData); + + /* Concurrent refresh builds new data in temp tablespace, and does diff. */ + if (concurrent) + { + tableSpace = GetDefaultTablespace(RELPERSISTENCE_TEMP, false); + relpersistence = RELPERSISTENCE_TEMP; + } + else + { + tableSpace = matviewRel->rd_rel->reltablespace; + relpersistence = matviewRel->rd_rel->relpersistence; + } + + /* + * Create the transient table that will receive the regenerated data. Lock + * it against access by any other process until commit (by which time it + * will be gone). 
+ */ + OIDNewHeap = make_new_heap(matviewOid, tableSpace, + matviewRel->rd_rel->relam, + relpersistence, ExclusiveLock); + LockRelationOid(OIDNewHeap, AccessExclusiveLock); + dest = CreateTransientRelDestReceiver(OIDNewHeap); + + /* Generate the data, if wanted. */ + if (!stmt->skipData) + processed = refresh_matview_datafill(dest, dataQuery, queryString); + + /* Make the matview match the newly generated data. */ + if (concurrent) + { + int old_depth = matview_maintenance_depth; + + PG_TRY(); + { + refresh_by_match_merge(matviewOid, OIDNewHeap, relowner, + save_sec_context); + } + PG_CATCH(); + { + matview_maintenance_depth = old_depth; + PG_RE_THROW(); + } + PG_END_TRY(); + Assert(matview_maintenance_depth == old_depth); + } + else + { + refresh_by_heap_swap(matviewOid, OIDNewHeap, relpersistence); + + /* + * Inform cumulative stats system about our activity: basically, we + * truncated the matview and inserted some new data. (The concurrent + * code path above doesn't need to worry about this because the + * inserts and deletes it issues get counted by lower-level code.) + */ + pgstat_count_truncate(matviewRel); + if (!stmt->skipData) + pgstat_count_heap_insert(matviewRel, processed); + } + + table_close(matviewRel, NoLock); + + /* Roll back any GUC changes */ + AtEOXact_GUC(false, save_nestlevel); + + /* Restore userid and security context */ + SetUserIdAndSecContext(save_userid, save_sec_context); + + ObjectAddressSet(address, RelationRelationId, matviewOid); + + /* + * Save the rowcount so that pg_stat_statements can track the total number + * of rows processed by REFRESH MATERIALIZED VIEW command. Note that we + * still don't display the rowcount in the command completion tag output, + * i.e., the display_rowcount flag of CMDTAG_REFRESH_MATERIALIZED_VIEW + * command tag is left false in cmdtaglist.h. Otherwise, the change of + * completion tag output might break applications using it. 
+ */ + if (qc) + SetQueryCompletion(qc, CMDTAG_REFRESH_MATERIALIZED_VIEW, processed); + + return address; +} + +/* + * refresh_matview_datafill + * + * Execute the given query, sending result rows to "dest" (which will + * insert them into the target matview). + * + * Returns number of rows inserted. + */ +static uint64 +refresh_matview_datafill(DestReceiver *dest, Query *query, + const char *queryString) +{ + List *rewritten; + PlannedStmt *plan; + QueryDesc *queryDesc; + Query *copied_query; + uint64 processed; + + /* Lock and rewrite, using a copy to preserve the original query. */ + copied_query = copyObject(query); + AcquireRewriteLocks(copied_query, true, false); + rewritten = QueryRewrite(copied_query); + + /* SELECT should never rewrite to more or less than one SELECT query */ + if (list_length(rewritten) != 1) + elog(ERROR, "unexpected rewrite result for REFRESH MATERIALIZED VIEW"); + query = (Query *) linitial(rewritten); + + /* Check for user-requested abort. */ + CHECK_FOR_INTERRUPTS(); + + /* Plan the query which will generate data for the refresh. */ + plan = pg_plan_query(query, queryString, CURSOR_OPT_PARALLEL_OK, NULL); + + /* + * Use a snapshot with an updated command ID to ensure this query sees + * results of any previously executed queries. (This could only matter if + * the planner executed an allegedly-stable function that changed the + * database contents, but let's do it anyway to be safe.) 
+ */ + PushCopiedSnapshot(GetActiveSnapshot()); + UpdateActiveSnapshotCommandId(); + + /* Create a QueryDesc, redirecting output to our tuple receiver */ + queryDesc = CreateQueryDesc(plan, queryString, + GetActiveSnapshot(), InvalidSnapshot, + dest, NULL, NULL, 0); + + /* call ExecutorStart to prepare the plan for execution */ + ExecutorStart(queryDesc, 0); + + /* run the plan */ + ExecutorRun(queryDesc, ForwardScanDirection, 0L, true); + + processed = queryDesc->estate->es_processed; + + /* and clean up */ + ExecutorFinish(queryDesc); + ExecutorEnd(queryDesc); + + FreeQueryDesc(queryDesc); + + PopActiveSnapshot(); + + return processed; +} + +DestReceiver * +CreateTransientRelDestReceiver(Oid transientoid) +{ + DR_transientrel *self = (DR_transientrel *) palloc0(sizeof(DR_transientrel)); + + self->pub.receiveSlot = transientrel_receive; + self->pub.rStartup = transientrel_startup; + self->pub.rShutdown = transientrel_shutdown; + self->pub.rDestroy = transientrel_destroy; + self->pub.mydest = DestTransientRel; + self->transientoid = transientoid; + + return (DestReceiver *) self; +} + +/* + * transientrel_startup --- executor startup + */ +static void +transientrel_startup(DestReceiver *self, int operation, TupleDesc typeinfo) +{ + DR_transientrel *myState = (DR_transientrel *) self; + Relation transientrel; + + transientrel = table_open(myState->transientoid, NoLock); + + /* + * Fill private fields of myState for use by later routines + */ + myState->transientrel = transientrel; + myState->output_cid = GetCurrentCommandId(true); + myState->ti_options = TABLE_INSERT_SKIP_FSM | TABLE_INSERT_FROZEN; + myState->bistate = GetBulkInsertState(); + + /* + * Valid smgr_targblock implies something already wrote to the relation. + * This may be harmless, but this function hasn't planned for it. 
+ */ + Assert(RelationGetTargetBlock(transientrel) == InvalidBlockNumber); +} + +/* + * transientrel_receive --- receive one tuple + */ +static bool +transientrel_receive(TupleTableSlot *slot, DestReceiver *self) +{ + DR_transientrel *myState = (DR_transientrel *) self; + + /* + * Note that the input slot might not be of the type of the target + * relation. That's supported by table_tuple_insert(), but slightly less + * efficient than inserting with the right slot - but the alternative + * would be to copy into a slot of the right type, which would not be + * cheap either. This also doesn't allow accessing per-AM data (say a + * tuple's xmin), but since we don't do that here... + */ + + table_tuple_insert(myState->transientrel, + slot, + myState->output_cid, + myState->ti_options, + myState->bistate); + + /* We know this is a newly created relation, so there are no indexes */ + + return true; +} + +/* + * transientrel_shutdown --- executor end + */ +static void +transientrel_shutdown(DestReceiver *self) +{ + DR_transientrel *myState = (DR_transientrel *) self; + + FreeBulkInsertState(myState->bistate); + + table_finish_bulk_insert(myState->transientrel, myState->ti_options); + + /* close transientrel, but keep lock until commit */ + table_close(myState->transientrel, NoLock); + myState->transientrel = NULL; +} + +/* + * transientrel_destroy --- release DestReceiver object + */ +static void +transientrel_destroy(DestReceiver *self) +{ + pfree(self); +} + + +/* + * Given a qualified temporary table name, append an underscore followed by + * the given integer, to make a new table name based on the old one. + * The result is a palloc'd string. + * + * As coded, this would fail to make a valid SQL name if the given name were, + * say, "FOO"."BAR". Currently, the table name portion of the input will + * never be double-quoted because it's of the form "pg_temp_NNN", cf + * make_new_heap(). But we might have to work harder someday. 
+ */ +static char * +make_temptable_name_n(char *tempname, int n) +{ + StringInfoData namebuf; + + initStringInfo(&namebuf); + appendStringInfoString(&namebuf, tempname); + appendStringInfo(&namebuf, "_%d", n); + return namebuf.data; +} + +/* + * refresh_by_match_merge + * + * Refresh a materialized view with transactional semantics, while allowing + * concurrent reads. + * + * This is called after a new version of the data has been created in a + * temporary table. It performs a full outer join against the old version of + * the data, producing "diff" results. This join cannot work if there are any + * duplicated rows in either the old or new versions, in the sense that every + * column would compare as equal between the two rows. It does work correctly + * in the face of rows which have at least one NULL value, with all non-NULL + * columns equal. The behavior of NULLs on equality tests and on UNIQUE + * indexes turns out to be quite convenient here; the tests we need to make + * are consistent with default behavior. If there is at least one UNIQUE + * index on the materialized view, we have exactly the guarantee we need. + * + * The temporary table used to hold the diff results contains just the TID of + * the old record (if matched) and the ROW from the new table as a single + * column of complex record type (if matched). + * + * Once we have the diff table, we perform set-based DELETE and INSERT + * operations against the materialized view, and discard both temporary + * tables. + * + * Everything from the generation of the new data to applying the differences + * takes place under cover of an ExclusiveLock, since it seems as though we + * would want to prohibit not only concurrent REFRESH operations, but also + * incremental maintenance. It also doesn't seem reasonable or safe to allow + * SELECT FOR UPDATE or SELECT FOR SHARE on rows being updated or deleted by + * this command. 
+ */ +static void +refresh_by_match_merge(Oid matviewOid, Oid tempOid, Oid relowner, + int save_sec_context) +{ + StringInfoData querybuf; + Relation matviewRel; + Relation tempRel; + char *matviewname; + char *tempname; + char *diffname; + TupleDesc tupdesc; + bool foundUniqueIndex; + List *indexoidlist; + ListCell *indexoidscan; + int16 relnatts; + Oid *opUsedForQual; + + initStringInfo(&querybuf); + matviewRel = table_open(matviewOid, NoLock); + matviewname = quote_qualified_identifier(get_namespace_name(RelationGetNamespace(matviewRel)), + RelationGetRelationName(matviewRel)); + tempRel = table_open(tempOid, NoLock); + tempname = quote_qualified_identifier(get_namespace_name(RelationGetNamespace(tempRel)), + RelationGetRelationName(tempRel)); + diffname = make_temptable_name_n(tempname, 2); + + relnatts = RelationGetNumberOfAttributes(matviewRel); + + /* Open SPI context. */ + if (SPI_connect() != SPI_OK_CONNECT) + elog(ERROR, "SPI_connect failed"); + + /* Analyze the temp table with the new contents. */ + appendStringInfo(&querybuf, "ANALYZE %s", tempname); + if (SPI_exec(querybuf.data, 0) != SPI_OK_UTILITY) + elog(ERROR, "SPI_exec failed: %s", querybuf.data); + + /* + * We need to ensure that there are not duplicate rows without NULLs in + * the new data set before we can count on the "diff" results. Check for + * that in a way that allows showing the first duplicated row found. Even + * after we pass this test, a unique index on the materialized view may + * find a duplicate key problem. + * + * Note: here and below, we use "tablename.*::tablerowtype" as a hack to + * keep ".*" from being expanded into multiple columns in a SELECT list. + * Compare ruleutils.c's get_variable(). 
+ */ + resetStringInfo(&querybuf); + appendStringInfo(&querybuf, + "SELECT newdata.*::%s FROM %s newdata " + "WHERE newdata.* IS NOT NULL AND EXISTS " + "(SELECT 1 FROM %s newdata2 WHERE newdata2.* IS NOT NULL " + "AND newdata2.* OPERATOR(pg_catalog.*=) newdata.* " + "AND newdata2.ctid OPERATOR(pg_catalog.<>) " + "newdata.ctid)", + tempname, tempname, tempname); + if (SPI_execute(querybuf.data, false, 1) != SPI_OK_SELECT) + elog(ERROR, "SPI_exec failed: %s", querybuf.data); + if (SPI_processed > 0) + { + /* + * Note that this ereport() is returning data to the user. Generally, + * we would want to make sure that the user has been granted access to + * this data. However, REFRESH MAT VIEW is only able to be run by the + * owner of the mat view (or a superuser) and therefore there is no + * need to check for access to data in the mat view. + */ + ereport(ERROR, + (errcode(ERRCODE_CARDINALITY_VIOLATION), + errmsg("new data for materialized view \"%s\" contains duplicate rows without any null columns", + RelationGetRelationName(matviewRel)), + errdetail("Row: %s", + SPI_getvalue(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1)))); + } + + SetUserIdAndSecContext(relowner, + save_sec_context | SECURITY_LOCAL_USERID_CHANGE); + + /* Start building the query for creating the diff table. */ + resetStringInfo(&querybuf); + appendStringInfo(&querybuf, + "CREATE TEMP TABLE %s AS " + "SELECT mv.ctid AS tid, newdata.*::%s AS newdata " + "FROM %s mv FULL JOIN %s newdata ON (", + diffname, tempname, matviewname, tempname); + + /* + * Get the list of index OIDs for the table from the relcache, and look up + * each one in the pg_index syscache. We will test for equality on all + * columns present in all unique indexes which only reference columns and + * include all rows. 
+ */ + tupdesc = matviewRel->rd_att; + opUsedForQual = (Oid *) palloc0(sizeof(Oid) * relnatts); + foundUniqueIndex = false; + + indexoidlist = RelationGetIndexList(matviewRel); + + foreach(indexoidscan, indexoidlist) + { + Oid indexoid = lfirst_oid(indexoidscan); + Relation indexRel; + + indexRel = index_open(indexoid, RowExclusiveLock); + if (is_usable_unique_index(indexRel)) + { + Form_pg_index indexStruct = indexRel->rd_index; + int indnkeyatts = indexStruct->indnkeyatts; + oidvector *indclass; + Datum indclassDatum; + bool isnull; + int i; + + /* Must get indclass the hard way. */ + indclassDatum = SysCacheGetAttr(INDEXRELID, + indexRel->rd_indextuple, + Anum_pg_index_indclass, + &isnull); + Assert(!isnull); + indclass = (oidvector *) DatumGetPointer(indclassDatum); + + /* Add quals for all columns from this index. */ + for (i = 0; i < indnkeyatts; i++) + { + int attnum = indexStruct->indkey.values[i]; + Oid opclass = indclass->values[i]; + Form_pg_attribute attr = TupleDescAttr(tupdesc, attnum - 1); + Oid attrtype = attr->atttypid; + HeapTuple cla_ht; + Form_pg_opclass cla_tup; + Oid opfamily; + Oid opcintype; + Oid op; + const char *leftop; + const char *rightop; + + /* + * Identify the equality operator associated with this index + * column. First we need to look up the column's opclass. 
+ */ + cla_ht = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclass)); + if (!HeapTupleIsValid(cla_ht)) + elog(ERROR, "cache lookup failed for opclass %u", opclass); + cla_tup = (Form_pg_opclass) GETSTRUCT(cla_ht); + Assert(cla_tup->opcmethod == BTREE_AM_OID); + opfamily = cla_tup->opcfamily; + opcintype = cla_tup->opcintype; + ReleaseSysCache(cla_ht); + + op = get_opfamily_member(opfamily, opcintype, opcintype, + BTEqualStrategyNumber); + if (!OidIsValid(op)) + elog(ERROR, "missing operator %d(%u,%u) in opfamily %u", + BTEqualStrategyNumber, opcintype, opcintype, opfamily); + + /* + * If we find the same column with the same equality semantics + * in more than one index, we only need to emit the equality + * clause once. + * + * Since we only remember the last equality operator, this + * code could be fooled into emitting duplicate clauses given + * multiple indexes with several different opclasses ... but + * that's so unlikely it doesn't seem worth spending extra + * code to avoid. + */ + if (opUsedForQual[attnum - 1] == op) + continue; + opUsedForQual[attnum - 1] = op; + + /* + * Actually add the qual, ANDed with any others. + */ + if (foundUniqueIndex) + appendStringInfoString(&querybuf, " AND "); + + leftop = quote_qualified_identifier("newdata", + NameStr(attr->attname)); + rightop = quote_qualified_identifier("mv", + NameStr(attr->attname)); + + generate_operator_clause(&querybuf, + leftop, attrtype, + op, + rightop, attrtype); + + foundUniqueIndex = true; + } + } + + /* Keep the locks, since we're about to run DML which needs them. */ + index_close(indexRel, NoLock); + } + + list_free(indexoidlist); + + /* + * There must be at least one usable unique index on the matview. + * + * ExecRefreshMatView() checks that after taking the exclusive lock on the + * matview. So at least one unique index is guaranteed to exist here + * because the lock is still being held; so an Assert seems sufficient. 
+ */ + Assert(foundUniqueIndex); + + appendStringInfoString(&querybuf, + " AND newdata.* OPERATOR(pg_catalog.*=) mv.*) " + "WHERE newdata.* IS NULL OR mv.* IS NULL " + "ORDER BY tid"); + + /* Create the temporary "diff" table. */ + if (SPI_exec(querybuf.data, 0) != SPI_OK_UTILITY) + elog(ERROR, "SPI_exec failed: %s", querybuf.data); + + SetUserIdAndSecContext(relowner, + save_sec_context | SECURITY_RESTRICTED_OPERATION); + + /* + * We have no further use for data from the "full-data" temp table, but we + * must keep it around because its type is referenced from the diff table. + */ + + /* Analyze the diff table. */ + resetStringInfo(&querybuf); + appendStringInfo(&querybuf, "ANALYZE %s", diffname); + if (SPI_exec(querybuf.data, 0) != SPI_OK_UTILITY) + elog(ERROR, "SPI_exec failed: %s", querybuf.data); + + OpenMatViewIncrementalMaintenance(); + + /* Deletes must come before inserts; do them first. */ + resetStringInfo(&querybuf); + appendStringInfo(&querybuf, + "DELETE FROM %s mv WHERE ctid OPERATOR(pg_catalog.=) ANY " + "(SELECT diff.tid FROM %s diff " + "WHERE diff.tid IS NOT NULL " + "AND diff.newdata IS NULL)", + matviewname, diffname); + if (SPI_exec(querybuf.data, 0) != SPI_OK_DELETE) + elog(ERROR, "SPI_exec failed: %s", querybuf.data); + + /* Inserts go last. */ + resetStringInfo(&querybuf); + appendStringInfo(&querybuf, + "INSERT INTO %s SELECT (diff.newdata).* " + "FROM %s diff WHERE tid IS NULL", + matviewname, diffname); + if (SPI_exec(querybuf.data, 0) != SPI_OK_INSERT) + elog(ERROR, "SPI_exec failed: %s", querybuf.data); + + /* We're done maintaining the materialized view. */ + CloseMatViewIncrementalMaintenance(); + table_close(tempRel, NoLock); + table_close(matviewRel, NoLock); + + /* Clean up temp tables. */ + resetStringInfo(&querybuf); + appendStringInfo(&querybuf, "DROP TABLE %s, %s", diffname, tempname); + if (SPI_exec(querybuf.data, 0) != SPI_OK_UTILITY) + elog(ERROR, "SPI_exec failed: %s", querybuf.data); + + /* Close SPI context. 
 */ + if (SPI_finish() != SPI_OK_FINISH) + elog(ERROR, "SPI_finish failed"); +} + +/* + * Swap the physical files of the target and transient tables, then rebuild + * the target's indexes and throw away the transient table. Security context + * swapping is handled by the called function, so it is not needed here. + */ +static void +refresh_by_heap_swap(Oid matviewOid, Oid OIDNewHeap, char relpersistence) +{ + finish_heap_swap(matviewOid, OIDNewHeap, false, false, true, true, + RecentXmin, ReadNextMultiXactId(), relpersistence); +} + +/* + * Check whether specified index is usable for match merge. + */ +static bool +is_usable_unique_index(Relation indexRel) +{ + Form_pg_index indexStruct = indexRel->rd_index; + + /* + * Must be unique, valid, immediate, non-partial, and be defined over + * plain user columns (not expressions). We also require it to be a + * btree. Even if we had any other unique index kinds, we'd not know how + * to identify the corresponding equality operator, nor could we be sure + * that the planner could implement the required FULL JOIN with non-btree + * operators. + */ + if (indexStruct->indisunique && + indexStruct->indimmediate && + indexRel->rd_rel->relam == BTREE_AM_OID && + indexStruct->indisvalid && + RelationGetIndexPredicate(indexRel) == NIL && + indexStruct->indnatts > 0) + { + /* + * The point of groveling through the index columns individually is to + * reject both index expressions and system columns. Currently, + * matviews couldn't have OID columns so there's no way to create an + * index on a system column; but maybe someday that wouldn't be true, + * so let's be safe. + */ + int numatts = indexStruct->indnatts; + int i; + + for (i = 0; i < numatts; i++) + { + int attnum = indexStruct->indkey.values[i]; + + if (attnum <= 0) + return false; /* expression or system column */ + } + return true; + } + return false; +} + + +/* + * This should be used to test whether the backend is in a context where it is + * OK to allow DML statements to modify materialized views.
 We only want to + * allow that for internal code driven by the materialized view definition, + * not for arbitrary user-supplied code. + * + * While the function names reflect the fact that their main intended use is + * incremental maintenance of materialized views (in response to changes to + * the data in referenced relations), they are initially used to allow REFRESH + * without blocking concurrent reads. + */ +bool +MatViewIncrementalMaintenanceIsEnabled(void) +{ + return matview_maintenance_depth > 0; /* nonzero while matview-driven DML is allowed */ +} + +static void +OpenMatViewIncrementalMaintenance(void) +{ + matview_maintenance_depth++; +} + +static void +CloseMatViewIncrementalMaintenance(void) +{ + matview_maintenance_depth--; + Assert(matview_maintenance_depth >= 0); /* catch unbalanced close */ +} diff --git a/src/backend/commands/opclasscmds.c b/src/backend/commands/opclasscmds.c new file mode 100644 index 0000000..7a931ab --- /dev/null +++ b/src/backend/commands/opclasscmds.c @@ -0,0 +1,1745 @@ +/*------------------------------------------------------------------------- + * + * opclasscmds.c + * + * Routines for opclass (and opfamily) manipulation commands + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/opclasscmds.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include + +#include "access/genam.h" +#include "access/hash.h" +#include "access/htup_details.h" +#include "access/nbtree.h" +#include "access/sysattr.h" +#include "access/table.h" +#include "catalog/catalog.h" +#include "catalog/dependency.h" +#include "catalog/indexing.h" +#include "catalog/objectaccess.h" +#include "catalog/pg_am.h" +#include "catalog/pg_amop.h" +#include "catalog/pg_amproc.h" +#include "catalog/pg_namespace.h" +#include "catalog/pg_opclass.h" +#include "catalog/pg_operator.h" +#include "catalog/pg_opfamily.h" +#include
"catalog/pg_proc.h" +#include "catalog/pg_type.h" +#include "commands/alter.h" +#include "commands/defrem.h" +#include "commands/event_trigger.h" +#include "miscadmin.h" +#include "parser/parse_func.h" +#include "parser/parse_oper.h" +#include "parser/parse_type.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" +#include "utils/syscache.h" + +static void AlterOpFamilyAdd(AlterOpFamilyStmt *stmt, + Oid amoid, Oid opfamilyoid, + int maxOpNumber, int maxProcNumber, + int opclassOptsProcNumber, List *items); +static void AlterOpFamilyDrop(AlterOpFamilyStmt *stmt, + Oid amoid, Oid opfamilyoid, + int maxOpNumber, int maxProcNumber, + List *items); +static void processTypesSpec(List *args, Oid *lefttype, Oid *righttype); +static void assignOperTypes(OpFamilyMember *member, Oid amoid, Oid typeoid); +static void assignProcTypes(OpFamilyMember *member, Oid amoid, Oid typeoid, + int opclassOptsProcNum); +static void addFamilyMember(List **list, OpFamilyMember *member); +static void storeOperators(List *opfamilyname, Oid amoid, Oid opfamilyoid, + List *operators, bool isAdd); +static void storeProcedures(List *opfamilyname, Oid amoid, Oid opfamilyoid, + List *procedures, bool isAdd); +static void dropOperators(List *opfamilyname, Oid amoid, Oid opfamilyoid, + List *operators); +static void dropProcedures(List *opfamilyname, Oid amoid, Oid opfamilyoid, + List *procedures); + +/* + * OpFamilyCacheLookup + * Look up an existing opfamily by name. + * + * Returns a syscache tuple reference, or NULL if not found. 
+ */ +static HeapTuple +OpFamilyCacheLookup(Oid amID, List *opfamilyname, bool missing_ok) +{ + char *schemaname; + char *opfname; + HeapTuple htup; + + /* deconstruct the name list */ + DeconstructQualifiedName(opfamilyname, &schemaname, &opfname); + + if (schemaname) + { + /* Look in specific schema only */ + Oid namespaceId; + + namespaceId = LookupExplicitNamespace(schemaname, missing_ok); + if (!OidIsValid(namespaceId)) + htup = NULL; /* schema lookup failed with missing_ok */ + else + htup = SearchSysCache3(OPFAMILYAMNAMENSP, + ObjectIdGetDatum(amID), + PointerGetDatum(opfname), + ObjectIdGetDatum(namespaceId)); + } + else + { + /* Unqualified opfamily name, so search the search path */ + Oid opfID = OpfamilynameGetOpfid(amID, opfname); + + if (!OidIsValid(opfID)) + htup = NULL; + else + htup = SearchSysCache1(OPFAMILYOID, ObjectIdGetDatum(opfID)); + } + + if (!HeapTupleIsValid(htup) && !missing_ok) + { + HeapTuple amtup; + + amtup = SearchSysCache1(AMOID, ObjectIdGetDatum(amID)); + if (!HeapTupleIsValid(amtup)) + elog(ERROR, "cache lookup failed for access method %u", amID); + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("operator family \"%s\" does not exist for access method \"%s\"", + NameListToString(opfamilyname), + NameStr(((Form_pg_am) GETSTRUCT(amtup))->amname)))); + } + + return htup; /* if valid, caller must ReleaseSysCache it */ +} + +/* + * get_opfamily_oid + * find an opfamily OID by possibly qualified name + * + * If not found, returns InvalidOid if missing_ok, else throws error. + */ +Oid +get_opfamily_oid(Oid amID, List *opfamilyname, bool missing_ok) +{ + HeapTuple htup; + Form_pg_opfamily opfamform; + Oid opfID; + + htup = OpFamilyCacheLookup(amID, opfamilyname, missing_ok); + if (!HeapTupleIsValid(htup)) + return InvalidOid; + opfamform = (Form_pg_opfamily) GETSTRUCT(htup); + opfID = opfamform->oid; + ReleaseSysCache(htup); + + return opfID; +} + +/* + * OpClassCacheLookup + * Look up an existing opclass by name. + * + * Returns a syscache tuple reference, or NULL if not found.
+ */ +static HeapTuple +OpClassCacheLookup(Oid amID, List *opclassname, bool missing_ok) +{ + char *schemaname; + char *opcname; + HeapTuple htup; + + /* deconstruct the name list */ + DeconstructQualifiedName(opclassname, &schemaname, &opcname); + + if (schemaname) + { + /* Look in specific schema only */ + Oid namespaceId; + + namespaceId = LookupExplicitNamespace(schemaname, missing_ok); + if (!OidIsValid(namespaceId)) + htup = NULL; /* schema lookup failed with missing_ok */ + else + htup = SearchSysCache3(CLAAMNAMENSP, + ObjectIdGetDatum(amID), + PointerGetDatum(opcname), + ObjectIdGetDatum(namespaceId)); + } + else + { + /* Unqualified opclass name, so search the search path */ + Oid opcID = OpclassnameGetOpcid(amID, opcname); + + if (!OidIsValid(opcID)) + htup = NULL; + else + htup = SearchSysCache1(CLAOID, ObjectIdGetDatum(opcID)); + } + + if (!HeapTupleIsValid(htup) && !missing_ok) + { + HeapTuple amtup; + + amtup = SearchSysCache1(AMOID, ObjectIdGetDatum(amID)); + if (!HeapTupleIsValid(amtup)) + elog(ERROR, "cache lookup failed for access method %u", amID); + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("operator class \"%s\" does not exist for access method \"%s\"", + NameListToString(opclassname), + NameStr(((Form_pg_am) GETSTRUCT(amtup))->amname)))); + } + + return htup; /* if valid, caller must ReleaseSysCache it */ +} + +/* + * get_opclass_oid + * find an opclass OID by possibly qualified name + * + * If not found, returns InvalidOid if missing_ok, else throws error. + */ +Oid +get_opclass_oid(Oid amID, List *opclassname, bool missing_ok) +{ + HeapTuple htup; + Form_pg_opclass opcform; + Oid opcID; + + htup = OpClassCacheLookup(amID, opclassname, missing_ok); + if (!HeapTupleIsValid(htup)) + return InvalidOid; + opcform = (Form_pg_opclass) GETSTRUCT(htup); + opcID = opcform->oid; + ReleaseSysCache(htup); + + return opcID; +} + +/* + * CreateOpFamily + * Internal routine to make the catalog entry for a new operator family. + * + * Caller must have done permissions checks etc. already.
+ */ +static ObjectAddress +CreateOpFamily(CreateOpFamilyStmt *stmt, const char *opfname, + Oid namespaceoid, Oid amoid) +{ + Oid opfamilyoid; + Relation rel; + HeapTuple tup; + Datum values[Natts_pg_opfamily]; + bool nulls[Natts_pg_opfamily]; + NameData opfName; + ObjectAddress myself, + referenced; + + rel = table_open(OperatorFamilyRelationId, RowExclusiveLock); + + /* + * Make sure there is no existing opfamily of this name (this is just to + * give a more friendly error message than "duplicate key"). + */ + if (SearchSysCacheExists3(OPFAMILYAMNAMENSP, + ObjectIdGetDatum(amoid), + CStringGetDatum(opfname), + ObjectIdGetDatum(namespaceoid))) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("operator family \"%s\" for access method \"%s\" already exists", + opfname, stmt->amname))); + + /* + * Okay, let's create the pg_opfamily entry. + */ + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + + opfamilyoid = GetNewOidWithIndex(rel, OpfamilyOidIndexId, + Anum_pg_opfamily_oid); + values[Anum_pg_opfamily_oid - 1] = ObjectIdGetDatum(opfamilyoid); + values[Anum_pg_opfamily_opfmethod - 1] = ObjectIdGetDatum(amoid); + namestrcpy(&opfName, opfname); + values[Anum_pg_opfamily_opfname - 1] = NameGetDatum(&opfName); + values[Anum_pg_opfamily_opfnamespace - 1] = ObjectIdGetDatum(namespaceoid); + values[Anum_pg_opfamily_opfowner - 1] = ObjectIdGetDatum(GetUserId()); + + tup = heap_form_tuple(rel->rd_att, values, nulls); + + CatalogTupleInsert(rel, tup); /* insert the new pg_opfamily row */ + + heap_freetuple(tup); + + /* + * Create dependencies for the opfamily proper. 
+ */ + myself.classId = OperatorFamilyRelationId; + myself.objectId = opfamilyoid; + myself.objectSubId = 0; + + /* dependency on access method */ + referenced.classId = AccessMethodRelationId; + referenced.objectId = amoid; + referenced.objectSubId = 0; + recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO); + + /* dependency on namespace */ + referenced.classId = NamespaceRelationId; + referenced.objectId = namespaceoid; + referenced.objectSubId = 0; + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + + /* dependency on owner */ + recordDependencyOnOwner(OperatorFamilyRelationId, opfamilyoid, GetUserId()); + + /* dependency on extension */ + recordDependencyOnCurrentExtension(&myself, false); + + /* Report the new operator family to possibly interested event triggers */ + EventTriggerCollectSimpleCommand(myself, InvalidObjectAddress, + (Node *) stmt); + + /* Post creation hook for new operator family */ + InvokeObjectPostCreateHook(OperatorFamilyRelationId, opfamilyoid, 0); + + table_close(rel, RowExclusiveLock); + + return myself; +} + +/* + * DefineOpClass + * Define a new index operator class. 
+ */ +ObjectAddress +DefineOpClass(CreateOpClassStmt *stmt) +{ + char *opcname; /* name of opclass we're creating */ + Oid amoid, /* our AM's oid */ + typeoid, /* indexable datatype oid */ + storageoid, /* storage datatype oid, if any */ + namespaceoid, /* namespace to create opclass in */ + opfamilyoid, /* oid of containing opfamily */ + opclassoid; /* oid of opclass we create */ + int maxOpNumber, /* amstrategies value */ + optsProcNumber, /* amoptsprocnum value */ + maxProcNumber; /* amsupport value */ + bool amstorage; /* amstorage flag */ + List *operators; /* OpFamilyMember list for operators */ + List *procedures; /* OpFamilyMember list for support procs */ + ListCell *l; + Relation rel; + HeapTuple tup; + Form_pg_am amform; + IndexAmRoutine *amroutine; + Datum values[Natts_pg_opclass]; + bool nulls[Natts_pg_opclass]; + AclResult aclresult; + NameData opcName; + ObjectAddress myself, + referenced; + + /* Convert list of names to a name and namespace */ + namespaceoid = QualifiedNameGetCreationNamespace(stmt->opclassname, + &opcname); + + /* Check we have creation rights in target namespace */ + aclresult = pg_namespace_aclcheck(namespaceoid, GetUserId(), ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_SCHEMA, + get_namespace_name(namespaceoid)); + + /* Get necessary info about access method */ + tup = SearchSysCache1(AMNAME, CStringGetDatum(stmt->amname)); + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("access method \"%s\" does not exist", + stmt->amname))); + + amform = (Form_pg_am) GETSTRUCT(tup); + amoid = amform->oid; + amroutine = GetIndexAmRoutineByAmId(amoid, false); + ReleaseSysCache(tup); + + maxOpNumber = amroutine->amstrategies; + /* if amstrategies is zero, just enforce that op numbers fit in int16 */ + if (maxOpNumber <= 0) + maxOpNumber = SHRT_MAX; + maxProcNumber = amroutine->amsupport; + optsProcNumber = amroutine->amoptsprocnum; + amstorage = amroutine->amstorage; /* AM allows a storage type distinct from the input type */ + + /* XXX Should we make any privilege check against the AM? */ + + /* + * The question of appropriate permissions for CREATE OPERATOR CLASS is + * interesting. Creating an opclass is tantamount to granting public + * execute access on the functions involved, since the index machinery + * generally does not check access permission before using the functions. + * A minimum expectation therefore is that the caller have execute + * privilege with grant option. Since we don't have a way to make the + * opclass go away if the grant option is revoked, we choose instead to + * require ownership of the functions. It's also not entirely clear what + * permissions should be required on the datatype, but ownership seems + * like a safe choice. + * + * Currently, we require superuser privileges to create an opclass. This + * seems necessary because we have no way to validate that the offered set + * of operators and functions are consistent with the AM's expectations. + * It would be nice to provide such a check someday, if it can be done + * without solving the halting problem :-( + * + * XXX re-enable NOT_USED code sections below if you remove this test. + */ + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to create an operator class"))); + + /* Look up the datatype */ + typeoid = typenameTypeId(NULL, stmt->datatype); + +#ifdef NOT_USED + /* XXX this is unnecessary given the superuser check above */ + /* Check we have ownership of the datatype */ + if (!pg_type_ownercheck(typeoid, GetUserId())) + aclcheck_error_type(ACLCHECK_NOT_OWNER, typeoid); +#endif + + /* + * Look up the containing operator family, or create one if FAMILY option + * was omitted and there's not a match already. 
+ */ + if (stmt->opfamilyname) + { + opfamilyoid = get_opfamily_oid(amoid, stmt->opfamilyname, false); + } + else + { + /* Lookup existing family of same name and namespace */ + tup = SearchSysCache3(OPFAMILYAMNAMENSP, + ObjectIdGetDatum(amoid), + PointerGetDatum(opcname), + ObjectIdGetDatum(namespaceoid)); + if (HeapTupleIsValid(tup)) + { + opfamilyoid = ((Form_pg_opfamily) GETSTRUCT(tup))->oid; + + /* + * XXX given the superuser check above, there's no need for an + * ownership check here + */ + ReleaseSysCache(tup); + } + else + { + CreateOpFamilyStmt *opfstmt; + ObjectAddress tmpAddr; + + opfstmt = makeNode(CreateOpFamilyStmt); + opfstmt->opfamilyname = stmt->opclassname; + opfstmt->amname = stmt->amname; + + /* + * Create it ... again no need for more permissions ... + */ + tmpAddr = CreateOpFamily(opfstmt, opcname, namespaceoid, amoid); + opfamilyoid = tmpAddr.objectId; + } + } + + operators = NIL; + procedures = NIL; /* OpFamilyMember lists accumulated below */ + + /* Storage datatype is optional */ + storageoid = InvalidOid; + + /* + * Scan the "items" list to obtain additional info. 
+ */ + foreach(l, stmt->items) + { + CreateOpClassItem *item = lfirst_node(CreateOpClassItem, l); + Oid operOid; + Oid funcOid; + Oid sortfamilyOid; + OpFamilyMember *member; + + switch (item->itemtype) + { + case OPCLASS_ITEM_OPERATOR: + if (item->number <= 0 || item->number > maxOpNumber) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("invalid operator number %d," + " must be between 1 and %d", + item->number, maxOpNumber))); + if (item->name->objargs != NIL) + operOid = LookupOperWithArgs(item->name, false); + else + { + /* Default to binary op on input datatype */ + operOid = LookupOperName(NULL, item->name->objname, + typeoid, typeoid, + false, -1); + } + + if (item->order_family) + sortfamilyOid = get_opfamily_oid(BTREE_AM_OID, + item->order_family, + false); + else + sortfamilyOid = InvalidOid; + +#ifdef NOT_USED + /* XXX this is unnecessary given the superuser check above */ + /* Caller must own operator and its underlying function */ + if (!pg_oper_ownercheck(operOid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_OPERATOR, + get_opname(operOid)); + funcOid = get_opcode(operOid); + if (!pg_proc_ownercheck(funcOid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FUNCTION, + get_func_name(funcOid)); +#endif + + /* Save the info */ + member = (OpFamilyMember *) palloc0(sizeof(OpFamilyMember)); + member->is_func = false; + member->object = operOid; + member->number = item->number; + member->sortfamily = sortfamilyOid; + assignOperTypes(member, amoid, typeoid); + addFamilyMember(&operators, member); + break; + case OPCLASS_ITEM_FUNCTION: + if (item->number <= 0 || item->number > maxProcNumber) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("invalid function number %d," + " must be between 1 and %d", + item->number, maxProcNumber))); + funcOid = LookupFuncWithArgs(OBJECT_FUNCTION, item->name, false); +#ifdef NOT_USED + /* XXX this is unnecessary given the superuser check above */ + /* 
 Caller must own function */ + if (!pg_proc_ownercheck(funcOid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FUNCTION, + get_func_name(funcOid)); +#endif + /* Save the info */ + member = (OpFamilyMember *) palloc0(sizeof(OpFamilyMember)); + member->is_func = true; + member->object = funcOid; + member->number = item->number; + + /* allow overriding of the function's actual arg types */ + if (item->class_args) + processTypesSpec(item->class_args, + &member->lefttype, &member->righttype); + + assignProcTypes(member, amoid, typeoid, optsProcNumber); + addFamilyMember(&procedures, member); + break; + case OPCLASS_ITEM_STORAGETYPE: + if (OidIsValid(storageoid)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("storage type specified more than once"))); + storageoid = typenameTypeId(NULL, item->storedtype); + +#ifdef NOT_USED + /* XXX this is unnecessary given the superuser check above */ + /* Check we have ownership of the datatype */ + if (!pg_type_ownercheck(storageoid, GetUserId())) + aclcheck_error_type(ACLCHECK_NOT_OWNER, storageoid); +#endif + break; + default: + elog(ERROR, "unrecognized item type: %d", item->itemtype); + break; + } + } + + /* + * If storagetype is specified, make sure it's legal. + */ + if (OidIsValid(storageoid)) + { + /* Just drop the spec if same as column datatype */ + if (storageoid == typeoid) + storageoid = InvalidOid; + else if (!amstorage) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("storage type cannot be different from data type for access method \"%s\"", + stmt->amname))); + } + + rel = table_open(OperatorClassRelationId, RowExclusiveLock); + + /* + * Make sure there is no existing opclass of this name (this is just to + * give a more friendly error message than "duplicate key"). 
+ */ + if (SearchSysCacheExists3(CLAAMNAMENSP, + ObjectIdGetDatum(amoid), + CStringGetDatum(opcname), + ObjectIdGetDatum(namespaceoid))) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("operator class \"%s\" for access method \"%s\" already exists", + opcname, stmt->amname))); + + /* + * If we are creating a default opclass, check there isn't one already. + * (Note we do not restrict this test to visible opclasses; this ensures + * that typcache.c can find unique solutions to its questions.) + */ + if (stmt->isDefault) + { + ScanKeyData skey[1]; + SysScanDesc scan; + + ScanKeyInit(&skey[0], + Anum_pg_opclass_opcmethod, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(amoid)); + + scan = systable_beginscan(rel, OpclassAmNameNspIndexId, true, + NULL, 1, skey); + + while (HeapTupleIsValid(tup = systable_getnext(scan))) + { + Form_pg_opclass opclass = (Form_pg_opclass) GETSTRUCT(tup); + + if (opclass->opcintype == typeoid && opclass->opcdefault) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("could not make operator class \"%s\" be default for type %s", + opcname, + TypeNameToString(stmt->datatype)), + errdetail("Operator class \"%s\" already is the default.", + NameStr(opclass->opcname)))); + } + + systable_endscan(scan); + } + + /* + * Okay, let's create the pg_opclass entry. 
+ */ + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + + opclassoid = GetNewOidWithIndex(rel, OpclassOidIndexId, + Anum_pg_opclass_oid); + values[Anum_pg_opclass_oid - 1] = ObjectIdGetDatum(opclassoid); + values[Anum_pg_opclass_opcmethod - 1] = ObjectIdGetDatum(amoid); + namestrcpy(&opcName, opcname); + values[Anum_pg_opclass_opcname - 1] = NameGetDatum(&opcName); + values[Anum_pg_opclass_opcnamespace - 1] = ObjectIdGetDatum(namespaceoid); + values[Anum_pg_opclass_opcowner - 1] = ObjectIdGetDatum(GetUserId()); + values[Anum_pg_opclass_opcfamily - 1] = ObjectIdGetDatum(opfamilyoid); + values[Anum_pg_opclass_opcintype - 1] = ObjectIdGetDatum(typeoid); + values[Anum_pg_opclass_opcdefault - 1] = BoolGetDatum(stmt->isDefault); + values[Anum_pg_opclass_opckeytype - 1] = ObjectIdGetDatum(storageoid); /* InvalidOid when storage type equals input type */ + + tup = heap_form_tuple(rel->rd_att, values, nulls); + + CatalogTupleInsert(rel, tup); + + heap_freetuple(tup); + + /* + * Now that we have the opclass OID, set up default dependency info for + * the pg_amop and pg_amproc entries. Historically, CREATE OPERATOR CLASS + * has created hard dependencies on the opclass, so that's what we use. + */ + foreach(l, operators) + { + OpFamilyMember *op = (OpFamilyMember *) lfirst(l); + + op->ref_is_hard = true; + op->ref_is_family = false; + op->refobjid = opclassoid; + } + foreach(l, procedures) + { + OpFamilyMember *proc = (OpFamilyMember *) lfirst(l); + + proc->ref_is_hard = true; + proc->ref_is_family = false; + proc->refobjid = opclassoid; + } + + /* + * Let the index AM editorialize on the dependency choices. It could also + * do further validation on the operators and functions, if it likes. + */ + if (amroutine->amadjustmembers) + amroutine->amadjustmembers(opfamilyoid, + opclassoid, + operators, + procedures); + + /* + * Now add tuples to pg_amop and pg_amproc tying in the operators and + * functions. Dependencies on them are inserted, too. 
+ */ + storeOperators(stmt->opfamilyname, amoid, opfamilyoid, + operators, false); + storeProcedures(stmt->opfamilyname, amoid, opfamilyoid, + procedures, false); + + /* let event triggers know what happened */ + EventTriggerCollectCreateOpClass(stmt, opclassoid, operators, procedures); + + /* + * Create dependencies for the opclass proper. Note: we do not need a + * dependency link to the AM, because that exists through the opfamily. + */ + myself.classId = OperatorClassRelationId; + myself.objectId = opclassoid; + myself.objectSubId = 0; + + /* dependency on namespace */ + referenced.classId = NamespaceRelationId; + referenced.objectId = namespaceoid; + referenced.objectSubId = 0; + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + + /* dependency on opfamily */ + referenced.classId = OperatorFamilyRelationId; + referenced.objectId = opfamilyoid; + referenced.objectSubId = 0; + recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO); + + /* dependency on indexed datatype */ + referenced.classId = TypeRelationId; + referenced.objectId = typeoid; + referenced.objectSubId = 0; + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + + /* dependency on storage datatype */ + if (OidIsValid(storageoid)) + { + referenced.classId = TypeRelationId; + referenced.objectId = storageoid; + referenced.objectSubId = 0; + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + } + + /* dependency on owner */ + recordDependencyOnOwner(OperatorClassRelationId, opclassoid, GetUserId()); + + /* dependency on extension */ + recordDependencyOnCurrentExtension(&myself, false); + + /* Post creation hook for new operator class */ + InvokeObjectPostCreateHook(OperatorClassRelationId, opclassoid, 0); + + table_close(rel, RowExclusiveLock); + + return myself; +} + + +/* + * DefineOpFamily + * Define a new index operator family. 
+ */ +ObjectAddress +DefineOpFamily(CreateOpFamilyStmt *stmt) +{ + char *opfname; /* name of opfamily we're creating */ + Oid amoid, /* our AM's oid */ + namespaceoid; /* namespace to create opfamily in */ + AclResult aclresult; + + /* Convert list of names to a name and namespace */ + namespaceoid = QualifiedNameGetCreationNamespace(stmt->opfamilyname, + &opfname); + + /* Check we have creation rights in target namespace */ + aclresult = pg_namespace_aclcheck(namespaceoid, GetUserId(), ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_SCHEMA, + get_namespace_name(namespaceoid)); + + /* Get access method OID, throwing an error if it doesn't exist. */ + amoid = get_index_am_oid(stmt->amname, false); + + /* XXX Should we make any privilege check against the AM? */ + + /* + * Currently, we require superuser privileges to create an opfamily. See + * comments in DefineOpClass. + */ + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to create an operator family"))); + + /* Insert pg_opfamily catalog entry */ + return CreateOpFamily(stmt, opfname, namespaceoid, amoid); /* also records dependencies and runs hooks */ +} + + +/* + * AlterOpFamily + * Add or remove operators/procedures within an existing operator family. + * + * Note: this implements only ALTER OPERATOR FAMILY ... ADD/DROP. Some + * other commands called ALTER OPERATOR FAMILY exist, but go through + * different code paths. 
+ */ +Oid +AlterOpFamily(AlterOpFamilyStmt *stmt) +{ + Oid amoid, /* our AM's oid */ + opfamilyoid; /* oid of opfamily */ + int maxOpNumber, /* amstrategies value */ + optsProcNumber, /* amoptsprocnum value */ + maxProcNumber; /* amsupport value */ + HeapTuple tup; + Form_pg_am amform; + IndexAmRoutine *amroutine; + + /* Get necessary info about access method */ + tup = SearchSysCache1(AMNAME, CStringGetDatum(stmt->amname)); + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("access method \"%s\" does not exist", + stmt->amname))); + + amform = (Form_pg_am) GETSTRUCT(tup); + amoid = amform->oid; + amroutine = GetIndexAmRoutineByAmId(amoid, false); + ReleaseSysCache(tup); + + maxOpNumber = amroutine->amstrategies; + /* if amstrategies is zero, just enforce that op numbers fit in int16 */ + if (maxOpNumber <= 0) + maxOpNumber = SHRT_MAX; + maxProcNumber = amroutine->amsupport; + optsProcNumber = amroutine->amoptsprocnum; + + /* XXX Should we make any privilege check against the AM? */ + + /* Look up the opfamily */ + opfamilyoid = get_opfamily_oid(amoid, stmt->opfamilyname, false); + + /* + * Currently, we require superuser privileges to alter an opfamily. + * + * XXX re-enable NOT_USED code sections below if you remove this test. + */ + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to alter an operator family"))); + + /* + * ADD and DROP cases need separate code from here on down. 
+ */ + if (stmt->isDrop) + AlterOpFamilyDrop(stmt, amoid, opfamilyoid, + maxOpNumber, maxProcNumber, stmt->items); + else + AlterOpFamilyAdd(stmt, amoid, opfamilyoid, + maxOpNumber, maxProcNumber, optsProcNumber, + stmt->items); + + return opfamilyoid; +} + +/* + * ADD part of ALTER OP FAMILY + */ +static void +AlterOpFamilyAdd(AlterOpFamilyStmt *stmt, Oid amoid, Oid opfamilyoid, + int maxOpNumber, int maxProcNumber, int optsProcNumber, + List *items) +{ + IndexAmRoutine *amroutine = GetIndexAmRoutineByAmId(amoid, false); + List *operators; /* OpFamilyMember list for operators */ + List *procedures; /* OpFamilyMember list for support procs */ + ListCell *l; + + operators = NIL; + procedures = NIL; + + /* + * Scan the "items" list to obtain additional info. + */ + foreach(l, items) + { + CreateOpClassItem *item = lfirst_node(CreateOpClassItem, l); + Oid operOid; + Oid funcOid; + Oid sortfamilyOid; + OpFamilyMember *member; + + switch (item->itemtype) + { + case OPCLASS_ITEM_OPERATOR: + if (item->number <= 0 || item->number > maxOpNumber) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("invalid operator number %d," + " must be between 1 and %d", + item->number, maxOpNumber))); + if (item->name->objargs != NIL) + operOid = LookupOperWithArgs(item->name, false); + else + { + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("operator argument types must be specified in ALTER OPERATOR FAMILY"))); + operOid = InvalidOid; /* keep compiler quiet */ + } + + if (item->order_family) + sortfamilyOid = get_opfamily_oid(BTREE_AM_OID, + item->order_family, + false); + else + sortfamilyOid = InvalidOid; + +#ifdef NOT_USED + /* XXX this is unnecessary given the superuser check above */ + /* Caller must own operator and its underlying function */ + if (!pg_oper_ownercheck(operOid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_OPERATOR, + get_opname(operOid)); + funcOid = get_opcode(operOid); + if (!pg_proc_ownercheck(funcOid, 
GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FUNCTION, + get_func_name(funcOid)); +#endif + + /* Save the info */ + member = (OpFamilyMember *) palloc0(sizeof(OpFamilyMember)); + member->is_func = false; + member->object = operOid; + member->number = item->number; + member->sortfamily = sortfamilyOid; + /* We can set up dependency fields immediately */ + /* Historically, ALTER ADD has created soft dependencies */ + member->ref_is_hard = false; + member->ref_is_family = true; + member->refobjid = opfamilyoid; + assignOperTypes(member, amoid, InvalidOid); + addFamilyMember(&operators, member); + break; + case OPCLASS_ITEM_FUNCTION: + if (item->number <= 0 || item->number > maxProcNumber) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("invalid function number %d," + " must be between 1 and %d", + item->number, maxProcNumber))); + funcOid = LookupFuncWithArgs(OBJECT_FUNCTION, item->name, false); +#ifdef NOT_USED + /* XXX this is unnecessary given the superuser check above */ + /* Caller must own function */ + if (!pg_proc_ownercheck(funcOid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FUNCTION, + get_func_name(funcOid)); +#endif + + /* Save the info */ + member = (OpFamilyMember *) palloc0(sizeof(OpFamilyMember)); + member->is_func = true; + member->object = funcOid; + member->number = item->number; + /* We can set up dependency fields immediately */ + /* Historically, ALTER ADD has created soft dependencies */ + member->ref_is_hard = false; + member->ref_is_family = true; + member->refobjid = opfamilyoid; + + /* allow overriding of the function's actual arg types */ + if (item->class_args) + processTypesSpec(item->class_args, + &member->lefttype, &member->righttype); + + assignProcTypes(member, amoid, InvalidOid, optsProcNumber); + addFamilyMember(&procedures, member); + break; + case OPCLASS_ITEM_STORAGETYPE: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("STORAGE cannot be specified in ALTER OPERATOR 
FAMILY"))); + break; + default: + elog(ERROR, "unrecognized item type: %d", item->itemtype); + break; + } + } + + /* + * Let the index AM editorialize on the dependency choices. It could also + * do further validation on the operators and functions, if it likes. + */ + if (amroutine->amadjustmembers) + amroutine->amadjustmembers(opfamilyoid, + InvalidOid, /* no specific opclass */ + operators, + procedures); + + /* + * Add tuples to pg_amop and pg_amproc tying in the operators and + * functions. Dependencies on them are inserted, too. + */ + storeOperators(stmt->opfamilyname, amoid, opfamilyoid, + operators, true); + storeProcedures(stmt->opfamilyname, amoid, opfamilyoid, + procedures, true); + + /* make information available to event triggers */ + EventTriggerCollectAlterOpFam(stmt, opfamilyoid, + operators, procedures); +} + +/* + * DROP part of ALTER OP FAMILY + */ +static void +AlterOpFamilyDrop(AlterOpFamilyStmt *stmt, Oid amoid, Oid opfamilyoid, + int maxOpNumber, int maxProcNumber, List *items) +{ + List *operators; /* OpFamilyMember list for operators */ + List *procedures; /* OpFamilyMember list for support procs */ + ListCell *l; + + operators = NIL; + procedures = NIL; + + /* + * Scan the "items" list to obtain additional info. 
+ */ + foreach(l, items) + { + CreateOpClassItem *item = lfirst_node(CreateOpClassItem, l); + Oid lefttype, + righttype; + OpFamilyMember *member; + + switch (item->itemtype) + { + case OPCLASS_ITEM_OPERATOR: + if (item->number <= 0 || item->number > maxOpNumber) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("invalid operator number %d," + " must be between 1 and %d", + item->number, maxOpNumber))); + processTypesSpec(item->class_args, &lefttype, &righttype); + /* Save the info */ + member = (OpFamilyMember *) palloc0(sizeof(OpFamilyMember)); + member->is_func = false; + member->number = item->number; + member->lefttype = lefttype; + member->righttype = righttype; + addFamilyMember(&operators, member); + break; + case OPCLASS_ITEM_FUNCTION: + if (item->number <= 0 || item->number > maxProcNumber) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("invalid function number %d," + " must be between 1 and %d", + item->number, maxProcNumber))); + processTypesSpec(item->class_args, &lefttype, &righttype); + /* Save the info */ + member = (OpFamilyMember *) palloc0(sizeof(OpFamilyMember)); + member->is_func = true; + member->number = item->number; + member->lefttype = lefttype; + member->righttype = righttype; + addFamilyMember(&procedures, member); + break; + case OPCLASS_ITEM_STORAGETYPE: + /* grammar prevents this from appearing */ + default: + elog(ERROR, "unrecognized item type: %d", item->itemtype); + break; + } + } + + /* + * Remove tuples from pg_amop and pg_amproc. 
+ */ + dropOperators(stmt->opfamilyname, amoid, opfamilyoid, operators); + dropProcedures(stmt->opfamilyname, amoid, opfamilyoid, procedures); + + /* make information available to event triggers */ + EventTriggerCollectAlterOpFam(stmt, opfamilyoid, + operators, procedures); +} + + +/* + * Deal with explicit arg types used in ALTER ADD/DROP + */ +static void +processTypesSpec(List *args, Oid *lefttype, Oid *righttype) +{ + TypeName *typeName; + + Assert(args != NIL); + + typeName = (TypeName *) linitial(args); + *lefttype = typenameTypeId(NULL, typeName); + + if (list_length(args) > 1) + { + typeName = (TypeName *) lsecond(args); + *righttype = typenameTypeId(NULL, typeName); + } + else + *righttype = *lefttype; + + if (list_length(args) > 2) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("one or two argument types must be specified"))); +} + + +/* + * Determine the lefttype/righttype to assign to an operator, + * and do any validity checking we can manage. + */ +static void +assignOperTypes(OpFamilyMember *member, Oid amoid, Oid typeoid) +{ + Operator optup; + Form_pg_operator opform; + + /* Fetch the operator definition */ + optup = SearchSysCache1(OPEROID, ObjectIdGetDatum(member->object)); + if (!HeapTupleIsValid(optup)) + elog(ERROR, "cache lookup failed for operator %u", member->object); + opform = (Form_pg_operator) GETSTRUCT(optup); + + /* + * Opfamily operators must be binary. + */ + if (opform->oprkind != 'b') + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("index operators must be binary"))); + + if (OidIsValid(member->sortfamily)) + { + /* + * Ordering op, check index supports that. (We could perhaps also + * check that the operator returns a type supported by the sortfamily, + * but that seems more trouble than it's worth here. If it does not, + * the operator will never be matchable to any ORDER BY clause, but no + * worse consequences can ensue. 
Also, trying to check that would + * create an ordering hazard during dump/reload: it's possible that + * the family has been created but not yet populated with the required + * operators.) + */ + IndexAmRoutine *amroutine = GetIndexAmRoutineByAmId(amoid, false); + + if (!amroutine->amcanorderbyop) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("access method \"%s\" does not support ordering operators", + get_am_name(amoid)))); + } + else + { + /* + * Search operators must return boolean. + */ + if (opform->oprresult != BOOLOID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("index search operators must return boolean"))); + } + + /* + * If lefttype/righttype isn't specified, use the operator's input types + */ + if (!OidIsValid(member->lefttype)) + member->lefttype = opform->oprleft; + if (!OidIsValid(member->righttype)) + member->righttype = opform->oprright; + + ReleaseSysCache(optup); +} + +/* + * Determine the lefttype/righttype to assign to a support procedure, + * and do any validity checking we can manage. 
+ */ +static void +assignProcTypes(OpFamilyMember *member, Oid amoid, Oid typeoid, + int opclassOptsProcNum) +{ + HeapTuple proctup; + Form_pg_proc procform; + + /* Fetch the procedure definition */ + proctup = SearchSysCache1(PROCOID, ObjectIdGetDatum(member->object)); + if (!HeapTupleIsValid(proctup)) + elog(ERROR, "cache lookup failed for function %u", member->object); + procform = (Form_pg_proc) GETSTRUCT(proctup); + + /* Check the signature of the opclass options parsing function */ + if (member->number == opclassOptsProcNum) + { + if (OidIsValid(typeoid)) + { + if ((OidIsValid(member->lefttype) && member->lefttype != typeoid) || + (OidIsValid(member->righttype) && member->righttype != typeoid)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("associated data types for operator class options parsing functions must match opclass input type"))); + } + else + { + if (member->lefttype != member->righttype) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("left and right associated data types for operator class options parsing functions must match"))); + } + + if (procform->prorettype != VOIDOID || + procform->pronargs != 1 || + procform->proargtypes.values[0] != INTERNALOID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("invalid operator class options parsing function"), + errhint("Valid signature of operator class options parsing function is %s.", + "(internal) RETURNS void"))); + } + + /* + * btree comparison procs must be 2-arg procs returning int4. btree + * sortsupport procs must take internal and return void. btree in_range + * procs must be 5-arg procs returning bool. btree equalimage procs must + * take 1 arg and return bool. hash support proc 1 must be a 1-arg proc + * returning int4, while proc 2 must be a 2-arg proc returning int8. + * Otherwise we don't know. 
+ */ + else if (amoid == BTREE_AM_OID) + { + if (member->number == BTORDER_PROC) + { + if (procform->pronargs != 2) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("btree comparison functions must have two arguments"))); + if (procform->prorettype != INT4OID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("btree comparison functions must return integer"))); + + /* + * If lefttype/righttype isn't specified, use the proc's input + * types + */ + if (!OidIsValid(member->lefttype)) + member->lefttype = procform->proargtypes.values[0]; + if (!OidIsValid(member->righttype)) + member->righttype = procform->proargtypes.values[1]; + } + else if (member->number == BTSORTSUPPORT_PROC) + { + if (procform->pronargs != 1 || + procform->proargtypes.values[0] != INTERNALOID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("btree sort support functions must accept type \"internal\""))); + if (procform->prorettype != VOIDOID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("btree sort support functions must return void"))); + + /* + * Can't infer lefttype/righttype from proc, so use default rule + */ + } + else if (member->number == BTINRANGE_PROC) + { + if (procform->pronargs != 5) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("btree in_range functions must have five arguments"))); + if (procform->prorettype != BOOLOID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("btree in_range functions must return boolean"))); + + /* + * If lefttype/righttype isn't specified, use the proc's input + * types (we look at the test-value and offset arguments) + */ + if (!OidIsValid(member->lefttype)) + member->lefttype = procform->proargtypes.values[0]; + if (!OidIsValid(member->righttype)) + member->righttype = procform->proargtypes.values[2]; + } + else if (member->number == BTEQUALIMAGE_PROC) + { + if (procform->pronargs != 1) + ereport(ERROR, 
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("btree equal image functions must have one argument"))); + if (procform->prorettype != BOOLOID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("btree equal image functions must return boolean"))); + + /* + * pg_amproc functions are indexed by (lefttype, righttype), but + * an equalimage function can only be called at CREATE INDEX time. + * The same opclass opcintype OID is always used for leftype and + * righttype. Providing a cross-type routine isn't sensible. + * Reject cross-type ALTER OPERATOR FAMILY ... ADD FUNCTION 4 + * statements here. + */ + if (member->lefttype != member->righttype) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("btree equal image functions must not be cross-type"))); + } + } + else if (amoid == HASH_AM_OID) + { + if (member->number == HASHSTANDARD_PROC) + { + if (procform->pronargs != 1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("hash function 1 must have one argument"))); + if (procform->prorettype != INT4OID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("hash function 1 must return integer"))); + } + else if (member->number == HASHEXTENDED_PROC) + { + if (procform->pronargs != 2) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("hash function 2 must have two arguments"))); + if (procform->prorettype != INT8OID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("hash function 2 must return bigint"))); + } + + /* + * If lefttype/righttype isn't specified, use the proc's input type + */ + if (!OidIsValid(member->lefttype)) + member->lefttype = procform->proargtypes.values[0]; + if (!OidIsValid(member->righttype)) + member->righttype = procform->proargtypes.values[0]; + } + + /* + * The default in CREATE OPERATOR CLASS is to use the class' opcintype as + * lefttype and righttype. 
In CREATE or ALTER OPERATOR FAMILY, opcintype + * isn't available, so make the user specify the types. + */ + if (!OidIsValid(member->lefttype)) + member->lefttype = typeoid; + if (!OidIsValid(member->righttype)) + member->righttype = typeoid; + + if (!OidIsValid(member->lefttype) || !OidIsValid(member->righttype)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("associated data types must be specified for index support function"))); + + ReleaseSysCache(proctup); +} + +/* + * Add a new family member to the appropriate list, after checking for + * duplicated strategy or proc number. + */ +static void +addFamilyMember(List **list, OpFamilyMember *member) +{ + ListCell *l; + + foreach(l, *list) + { + OpFamilyMember *old = (OpFamilyMember *) lfirst(l); + + if (old->number == member->number && + old->lefttype == member->lefttype && + old->righttype == member->righttype) + { + if (member->is_func) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("function number %d for (%s,%s) appears more than once", + member->number, + format_type_be(member->lefttype), + format_type_be(member->righttype)))); + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("operator number %d for (%s,%s) appears more than once", + member->number, + format_type_be(member->lefttype), + format_type_be(member->righttype)))); + } + } + *list = lappend(*list, member); +} + +/* + * Dump the operators to pg_amop + * + * We also make dependency entries in pg_depend for the pg_amop entries. 
+ */ +static void +storeOperators(List *opfamilyname, Oid amoid, Oid opfamilyoid, + List *operators, bool isAdd) +{ + Relation rel; + Datum values[Natts_pg_amop]; + bool nulls[Natts_pg_amop]; + HeapTuple tup; + Oid entryoid; + ObjectAddress myself, + referenced; + ListCell *l; + + rel = table_open(AccessMethodOperatorRelationId, RowExclusiveLock); + + foreach(l, operators) + { + OpFamilyMember *op = (OpFamilyMember *) lfirst(l); + char oppurpose; + + /* + * If adding to an existing family, check for conflict with an + * existing pg_amop entry (just to give a nicer error message) + */ + if (isAdd && + SearchSysCacheExists4(AMOPSTRATEGY, + ObjectIdGetDatum(opfamilyoid), + ObjectIdGetDatum(op->lefttype), + ObjectIdGetDatum(op->righttype), + Int16GetDatum(op->number))) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("operator %d(%s,%s) already exists in operator family \"%s\"", + op->number, + format_type_be(op->lefttype), + format_type_be(op->righttype), + NameListToString(opfamilyname)))); + + oppurpose = OidIsValid(op->sortfamily) ? 
AMOP_ORDER : AMOP_SEARCH; + + /* Create the pg_amop entry */ + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + + entryoid = GetNewOidWithIndex(rel, AccessMethodOperatorOidIndexId, + Anum_pg_amop_oid); + values[Anum_pg_amop_oid - 1] = ObjectIdGetDatum(entryoid); + values[Anum_pg_amop_amopfamily - 1] = ObjectIdGetDatum(opfamilyoid); + values[Anum_pg_amop_amoplefttype - 1] = ObjectIdGetDatum(op->lefttype); + values[Anum_pg_amop_amoprighttype - 1] = ObjectIdGetDatum(op->righttype); + values[Anum_pg_amop_amopstrategy - 1] = Int16GetDatum(op->number); + values[Anum_pg_amop_amoppurpose - 1] = CharGetDatum(oppurpose); + values[Anum_pg_amop_amopopr - 1] = ObjectIdGetDatum(op->object); + values[Anum_pg_amop_amopmethod - 1] = ObjectIdGetDatum(amoid); + values[Anum_pg_amop_amopsortfamily - 1] = ObjectIdGetDatum(op->sortfamily); + + tup = heap_form_tuple(rel->rd_att, values, nulls); + + CatalogTupleInsert(rel, tup); + + heap_freetuple(tup); + + /* Make its dependencies */ + myself.classId = AccessMethodOperatorRelationId; + myself.objectId = entryoid; + myself.objectSubId = 0; + + referenced.classId = OperatorRelationId; + referenced.objectId = op->object; + referenced.objectSubId = 0; + + /* see comments in amapi.h about dependency strength */ + recordDependencyOn(&myself, &referenced, + op->ref_is_hard ? DEPENDENCY_NORMAL : DEPENDENCY_AUTO); + + referenced.classId = op->ref_is_family ? OperatorFamilyRelationId : + OperatorClassRelationId; + referenced.objectId = op->refobjid; + referenced.objectSubId = 0; + + recordDependencyOn(&myself, &referenced, + op->ref_is_hard ? DEPENDENCY_INTERNAL : DEPENDENCY_AUTO); + + /* A search operator also needs a dep on the referenced opfamily */ + if (OidIsValid(op->sortfamily)) + { + referenced.classId = OperatorFamilyRelationId; + referenced.objectId = op->sortfamily; + referenced.objectSubId = 0; + + recordDependencyOn(&myself, &referenced, + op->ref_is_hard ? 
DEPENDENCY_NORMAL : DEPENDENCY_AUTO); + } + + /* Post create hook of this access method operator */ + InvokeObjectPostCreateHook(AccessMethodOperatorRelationId, + entryoid, 0); + } + + table_close(rel, RowExclusiveLock); +} + +/* + * Dump the procedures (support routines) to pg_amproc + * + * We also make dependency entries in pg_depend for the pg_amproc entries. + */ +static void +storeProcedures(List *opfamilyname, Oid amoid, Oid opfamilyoid, + List *procedures, bool isAdd) +{ + Relation rel; + Datum values[Natts_pg_amproc]; + bool nulls[Natts_pg_amproc]; + HeapTuple tup; + Oid entryoid; + ObjectAddress myself, + referenced; + ListCell *l; + + rel = table_open(AccessMethodProcedureRelationId, RowExclusiveLock); + + foreach(l, procedures) + { + OpFamilyMember *proc = (OpFamilyMember *) lfirst(l); + + /* + * If adding to an existing family, check for conflict with an + * existing pg_amproc entry (just to give a nicer error message) + */ + if (isAdd && + SearchSysCacheExists4(AMPROCNUM, + ObjectIdGetDatum(opfamilyoid), + ObjectIdGetDatum(proc->lefttype), + ObjectIdGetDatum(proc->righttype), + Int16GetDatum(proc->number))) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("function %d(%s,%s) already exists in operator family \"%s\"", + proc->number, + format_type_be(proc->lefttype), + format_type_be(proc->righttype), + NameListToString(opfamilyname)))); + + /* Create the pg_amproc entry */ + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + + entryoid = GetNewOidWithIndex(rel, AccessMethodProcedureOidIndexId, + Anum_pg_amproc_oid); + values[Anum_pg_amproc_oid - 1] = ObjectIdGetDatum(entryoid); + values[Anum_pg_amproc_amprocfamily - 1] = ObjectIdGetDatum(opfamilyoid); + values[Anum_pg_amproc_amproclefttype - 1] = ObjectIdGetDatum(proc->lefttype); + values[Anum_pg_amproc_amprocrighttype - 1] = ObjectIdGetDatum(proc->righttype); + values[Anum_pg_amproc_amprocnum - 1] = Int16GetDatum(proc->number); + values[Anum_pg_amproc_amproc - 
1] = ObjectIdGetDatum(proc->object); + + tup = heap_form_tuple(rel->rd_att, values, nulls); + + CatalogTupleInsert(rel, tup); + + heap_freetuple(tup); + + /* Make its dependencies */ + myself.classId = AccessMethodProcedureRelationId; + myself.objectId = entryoid; + myself.objectSubId = 0; + + referenced.classId = ProcedureRelationId; + referenced.objectId = proc->object; + referenced.objectSubId = 0; + + /* see comments in amapi.h about dependency strength */ + recordDependencyOn(&myself, &referenced, + proc->ref_is_hard ? DEPENDENCY_NORMAL : DEPENDENCY_AUTO); + + referenced.classId = proc->ref_is_family ? OperatorFamilyRelationId : + OperatorClassRelationId; + referenced.objectId = proc->refobjid; + referenced.objectSubId = 0; + + recordDependencyOn(&myself, &referenced, + proc->ref_is_hard ? DEPENDENCY_INTERNAL : DEPENDENCY_AUTO); + + /* Post create hook of access method procedure */ + InvokeObjectPostCreateHook(AccessMethodProcedureRelationId, + entryoid, 0); + } + + table_close(rel, RowExclusiveLock); +} + + +/* + * Remove operator entries from an opfamily. + * + * Note: this is only allowed for "loose" members of an opfamily, hence + * behavior is always RESTRICT. 
+ */ +static void +dropOperators(List *opfamilyname, Oid amoid, Oid opfamilyoid, + List *operators) +{ + ListCell *l; + + foreach(l, operators) + { + OpFamilyMember *op = (OpFamilyMember *) lfirst(l); + Oid amopid; + ObjectAddress object; + + amopid = GetSysCacheOid4(AMOPSTRATEGY, Anum_pg_amop_oid, + ObjectIdGetDatum(opfamilyoid), + ObjectIdGetDatum(op->lefttype), + ObjectIdGetDatum(op->righttype), + Int16GetDatum(op->number)); + if (!OidIsValid(amopid)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("operator %d(%s,%s) does not exist in operator family \"%s\"", + op->number, + format_type_be(op->lefttype), + format_type_be(op->righttype), + NameListToString(opfamilyname)))); + + object.classId = AccessMethodOperatorRelationId; + object.objectId = amopid; + object.objectSubId = 0; + + performDeletion(&object, DROP_RESTRICT, 0); + } +} + +/* + * Remove procedure entries from an opfamily. + * + * Note: this is only allowed for "loose" members of an opfamily, hence + * behavior is always RESTRICT. 
+ */ +static void +dropProcedures(List *opfamilyname, Oid amoid, Oid opfamilyoid, + List *procedures) +{ + ListCell *l; + + foreach(l, procedures) + { + OpFamilyMember *op = (OpFamilyMember *) lfirst(l); + Oid amprocid; + ObjectAddress object; + + amprocid = GetSysCacheOid4(AMPROCNUM, Anum_pg_amproc_oid, + ObjectIdGetDatum(opfamilyoid), + ObjectIdGetDatum(op->lefttype), + ObjectIdGetDatum(op->righttype), + Int16GetDatum(op->number)); + if (!OidIsValid(amprocid)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("function %d(%s,%s) does not exist in operator family \"%s\"", + op->number, + format_type_be(op->lefttype), + format_type_be(op->righttype), + NameListToString(opfamilyname)))); + + object.classId = AccessMethodProcedureRelationId; + object.objectId = amprocid; + object.objectSubId = 0; + + performDeletion(&object, DROP_RESTRICT, 0); + } +} + +/* + * Subroutine for ALTER OPERATOR CLASS SET SCHEMA/RENAME + * + * Is there an operator class with the given name and signature already + * in the given namespace? If so, raise an appropriate error message. + */ +void +IsThereOpClassInNamespace(const char *opcname, Oid opcmethod, + Oid opcnamespace) +{ + /* make sure the new name doesn't exist */ + if (SearchSysCacheExists3(CLAAMNAMENSP, + ObjectIdGetDatum(opcmethod), + CStringGetDatum(opcname), + ObjectIdGetDatum(opcnamespace))) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("operator class \"%s\" for access method \"%s\" already exists in schema \"%s\"", + opcname, + get_am_name(opcmethod), + get_namespace_name(opcnamespace)))); +} + +/* + * Subroutine for ALTER OPERATOR FAMILY SET SCHEMA/RENAME + * + * Is there an operator family with the given name and signature already + * in the given namespace? If so, raise an appropriate error message. 
+ */ +void +IsThereOpFamilyInNamespace(const char *opfname, Oid opfmethod, + Oid opfnamespace) +{ + /* make sure the new name doesn't exist */ + if (SearchSysCacheExists3(OPFAMILYAMNAMENSP, + ObjectIdGetDatum(opfmethod), + CStringGetDatum(opfname), + ObjectIdGetDatum(opfnamespace))) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("operator family \"%s\" for access method \"%s\" already exists in schema \"%s\"", + opfname, + get_am_name(opfmethod), + get_namespace_name(opfnamespace)))); +} diff --git a/src/backend/commands/operatorcmds.c b/src/backend/commands/operatorcmds.c new file mode 100644 index 0000000..a5924d7 --- /dev/null +++ b/src/backend/commands/operatorcmds.c @@ -0,0 +1,552 @@ +/*------------------------------------------------------------------------- + * + * operatorcmds.c + * + * Routines for operator manipulation commands + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/operatorcmds.c + * + * DESCRIPTION + * The "DefineFoo" routines take the parse tree and pick out the + * appropriate arguments/flags, passing the results to the + * corresponding "FooDefine" routines (in src/catalog) that do + * the actual catalog-munging. These routines also verify permission + * of the user to execute the command. 
 *
 * NOTES
 *	  These things must be defined and committed in the following order:
 *		"create function":
 *			input/output, recv/send functions
 *		"create type":
 *			type
 *		"create operator":
 *			operators
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/htup_details.h"
#include "access/table.h"
#include "catalog/dependency.h"
#include "catalog/indexing.h"
#include "catalog/objectaccess.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_type.h"
#include "commands/alter.h"
#include "commands/defrem.h"
#include "miscadmin.h"
#include "parser/parse_func.h"
#include "parser/parse_oper.h"
#include "parser/parse_type.h"
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"
#include "utils/syscache.h"

static Oid	ValidateRestrictionEstimator(List *restrictionName);
static Oid	ValidateJoinEstimator(List *joinName);

/*
 * DefineOperator
 *		this function extracts all the information from the
 *		parameter list generated by the parser and then has
 *		OperatorCreate() do all the actual work.
 *
 * 'parameters' is a list of DefElem
 */
ObjectAddress
DefineOperator(List *names, List *parameters)
{
	char	   *oprName;
	Oid			oprNamespace;
	AclResult	aclresult;
	bool		canMerge = false;	/* operator merges */
	bool		canHash = false;	/* operator hashes */
	List	   *functionName = NIL; /* function for operator */
	TypeName   *typeName1 = NULL;	/* first type name */
	TypeName   *typeName2 = NULL;	/* second type name */
	Oid			typeId1 = InvalidOid;	/* types converted to OID */
	Oid			typeId2 = InvalidOid;
	Oid			rettype;
	List	   *commutatorName = NIL;	/* optional commutator operator name */
	List	   *negatorName = NIL;	/* optional negator operator name */
	List	   *restrictionName = NIL;	/* optional restrict. sel. function */
	List	   *joinName = NIL; /* optional join sel. function */
	Oid			functionOid;	/* functions converted to OID */
	Oid			restrictionOid;
	Oid			joinOid;
	Oid			typeId[2];		/* to hold left and right arg */
	int			nargs;
	ListCell   *pl;

	/* Convert list of names to a name and namespace */
	oprNamespace = QualifiedNameGetCreationNamespace(names, &oprName);

	/* Check we have creation rights in target namespace */
	aclresult = pg_namespace_aclcheck(oprNamespace, GetUserId(), ACL_CREATE);
	if (aclresult != ACLCHECK_OK)
		aclcheck_error(aclresult, OBJECT_SCHEMA,
					   get_namespace_name(oprNamespace));

	/*
	 * loop over the definition list and extract the information we need.
	 */
	foreach(pl, parameters)
	{
		DefElem    *defel = (DefElem *) lfirst(pl);

		if (strcmp(defel->defname, "leftarg") == 0)
		{
			typeName1 = defGetTypeName(defel);
			if (typeName1->setof)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
						 errmsg("SETOF type not allowed for operator argument")));
		}
		else if (strcmp(defel->defname, "rightarg") == 0)
		{
			typeName2 = defGetTypeName(defel);
			if (typeName2->setof)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
						 errmsg("SETOF type not allowed for operator argument")));
		}
		/* "function" and "procedure" are equivalent here */
		else if (strcmp(defel->defname, "function") == 0)
			functionName = defGetQualifiedName(defel);
		else if (strcmp(defel->defname, "procedure") == 0)
			functionName = defGetQualifiedName(defel);
		else if (strcmp(defel->defname, "commutator") == 0)
			commutatorName = defGetQualifiedName(defel);
		else if (strcmp(defel->defname, "negator") == 0)
			negatorName = defGetQualifiedName(defel);
		else if (strcmp(defel->defname, "restrict") == 0)
			restrictionName = defGetQualifiedName(defel);
		else if (strcmp(defel->defname, "join") == 0)
			joinName = defGetQualifiedName(defel);
		else if (strcmp(defel->defname, "hashes") == 0)
			canHash = defGetBoolean(defel);
		else if (strcmp(defel->defname, "merges") == 0)
			canMerge = defGetBoolean(defel);
		/* These obsolete options are taken as meaning canMerge */
		else if (strcmp(defel->defname, "sort1") == 0)
			canMerge = true;
		else if (strcmp(defel->defname, "sort2") == 0)
			canMerge = true;
		else if (strcmp(defel->defname, "ltcmp") == 0)
			canMerge = true;
		else if (strcmp(defel->defname, "gtcmp") == 0)
			canMerge = true;
		else
		{
			/* WARNING, not ERROR, for historical backwards-compatibility */
			ereport(WARNING,
					(errcode(ERRCODE_SYNTAX_ERROR),
					 errmsg("operator attribute \"%s\" not recognized",
							defel->defname)));
		}
	}

	/*
	 * make sure we have our required definitions
	 */
	if (functionName == NIL)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
				 errmsg("operator function must be specified")));

	/* Transform type names to type OIDs */
	if (typeName1)
		typeId1 = typenameTypeId(NULL, typeName1);
	if (typeName2)
		typeId2 = typenameTypeId(NULL, typeName2);

	/*
	 * If only the right argument is missing, the user is likely trying to
	 * create a postfix operator, so give them a hint about why that does not
	 * work.  But if both arguments are missing, do not mention postfix
	 * operators, as the user most likely simply neglected to mention the
	 * arguments.
	 */
	if (!OidIsValid(typeId1) && !OidIsValid(typeId2))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
				 errmsg("operator argument types must be specified")));
	if (!OidIsValid(typeId2))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
				 errmsg("operator right argument type must be specified"),
				 errdetail("Postfix operators are not supported.")));

	if (typeName1)
	{
		aclresult = pg_type_aclcheck(typeId1, GetUserId(), ACL_USAGE);
		if (aclresult != ACLCHECK_OK)
			aclcheck_error_type(aclresult, typeId1);
	}

	if (typeName2)
	{
		aclresult = pg_type_aclcheck(typeId2, GetUserId(), ACL_USAGE);
		if (aclresult != ACLCHECK_OK)
			aclcheck_error_type(aclresult, typeId2);
	}

	/*
	 * Look up the operator's underlying function.
	 *
	 * A missing left argument means a one-argument (prefix) operator; only
	 * then do we look up a 1-arg function.  (Postfix was rejected above.)
	 */
	if (!OidIsValid(typeId1))
	{
		typeId[0] = typeId2;
		nargs = 1;
	}
	else if (!OidIsValid(typeId2))
	{
		typeId[0] = typeId1;
		nargs = 1;
	}
	else
	{
		typeId[0] = typeId1;
		typeId[1] = typeId2;
		nargs = 2;
	}
	functionOid = LookupFuncName(functionName, nargs, typeId, false);

	/*
	 * We require EXECUTE rights for the function.  This isn't strictly
	 * necessary, since EXECUTE will be checked at any attempted use of the
	 * operator, but it seems like a good idea anyway.
	 */
	aclresult = pg_proc_aclcheck(functionOid, GetUserId(), ACL_EXECUTE);
	if (aclresult != ACLCHECK_OK)
		aclcheck_error(aclresult, OBJECT_FUNCTION,
					   NameListToString(functionName));

	rettype = get_func_rettype(functionOid);
	aclresult = pg_type_aclcheck(rettype, GetUserId(), ACL_USAGE);
	if (aclresult != ACLCHECK_OK)
		aclcheck_error_type(aclresult, rettype);

	/*
	 * Look up restriction and join estimators if specified
	 */
	if (restrictionName)
		restrictionOid = ValidateRestrictionEstimator(restrictionName);
	else
		restrictionOid = InvalidOid;
	if (joinName)
		joinOid = ValidateJoinEstimator(joinName);
	else
		joinOid = InvalidOid;

	/*
	 * now have OperatorCreate do all the work..
	 */
	return
		OperatorCreate(oprName, /* operator name */
					   oprNamespace,	/* namespace */
					   typeId1, /* left type id */
					   typeId2, /* right type id */
					   functionOid, /* function for operator */
					   commutatorName,	/* optional commutator operator name */
					   negatorName, /* optional negator operator name */
					   restrictionOid,	/* optional restrict. sel. function */
					   joinOid, /* optional join sel. function name */
					   canMerge,	/* operator merges */
					   canHash);	/* operator hashes */
}

/*
 * Look up a restriction estimator function by name, and verify that it has
 * the correct signature and we have the permissions to attach it to an
 * operator.
 */
static Oid
ValidateRestrictionEstimator(List *restrictionName)
{
	Oid			typeId[4];
	Oid			restrictionOid;
	AclResult	aclresult;

	typeId[0] = INTERNALOID;	/* PlannerInfo */
	typeId[1] = OIDOID;			/* operator OID */
	typeId[2] = INTERNALOID;	/* args list */
	typeId[3] = INT4OID;		/* varRelid */

	restrictionOid = LookupFuncName(restrictionName, 4, typeId, false);

	/* estimators must return float8 */
	if (get_func_rettype(restrictionOid) != FLOAT8OID)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("restriction estimator function %s must return type %s",
						NameListToString(restrictionName), "float8")));

	/* Require EXECUTE rights for the estimator */
	aclresult = pg_proc_aclcheck(restrictionOid, GetUserId(), ACL_EXECUTE);
	if (aclresult != ACLCHECK_OK)
		aclcheck_error(aclresult, OBJECT_FUNCTION,
					   NameListToString(restrictionName));

	return restrictionOid;
}

/*
 * Look up a join estimator function by name, and verify that it has the
 * correct signature and we have the permissions to attach it to an
 * operator.
 */
static Oid
ValidateJoinEstimator(List *joinName)
{
	Oid			typeId[5];
	Oid			joinOid;
	Oid			joinOid2;
	AclResult	aclresult;

	typeId[0] = INTERNALOID;	/* PlannerInfo */
	typeId[1] = OIDOID;			/* operator OID */
	typeId[2] = INTERNALOID;	/* args list */
	typeId[3] = INT2OID;		/* jointype */
	typeId[4] = INTERNALOID;	/* SpecialJoinInfo */

	/*
	 * As of Postgres 8.4, the preferred signature for join estimators has 5
	 * arguments, but we still allow the old 4-argument form.  Whine about
	 * ambiguity if both forms exist.
	 *
	 * (The 4-argument lookup simply ignores typeId[4].)
	 */
	joinOid = LookupFuncName(joinName, 5, typeId, true);
	joinOid2 = LookupFuncName(joinName, 4, typeId, true);
	if (OidIsValid(joinOid))
	{
		if (OidIsValid(joinOid2))
			ereport(ERROR,
					(errcode(ERRCODE_AMBIGUOUS_FUNCTION),
					 errmsg("join estimator function %s has multiple matches",
							NameListToString(joinName))));
	}
	else
	{
		joinOid = joinOid2;
		/* If not found, reference the 5-argument signature in error msg */
		if (!OidIsValid(joinOid))
			joinOid = LookupFuncName(joinName, 5, typeId, false);
	}

	/* estimators must return float8 */
	if (get_func_rettype(joinOid) != FLOAT8OID)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("join estimator function %s must return type %s",
						NameListToString(joinName), "float8")));

	/* Require EXECUTE rights for the estimator */
	aclresult = pg_proc_aclcheck(joinOid, GetUserId(), ACL_EXECUTE);
	if (aclresult != ACLCHECK_OK)
		aclcheck_error(aclresult, OBJECT_FUNCTION,
					   NameListToString(joinName));

	return joinOid;
}

/*
 * Guts of operator deletion.
+ */ +void +RemoveOperatorById(Oid operOid) +{ + Relation relation; + HeapTuple tup; + Form_pg_operator op; + + relation = table_open(OperatorRelationId, RowExclusiveLock); + + tup = SearchSysCache1(OPEROID, ObjectIdGetDatum(operOid)); + if (!HeapTupleIsValid(tup)) /* should not happen */ + elog(ERROR, "cache lookup failed for operator %u", operOid); + op = (Form_pg_operator) GETSTRUCT(tup); + + /* + * Reset links from commutator and negator, if any. In case of a + * self-commutator or self-negator, this means we have to re-fetch the + * updated tuple. (We could optimize away updates on the tuple we're + * about to drop, but it doesn't seem worth convoluting the logic for.) + */ + if (OidIsValid(op->oprcom) || OidIsValid(op->oprnegate)) + { + OperatorUpd(operOid, op->oprcom, op->oprnegate, true); + if (operOid == op->oprcom || operOid == op->oprnegate) + { + ReleaseSysCache(tup); + tup = SearchSysCache1(OPEROID, ObjectIdGetDatum(operOid)); + if (!HeapTupleIsValid(tup)) /* should not happen */ + elog(ERROR, "cache lookup failed for operator %u", operOid); + } + } + + CatalogTupleDelete(relation, &tup->t_self); + + ReleaseSysCache(tup); + + table_close(relation, RowExclusiveLock); +} + +/* + * AlterOperator + * routine implementing ALTER OPERATOR SET (option = ...). + * + * Currently, only RESTRICT and JOIN estimator functions can be changed. + */ +ObjectAddress +AlterOperator(AlterOperatorStmt *stmt) +{ + ObjectAddress address; + Oid oprId; + Relation catalog; + HeapTuple tup; + Form_pg_operator oprForm; + int i; + ListCell *pl; + Datum values[Natts_pg_operator]; + bool nulls[Natts_pg_operator]; + bool replaces[Natts_pg_operator]; + List *restrictionName = NIL; /* optional restrict. sel. function */ + bool updateRestriction = false; + Oid restrictionOid; + List *joinName = NIL; /* optional join sel. 
function */ + bool updateJoin = false; + Oid joinOid; + + /* Look up the operator */ + oprId = LookupOperWithArgs(stmt->opername, false); + catalog = table_open(OperatorRelationId, RowExclusiveLock); + tup = SearchSysCacheCopy1(OPEROID, ObjectIdGetDatum(oprId)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for operator %u", oprId); + oprForm = (Form_pg_operator) GETSTRUCT(tup); + + /* Process options */ + foreach(pl, stmt->options) + { + DefElem *defel = (DefElem *) lfirst(pl); + List *param; + + if (defel->arg == NULL) + param = NIL; /* NONE, removes the function */ + else + param = defGetQualifiedName(defel); + + if (strcmp(defel->defname, "restrict") == 0) + { + restrictionName = param; + updateRestriction = true; + } + else if (strcmp(defel->defname, "join") == 0) + { + joinName = param; + updateJoin = true; + } + + /* + * The rest of the options that CREATE accepts cannot be changed. + * Check for them so that we can give a meaningful error message. + */ + else if (strcmp(defel->defname, "leftarg") == 0 || + strcmp(defel->defname, "rightarg") == 0 || + strcmp(defel->defname, "function") == 0 || + strcmp(defel->defname, "procedure") == 0 || + strcmp(defel->defname, "commutator") == 0 || + strcmp(defel->defname, "negator") == 0 || + strcmp(defel->defname, "hashes") == 0 || + strcmp(defel->defname, "merges") == 0) + { + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("operator attribute \"%s\" cannot be changed", + defel->defname))); + } + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("operator attribute \"%s\" not recognized", + defel->defname))); + } + + /* Check permissions. Must be owner. 
*/ + if (!pg_oper_ownercheck(oprId, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_OPERATOR, + NameStr(oprForm->oprname)); + + /* + * Look up restriction and join estimators if specified + */ + if (restrictionName) + restrictionOid = ValidateRestrictionEstimator(restrictionName); + else + restrictionOid = InvalidOid; + if (joinName) + joinOid = ValidateJoinEstimator(joinName); + else + joinOid = InvalidOid; + + /* Perform additional checks, like OperatorCreate does */ + if (!(OidIsValid(oprForm->oprleft) && OidIsValid(oprForm->oprright))) + { + /* If it's not a binary op, these things mustn't be set: */ + if (OidIsValid(joinOid)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("only binary operators can have join selectivity"))); + } + + if (oprForm->oprresult != BOOLOID) + { + if (OidIsValid(restrictionOid)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("only boolean operators can have restriction selectivity"))); + if (OidIsValid(joinOid)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("only boolean operators can have join selectivity"))); + } + + /* Update the tuple */ + for (i = 0; i < Natts_pg_operator; ++i) + { + values[i] = (Datum) 0; + replaces[i] = false; + nulls[i] = false; + } + if (updateRestriction) + { + replaces[Anum_pg_operator_oprrest - 1] = true; + values[Anum_pg_operator_oprrest - 1] = restrictionOid; + } + if (updateJoin) + { + replaces[Anum_pg_operator_oprjoin - 1] = true; + values[Anum_pg_operator_oprjoin - 1] = joinOid; + } + + tup = heap_modify_tuple(tup, RelationGetDescr(catalog), + values, nulls, replaces); + + CatalogTupleUpdate(catalog, &tup->t_self, tup); + + address = makeOperatorDependencies(tup, false, true); + + InvokeObjectPostAlterHook(OperatorRelationId, oprId, 0); + + table_close(catalog, NoLock); + + return address; +} diff --git a/src/backend/commands/policy.c b/src/backend/commands/policy.c new file mode 100644 index 
0000000..a59ee3b --- /dev/null +++ b/src/backend/commands/policy.c @@ -0,0 +1,1285 @@ +/*------------------------------------------------------------------------- + * + * policy.c + * Commands for manipulating policies. + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/backend/commands/policy.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/genam.h" +#include "access/htup.h" +#include "access/htup_details.h" +#include "access/relation.h" +#include "access/sysattr.h" +#include "access/table.h" +#include "access/xact.h" +#include "catalog/catalog.h" +#include "catalog/dependency.h" +#include "catalog/indexing.h" +#include "catalog/namespace.h" +#include "catalog/objectaccess.h" +#include "catalog/pg_authid.h" +#include "catalog/pg_policy.h" +#include "catalog/pg_type.h" +#include "commands/policy.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "nodes/pg_list.h" +#include "parser/parse_clause.h" +#include "parser/parse_collate.h" +#include "parser/parse_node.h" +#include "parser/parse_relation.h" +#include "rewrite/rewriteManip.h" +#include "rewrite/rowsecurity.h" +#include "storage/lock.h" +#include "utils/acl.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" +#include "utils/inval.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/rel.h" +#include "utils/syscache.h" + +static void RangeVarCallbackForPolicy(const RangeVar *rv, + Oid relid, Oid oldrelid, void *arg); +static char parse_policy_command(const char *cmd_name); +static Datum *policy_role_list_to_array(List *roles, int *num_roles); + +/* + * Callback to RangeVarGetRelidExtended(). + * + * Checks the following: + * - the relation specified is a table. + * - current user owns the table. + * - the table is not a system table. 
 *
 * If any of these checks fails then an error is raised.
 */
static void
RangeVarCallbackForPolicy(const RangeVar *rv, Oid relid, Oid oldrelid,
						  void *arg)
{
	HeapTuple	tuple;
	Form_pg_class classform;
	char		relkind;

	tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
	if (!HeapTupleIsValid(tuple))
		return;					/* rel gone (e.g. concurrently dropped?);
								 * nothing to check */

	classform = (Form_pg_class) GETSTRUCT(tuple);
	relkind = classform->relkind;

	/* Must own relation. */
	if (!pg_class_ownercheck(relid, GetUserId()))
		aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(get_rel_relkind(relid)), rv->relname);

	/* No system table modifications unless explicitly allowed. */
	if (!allowSystemTableMods && IsSystemClass(relid, classform))
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied: \"%s\" is a system catalog",
						rv->relname)));

	/* Relation type MUST be a table. */
	if (relkind != RELKIND_RELATION && relkind != RELKIND_PARTITIONED_TABLE)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("\"%s\" is not a table", rv->relname)));

	ReleaseSysCache(tuple);
}

/*
 * parse_policy_command -
 *	 helper function to convert full command strings to their char
 *	 representation.
 *
 * cmd_name - full string command name.  Valid values are 'all', 'select',
 * 'insert', 'update' and 'delete'.
+ * + */ +static char +parse_policy_command(const char *cmd_name) +{ + char polcmd; + + if (!cmd_name) + elog(ERROR, "unrecognized policy command"); + + if (strcmp(cmd_name, "all") == 0) + polcmd = '*'; + else if (strcmp(cmd_name, "select") == 0) + polcmd = ACL_SELECT_CHR; + else if (strcmp(cmd_name, "insert") == 0) + polcmd = ACL_INSERT_CHR; + else if (strcmp(cmd_name, "update") == 0) + polcmd = ACL_UPDATE_CHR; + else if (strcmp(cmd_name, "delete") == 0) + polcmd = ACL_DELETE_CHR; + else + elog(ERROR, "unrecognized policy command"); + + return polcmd; +} + +/* + * policy_role_list_to_array + * helper function to convert a list of RoleSpecs to an array of + * role id Datums. + */ +static Datum * +policy_role_list_to_array(List *roles, int *num_roles) +{ + Datum *role_oids; + ListCell *cell; + int i = 0; + + /* Handle no roles being passed in as being for public */ + if (roles == NIL) + { + *num_roles = 1; + role_oids = (Datum *) palloc(*num_roles * sizeof(Datum)); + role_oids[0] = ObjectIdGetDatum(ACL_ID_PUBLIC); + + return role_oids; + } + + *num_roles = list_length(roles); + role_oids = (Datum *) palloc(*num_roles * sizeof(Datum)); + + foreach(cell, roles) + { + RoleSpec *spec = lfirst(cell); + + /* + * PUBLIC covers all roles, so it only makes sense alone. + */ + if (spec->roletype == ROLESPEC_PUBLIC) + { + if (*num_roles != 1) + { + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("ignoring specified roles other than PUBLIC"), + errhint("All roles are members of the PUBLIC role."))); + *num_roles = 1; + } + role_oids[0] = ObjectIdGetDatum(ACL_ID_PUBLIC); + + return role_oids; + } + else + role_oids[i++] = + ObjectIdGetDatum(get_rolespec_oid(spec, false)); + } + + return role_oids; +} + +/* + * Load row security policy from the catalog, and store it in + * the relation's relcache entry. + * + * Note that caller should have verified that pg_class.relrowsecurity + * is true for this relation. 
 */
void
RelationBuildRowSecurity(Relation relation)
{
	MemoryContext rscxt;
	MemoryContext oldcxt = CurrentMemoryContext;
	RowSecurityDesc *rsdesc;
	Relation	catalog;
	ScanKeyData skey;
	SysScanDesc sscan;
	HeapTuple	tuple;

	/*
	 * Create a memory context to hold everything associated with this
	 * relation's row security policy.  This makes it easy to clean up during
	 * a relcache flush.  However, to cover the possibility of an error
	 * partway through, we don't make the context long-lived till we're done.
	 */
	rscxt = AllocSetContextCreate(CurrentMemoryContext,
								  "row security descriptor",
								  ALLOCSET_SMALL_SIZES);
	MemoryContextCopyAndSetIdentifier(rscxt,
									  RelationGetRelationName(relation));

	rsdesc = MemoryContextAllocZero(rscxt, sizeof(RowSecurityDesc));
	rsdesc->rscxt = rscxt;

	/*
	 * Now scan pg_policy for RLS policies associated with this relation.
	 * Because we use the index on (polrelid, polname), we should consistently
	 * visit the rel's policies in name order, at least when system indexes
	 * aren't disabled.  This simplifies equalRSDesc().
	 */
	catalog = table_open(PolicyRelationId, AccessShareLock);

	ScanKeyInit(&skey,
				Anum_pg_policy_polrelid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(RelationGetRelid(relation)));

	sscan = systable_beginscan(catalog, PolicyPolrelidPolnameIndexId, true,
							   NULL, 1, &skey);

	while (HeapTupleIsValid(tuple = systable_getnext(sscan)))
	{
		Form_pg_policy policy_form = (Form_pg_policy) GETSTRUCT(tuple);
		RowSecurityPolicy *policy;
		Datum		datum;
		bool		isnull;
		char	   *str_value;

		policy = MemoryContextAllocZero(rscxt, sizeof(RowSecurityPolicy));

		/*
		 * Note: we must be sure that pass-by-reference data gets copied into
		 * rscxt.  We avoid making that context current over wider spans than
		 * we have to, though.
		 */

		/* Get policy command */
		policy->polcmd = policy_form->polcmd;

		/* Get policy, permissive or restrictive */
		policy->permissive = policy_form->polpermissive;

		/* Get policy name */
		policy->policy_name =
			MemoryContextStrdup(rscxt, NameStr(policy_form->polname));

		/* Get policy roles */
		datum = heap_getattr(tuple, Anum_pg_policy_polroles,
							 RelationGetDescr(catalog), &isnull);
		/* shouldn't be null, but let's check for luck */
		if (isnull)
			elog(ERROR, "unexpected null value in pg_policy.polroles");
		MemoryContextSwitchTo(rscxt);
		policy->roles = DatumGetArrayTypePCopy(datum);
		MemoryContextSwitchTo(oldcxt);

		/* Get policy qual (stored as a nodeToString text column) */
		datum = heap_getattr(tuple, Anum_pg_policy_polqual,
							 RelationGetDescr(catalog), &isnull);
		if (!isnull)
		{
			str_value = TextDatumGetCString(datum);
			MemoryContextSwitchTo(rscxt);
			policy->qual = (Expr *) stringToNode(str_value);
			MemoryContextSwitchTo(oldcxt);
			pfree(str_value);
		}
		else
			policy->qual = NULL;

		/* Get WITH CHECK qual */
		datum = heap_getattr(tuple, Anum_pg_policy_polwithcheck,
							 RelationGetDescr(catalog), &isnull);
		if (!isnull)
		{
			str_value = TextDatumGetCString(datum);
			MemoryContextSwitchTo(rscxt);
			policy->with_check_qual = (Expr *) stringToNode(str_value);
			MemoryContextSwitchTo(oldcxt);
			pfree(str_value);
		}
		else
			policy->with_check_qual = NULL;

		/* We want to cache whether there are SubLinks in these expressions */
		policy->hassublinks = checkExprHasSubLink((Node *) policy->qual) ||
			checkExprHasSubLink((Node *) policy->with_check_qual);

		/*
		 * Add this object to list.  For historical reasons, the list is built
		 * in reverse order.
		 */
		MemoryContextSwitchTo(rscxt);
		rsdesc->policies = lcons(policy, rsdesc->policies);
		MemoryContextSwitchTo(oldcxt);
	}

	systable_endscan(sscan);
	table_close(catalog, AccessShareLock);

	/*
	 * Success.  Reparent the descriptor's memory context under
	 * CacheMemoryContext so that it will live indefinitely, then attach the
	 * policy descriptor to the relcache entry.
	 */
	MemoryContextSetParent(rscxt, CacheMemoryContext);

	relation->rd_rsdesc = rsdesc;
}

/*
 * RemovePolicyById -
 *	 remove a policy by its OID.  If a policy does not exist with the provided
 *	 oid, then an error is raised.
 *
 * policy_id - the oid of the policy.
 */
void
RemovePolicyById(Oid policy_id)
{
	Relation	pg_policy_rel;
	SysScanDesc sscan;
	ScanKeyData skey[1];
	HeapTuple	tuple;
	Oid			relid;
	Relation	rel;

	pg_policy_rel = table_open(PolicyRelationId, RowExclusiveLock);

	/*
	 * Find the policy to delete.
	 */
	ScanKeyInit(&skey[0],
				Anum_pg_policy_oid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(policy_id));

	sscan = systable_beginscan(pg_policy_rel, PolicyOidIndexId, true,
							   NULL, 1, skey);

	tuple = systable_getnext(sscan);

	/* If the policy exists, then remove it, otherwise raise an error. */
	if (!HeapTupleIsValid(tuple))
		elog(ERROR, "could not find tuple for policy %u", policy_id);

	/*
	 * Open and exclusive-lock the relation the policy belongs to.  (We need
	 * exclusive lock to lock out queries that might otherwise depend on the
	 * set of policies the rel has; furthermore we've got to hold the lock
	 * till commit.)
	 */
	relid = ((Form_pg_policy) GETSTRUCT(tuple))->polrelid;

	rel = table_open(relid, AccessExclusiveLock);
	if (rel->rd_rel->relkind != RELKIND_RELATION &&
		rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("\"%s\" is not a table",
						RelationGetRelationName(rel))));

	if (!allowSystemTableMods && IsSystemRelation(rel))
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied: \"%s\" is a system catalog",
						RelationGetRelationName(rel))));

	CatalogTupleDelete(pg_policy_rel, &tuple->t_self);

	systable_endscan(sscan);

	/*
	 * Note that, unlike some of the other flags in pg_class, relrowsecurity
	 * is not just an indication of if policies exist.  When relrowsecurity is
	 * set by a user, then all access to the relation must be through a
	 * policy.  If no policy is defined for the relation then a default-deny
	 * policy is created and all records are filtered (except for queries from
	 * the owner).
	 */
	CacheInvalidateRelcache(rel);

	/* keep the AccessExclusiveLock till commit */
	table_close(rel, NoLock);

	/* Clean up */
	table_close(pg_policy_rel, RowExclusiveLock);
}

/*
 * RemoveRoleFromObjectPolicy -
 *	 remove a role from a policy's applicable-roles list.
 *
 * Returns true if the role was successfully removed from the policy.
 * Returns false if the role was not removed because it would have left
 * polroles empty (which is disallowed, though perhaps it should not be).
 * On false return, the caller should instead drop the policy altogether.
 *
 * roleid - the oid of the role to remove
 * classid - should always be PolicyRelationId
 * policy_id - the oid of the policy.
 */
bool
RemoveRoleFromObjectPolicy(Oid roleid, Oid classid, Oid policy_id)
{
	Relation	pg_policy_rel;
	SysScanDesc sscan;
	ScanKeyData skey[1];
	HeapTuple	tuple;
	Oid			relid;
	ArrayType  *policy_roles;
	Datum		roles_datum;
	Oid		   *roles;
	int			num_roles;
	Datum	   *role_oids;
	bool		attr_isnull;
	bool		keep_policy = true;
	int			i,
				j;

	Assert(classid == PolicyRelationId);

	pg_policy_rel = table_open(PolicyRelationId, RowExclusiveLock);

	/*
	 * Find the policy to update.
	 */
	ScanKeyInit(&skey[0],
				Anum_pg_policy_oid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(policy_id));

	sscan = systable_beginscan(pg_policy_rel, PolicyOidIndexId, true,
							   NULL, 1, skey);

	tuple = systable_getnext(sscan);

	/* Raise an error if we don't find the policy. */
	if (!HeapTupleIsValid(tuple))
		elog(ERROR, "could not find tuple for policy %u", policy_id);

	/* Identify rel the policy belongs to */
	relid = ((Form_pg_policy) GETSTRUCT(tuple))->polrelid;

	/* Get the current set of roles */
	roles_datum = heap_getattr(tuple,
							   Anum_pg_policy_polroles,
							   RelationGetDescr(pg_policy_rel),
							   &attr_isnull);

	Assert(!attr_isnull);

	policy_roles = DatumGetArrayTypePCopy(roles_datum);
	roles = (Oid *) ARR_DATA_PTR(policy_roles);
	num_roles = ARR_DIMS(policy_roles)[0];

	/*
	 * Rebuild the polroles array, without any mentions of the target role.
	 * Ordinarily there'd be exactly one, but we must cope with duplicate
	 * mentions, since CREATE/ALTER POLICY historically have allowed that.
	 */
	role_oids = (Datum *) palloc(num_roles * sizeof(Datum));
	for (i = 0, j = 0; i < num_roles; i++)
	{
		if (roles[i] != roleid)
			role_oids[j++] = ObjectIdGetDatum(roles[i]);
	}
	num_roles = j;

	/* If any roles remain, update the policy entry. */
	if (num_roles > 0)
	{
		ArrayType  *role_ids;
		Datum		values[Natts_pg_policy];
		bool		isnull[Natts_pg_policy];
		bool		replaces[Natts_pg_policy];
		HeapTuple	new_tuple;
		HeapTuple	reltup;
		ObjectAddress target;
		ObjectAddress myself;

		/* zero-clear */
		memset(values, 0, sizeof(values));
		memset(replaces, 0, sizeof(replaces));
		memset(isnull, 0, sizeof(isnull));

		/* This is the array for the new tuple */
		role_ids = construct_array(role_oids, num_roles, OIDOID,
								   sizeof(Oid), true, TYPALIGN_INT);

		replaces[Anum_pg_policy_polroles - 1] = true;
		values[Anum_pg_policy_polroles - 1] = PointerGetDatum(role_ids);

		new_tuple = heap_modify_tuple(tuple,
									  RelationGetDescr(pg_policy_rel),
									  values, isnull, replaces);
		CatalogTupleUpdate(pg_policy_rel, &new_tuple->t_self, new_tuple);

		/* Remove all the old shared dependencies (roles) */
		deleteSharedDependencyRecordsFor(PolicyRelationId, policy_id, 0);

		/* Record the new shared dependencies (roles) */
		myself.classId = PolicyRelationId;
		myself.objectId = policy_id;
		myself.objectSubId = 0;

		target.classId = AuthIdRelationId;
		target.objectSubId = 0;
		for (i = 0; i < num_roles; i++)
		{
			target.objectId = DatumGetObjectId(role_oids[i]);
			/* no need for dependency on the public role */
			if (target.objectId != ACL_ID_PUBLIC)
				recordSharedDependencyOn(&myself, &target,
										 SHARED_DEPENDENCY_POLICY);
		}

		InvokeObjectPostAlterHook(PolicyRelationId, policy_id, 0);

		heap_freetuple(new_tuple);

		/* Make updates visible */
		CommandCounterIncrement();

		/*
		 * Invalidate relcache entry for rel the policy belongs to, to force
		 * redoing any dependent plans.  In case of a race condition where the
		 * rel was just dropped, we need do nothing.
		 */
		reltup = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
		if (HeapTupleIsValid(reltup))
		{
			CacheInvalidateRelcacheByTuple(reltup);
			ReleaseSysCache(reltup);
		}
	}
	else
	{
		/* No roles would remain, so drop the policy instead. */
		keep_policy = false;
	}

	/* Clean up. */
	systable_endscan(sscan);

	table_close(pg_policy_rel, RowExclusiveLock);

	return keep_policy;
}

/*
 * CreatePolicy -
 *	 handles the execution of the CREATE POLICY command.
 *
 * stmt - the CreatePolicyStmt that describes the policy to create.
 */
ObjectAddress
CreatePolicy(CreatePolicyStmt *stmt)
{
	Relation	pg_policy_rel;
	Oid			policy_id;
	Relation	target_table;
	Oid			table_id;
	char		polcmd;
	Datum	   *role_oids;
	int			nitems = 0;
	ArrayType  *role_ids;
	ParseState *qual_pstate;
	ParseState *with_check_pstate;
	ParseNamespaceItem *nsitem;
	Node	   *qual;
	Node	   *with_check_qual;
	ScanKeyData skey[2];
	SysScanDesc sscan;
	HeapTuple	policy_tuple;
	Datum		values[Natts_pg_policy];
	bool		isnull[Natts_pg_policy];
	ObjectAddress target;
	ObjectAddress myself;
	int			i;

	/* Parse command */
	polcmd = parse_policy_command(stmt->cmd_name);

	/*
	 * If the command is SELECT or DELETE then WITH CHECK should be NULL.
	 */
	if ((polcmd == ACL_SELECT_CHR || polcmd == ACL_DELETE_CHR)
		&& stmt->with_check != NULL)
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 errmsg("WITH CHECK cannot be applied to SELECT or DELETE")));

	/*
	 * If the command is INSERT then WITH CHECK should be the only expression
	 * provided.
	 */
	if (polcmd == ACL_INSERT_CHR && stmt->qual != NULL)
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 errmsg("only WITH CHECK expression allowed for INSERT")));

	/* Collect role ids */
	role_oids = policy_role_list_to_array(stmt->roles, &nitems);
	role_ids = construct_array(role_oids, nitems, OIDOID,
							   sizeof(Oid), true, TYPALIGN_INT);

	/* Parse the supplied clause */
	qual_pstate = make_parsestate(NULL);
	with_check_pstate = make_parsestate(NULL);

	/* zero-clear */
	memset(values, 0, sizeof(values));
	memset(isnull, 0, sizeof(isnull));

	/* Get id of table.  Also handles permissions checks. */
	table_id = RangeVarGetRelidExtended(stmt->table, AccessExclusiveLock,
										0,
										RangeVarCallbackForPolicy,
										(void *) stmt);

	/* Open target_table to build quals. No additional lock is necessary. */
	target_table = relation_open(table_id, NoLock);

	/* Add for the regular security quals */
	nsitem = addRangeTableEntryForRelation(qual_pstate, target_table,
										   AccessShareLock,
										   NULL, false, false);
	addNSItemToQuery(qual_pstate, nsitem, false, true, true);

	/* Add for the with-check quals */
	nsitem = addRangeTableEntryForRelation(with_check_pstate, target_table,
										   AccessShareLock,
										   NULL, false, false);
	addNSItemToQuery(with_check_pstate, nsitem, false, true, true);

	qual = transformWhereClause(qual_pstate,
								stmt->qual,
								EXPR_KIND_POLICY,
								"POLICY");

	with_check_qual = transformWhereClause(with_check_pstate,
										   stmt->with_check,
										   EXPR_KIND_POLICY,
										   "POLICY");

	/* Fix up collation information */
	assign_expr_collations(qual_pstate, qual);
	assign_expr_collations(with_check_pstate, with_check_qual);

	/* Open pg_policy catalog */
	pg_policy_rel = table_open(PolicyRelationId, RowExclusiveLock);

	/* Set key - policy's relation id. */
	ScanKeyInit(&skey[0],
				Anum_pg_policy_polrelid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(table_id));

	/* Set key - policy's name. */
	ScanKeyInit(&skey[1],
				Anum_pg_policy_polname,
				BTEqualStrategyNumber, F_NAMEEQ,
				CStringGetDatum(stmt->policy_name));

	sscan = systable_beginscan(pg_policy_rel,
							   PolicyPolrelidPolnameIndexId, true, NULL, 2,
							   skey);

	policy_tuple = systable_getnext(sscan);

	/* Complain if the policy name already exists for the table */
	if (HeapTupleIsValid(policy_tuple))
		ereport(ERROR,
				(errcode(ERRCODE_DUPLICATE_OBJECT),
				 errmsg("policy \"%s\" for table \"%s\" already exists",
						stmt->policy_name, RelationGetRelationName(target_table))));

	policy_id = GetNewOidWithIndex(pg_policy_rel, PolicyOidIndexId,
								   Anum_pg_policy_oid);
	values[Anum_pg_policy_oid - 1] = ObjectIdGetDatum(policy_id);
	values[Anum_pg_policy_polrelid - 1] = ObjectIdGetDatum(table_id);
	values[Anum_pg_policy_polname - 1] = DirectFunctionCall1(namein,
															 CStringGetDatum(stmt->policy_name));
	values[Anum_pg_policy_polcmd - 1] = CharGetDatum(polcmd);
	values[Anum_pg_policy_polpermissive - 1] = BoolGetDatum(stmt->permissive);
	values[Anum_pg_policy_polroles - 1] = PointerGetDatum(role_ids);

	/* Add qual if present (stored in nodeToString text form). */
	if (qual)
		values[Anum_pg_policy_polqual - 1] = CStringGetTextDatum(nodeToString(qual));
	else
		isnull[Anum_pg_policy_polqual - 1] = true;

	/* Add WITH CHECK qual if present */
	if (with_check_qual)
		values[Anum_pg_policy_polwithcheck - 1] = CStringGetTextDatum(nodeToString(with_check_qual));
	else
		isnull[Anum_pg_policy_polwithcheck - 1] = true;

	policy_tuple = heap_form_tuple(RelationGetDescr(pg_policy_rel), values,
								   isnull);

	CatalogTupleInsert(pg_policy_rel, policy_tuple);

	/* Record Dependencies */
	target.classId = RelationRelationId;
	target.objectId = table_id;
	target.objectSubId = 0;

	myself.classId = PolicyRelationId;
	myself.objectId = policy_id;
	myself.objectSubId = 0;

	recordDependencyOn(&myself, &target, DEPENDENCY_AUTO);

	recordDependencyOnExpr(&myself, qual, qual_pstate->p_rtable,
						   DEPENDENCY_NORMAL);

	recordDependencyOnExpr(&myself, with_check_qual,
						   with_check_pstate->p_rtable, DEPENDENCY_NORMAL);

	/* Register role dependencies */
	target.classId = AuthIdRelationId;
	target.objectSubId = 0;
	for (i = 0; i < nitems; i++)
	{
		target.objectId = DatumGetObjectId(role_oids[i]);
		/* no dependency if public */
		if (target.objectId != ACL_ID_PUBLIC)
			recordSharedDependencyOn(&myself, &target,
									 SHARED_DEPENDENCY_POLICY);
	}

	InvokeObjectPostCreateHook(PolicyRelationId, policy_id, 0);

	/* Invalidate Relation Cache */
	CacheInvalidateRelcache(target_table);

	/* Clean up. */
	heap_freetuple(policy_tuple);
	free_parsestate(qual_pstate);
	free_parsestate(with_check_pstate);
	systable_endscan(sscan);
	relation_close(target_table, NoLock);
	table_close(pg_policy_rel, RowExclusiveLock);

	return myself;
}

/*
 * AlterPolicy -
 *	 handles the execution of the ALTER POLICY command.
 *
 * stmt - the AlterPolicyStmt that describes the policy and how to alter it.
+ */ +ObjectAddress +AlterPolicy(AlterPolicyStmt *stmt) +{ + Relation pg_policy_rel; + Oid policy_id; + Relation target_table; + Oid table_id; + Datum *role_oids = NULL; + int nitems = 0; + ArrayType *role_ids = NULL; + List *qual_parse_rtable = NIL; + List *with_check_parse_rtable = NIL; + Node *qual = NULL; + Node *with_check_qual = NULL; + ScanKeyData skey[2]; + SysScanDesc sscan; + HeapTuple policy_tuple; + HeapTuple new_tuple; + Datum values[Natts_pg_policy]; + bool isnull[Natts_pg_policy]; + bool replaces[Natts_pg_policy]; + ObjectAddress target; + ObjectAddress myself; + Datum polcmd_datum; + char polcmd; + bool polcmd_isnull; + int i; + + /* Parse role_ids */ + if (stmt->roles != NULL) + { + role_oids = policy_role_list_to_array(stmt->roles, &nitems); + role_ids = construct_array(role_oids, nitems, OIDOID, + sizeof(Oid), true, TYPALIGN_INT); + } + + /* Get id of table. Also handles permissions checks. */ + table_id = RangeVarGetRelidExtended(stmt->table, AccessExclusiveLock, + 0, + RangeVarCallbackForPolicy, + (void *) stmt); + + target_table = relation_open(table_id, NoLock); + + /* Parse the using policy clause */ + if (stmt->qual) + { + ParseNamespaceItem *nsitem; + ParseState *qual_pstate = make_parsestate(NULL); + + nsitem = addRangeTableEntryForRelation(qual_pstate, target_table, + AccessShareLock, + NULL, false, false); + + addNSItemToQuery(qual_pstate, nsitem, false, true, true); + + qual = transformWhereClause(qual_pstate, stmt->qual, + EXPR_KIND_POLICY, + "POLICY"); + + /* Fix up collation information */ + assign_expr_collations(qual_pstate, qual); + + qual_parse_rtable = qual_pstate->p_rtable; + free_parsestate(qual_pstate); + } + + /* Parse the with-check policy clause */ + if (stmt->with_check) + { + ParseNamespaceItem *nsitem; + ParseState *with_check_pstate = make_parsestate(NULL); + + nsitem = addRangeTableEntryForRelation(with_check_pstate, target_table, + AccessShareLock, + NULL, false, false); + + addNSItemToQuery(with_check_pstate, 
nsitem, false, true, true); + + with_check_qual = transformWhereClause(with_check_pstate, + stmt->with_check, + EXPR_KIND_POLICY, + "POLICY"); + + /* Fix up collation information */ + assign_expr_collations(with_check_pstate, with_check_qual); + + with_check_parse_rtable = with_check_pstate->p_rtable; + free_parsestate(with_check_pstate); + } + + /* zero-clear */ + memset(values, 0, sizeof(values)); + memset(replaces, 0, sizeof(replaces)); + memset(isnull, 0, sizeof(isnull)); + + /* Find policy to update. */ + pg_policy_rel = table_open(PolicyRelationId, RowExclusiveLock); + + /* Set key - policy's relation id. */ + ScanKeyInit(&skey[0], + Anum_pg_policy_polrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(table_id)); + + /* Set key - policy's name. */ + ScanKeyInit(&skey[1], + Anum_pg_policy_polname, + BTEqualStrategyNumber, F_NAMEEQ, + CStringGetDatum(stmt->policy_name)); + + sscan = systable_beginscan(pg_policy_rel, + PolicyPolrelidPolnameIndexId, true, NULL, 2, + skey); + + policy_tuple = systable_getnext(sscan); + + /* Check that the policy is found, raise an error if not. */ + if (!HeapTupleIsValid(policy_tuple)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("policy \"%s\" for table \"%s\" does not exist", + stmt->policy_name, + RelationGetRelationName(target_table)))); + + /* Get policy command */ + polcmd_datum = heap_getattr(policy_tuple, Anum_pg_policy_polcmd, + RelationGetDescr(pg_policy_rel), + &polcmd_isnull); + Assert(!polcmd_isnull); + polcmd = DatumGetChar(polcmd_datum); + + /* + * If the command is SELECT or DELETE then WITH CHECK should be NULL. + */ + if ((polcmd == ACL_SELECT_CHR || polcmd == ACL_DELETE_CHR) + && stmt->with_check != NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("only USING expression allowed for SELECT, DELETE"))); + + /* + * If the command is INSERT then WITH CHECK should be the only expression + * provided. 
+ */ + if ((polcmd == ACL_INSERT_CHR) + && stmt->qual != NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("only WITH CHECK expression allowed for INSERT"))); + + policy_id = ((Form_pg_policy) GETSTRUCT(policy_tuple))->oid; + + if (role_ids != NULL) + { + replaces[Anum_pg_policy_polroles - 1] = true; + values[Anum_pg_policy_polroles - 1] = PointerGetDatum(role_ids); + } + else + { + Oid *roles; + Datum roles_datum; + bool attr_isnull; + ArrayType *policy_roles; + + /* + * We need to pull the set of roles this policy applies to from what's + * in the catalog, so that we can recreate the dependencies correctly + * for the policy. + */ + + roles_datum = heap_getattr(policy_tuple, Anum_pg_policy_polroles, + RelationGetDescr(pg_policy_rel), + &attr_isnull); + Assert(!attr_isnull); + + policy_roles = DatumGetArrayTypePCopy(roles_datum); + + roles = (Oid *) ARR_DATA_PTR(policy_roles); + + nitems = ARR_DIMS(policy_roles)[0]; + + role_oids = (Datum *) palloc(nitems * sizeof(Datum)); + + for (i = 0; i < nitems; i++) + role_oids[i] = ObjectIdGetDatum(roles[i]); + } + + if (qual != NULL) + { + replaces[Anum_pg_policy_polqual - 1] = true; + values[Anum_pg_policy_polqual - 1] + = CStringGetTextDatum(nodeToString(qual)); + } + else + { + Datum value_datum; + bool attr_isnull; + + /* + * We need to pull the USING expression and build the range table for + * the policy from what's in the catalog, so that we can recreate the + * dependencies correctly for the policy. 
+ */ + + /* Check if the policy has a USING expr */ + value_datum = heap_getattr(policy_tuple, Anum_pg_policy_polqual, + RelationGetDescr(pg_policy_rel), + &attr_isnull); + if (!attr_isnull) + { + char *qual_value; + ParseState *qual_pstate; + + /* parsestate is built just to build the range table */ + qual_pstate = make_parsestate(NULL); + + qual_value = TextDatumGetCString(value_datum); + qual = stringToNode(qual_value); + + /* Add this rel to the parsestate's rangetable, for dependencies */ + (void) addRangeTableEntryForRelation(qual_pstate, target_table, + AccessShareLock, + NULL, false, false); + + qual_parse_rtable = qual_pstate->p_rtable; + free_parsestate(qual_pstate); + } + } + + if (with_check_qual != NULL) + { + replaces[Anum_pg_policy_polwithcheck - 1] = true; + values[Anum_pg_policy_polwithcheck - 1] + = CStringGetTextDatum(nodeToString(with_check_qual)); + } + else + { + Datum value_datum; + bool attr_isnull; + + /* + * We need to pull the WITH CHECK expression and build the range table + * for the policy from what's in the catalog, so that we can recreate + * the dependencies correctly for the policy. 
+ */ + + /* Check if the policy has a WITH CHECK expr */ + value_datum = heap_getattr(policy_tuple, Anum_pg_policy_polwithcheck, + RelationGetDescr(pg_policy_rel), + &attr_isnull); + if (!attr_isnull) + { + char *with_check_value; + ParseState *with_check_pstate; + + /* parsestate is built just to build the range table */ + with_check_pstate = make_parsestate(NULL); + + with_check_value = TextDatumGetCString(value_datum); + with_check_qual = stringToNode(with_check_value); + + /* Add this rel to the parsestate's rangetable, for dependencies */ + (void) addRangeTableEntryForRelation(with_check_pstate, + target_table, + AccessShareLock, + NULL, false, false); + + with_check_parse_rtable = with_check_pstate->p_rtable; + free_parsestate(with_check_pstate); + } + } + + new_tuple = heap_modify_tuple(policy_tuple, + RelationGetDescr(pg_policy_rel), + values, isnull, replaces); + CatalogTupleUpdate(pg_policy_rel, &new_tuple->t_self, new_tuple); + + /* Update Dependencies. */ + deleteDependencyRecordsFor(PolicyRelationId, policy_id, false); + + /* Record Dependencies */ + target.classId = RelationRelationId; + target.objectId = table_id; + target.objectSubId = 0; + + myself.classId = PolicyRelationId; + myself.objectId = policy_id; + myself.objectSubId = 0; + + recordDependencyOn(&myself, &target, DEPENDENCY_AUTO); + + recordDependencyOnExpr(&myself, qual, qual_parse_rtable, DEPENDENCY_NORMAL); + + recordDependencyOnExpr(&myself, with_check_qual, with_check_parse_rtable, + DEPENDENCY_NORMAL); + + /* Register role dependencies */ + deleteSharedDependencyRecordsFor(PolicyRelationId, policy_id, 0); + target.classId = AuthIdRelationId; + target.objectSubId = 0; + for (i = 0; i < nitems; i++) + { + target.objectId = DatumGetObjectId(role_oids[i]); + /* no dependency if public */ + if (target.objectId != ACL_ID_PUBLIC) + recordSharedDependencyOn(&myself, &target, + SHARED_DEPENDENCY_POLICY); + } + + InvokeObjectPostAlterHook(PolicyRelationId, policy_id, 0); + + 
heap_freetuple(new_tuple); + + /* Invalidate Relation Cache */ + CacheInvalidateRelcache(target_table); + + /* Clean up. */ + systable_endscan(sscan); + relation_close(target_table, NoLock); + table_close(pg_policy_rel, RowExclusiveLock); + + return myself; +} + +/* + * rename_policy - + * change the name of a policy on a relation + */ +ObjectAddress +rename_policy(RenameStmt *stmt) +{ + Relation pg_policy_rel; + Relation target_table; + Oid table_id; + Oid opoloid; + ScanKeyData skey[2]; + SysScanDesc sscan; + HeapTuple policy_tuple; + ObjectAddress address; + + /* Get id of table. Also handles permissions checks. */ + table_id = RangeVarGetRelidExtended(stmt->relation, AccessExclusiveLock, + 0, + RangeVarCallbackForPolicy, + (void *) stmt); + + target_table = relation_open(table_id, NoLock); + + pg_policy_rel = table_open(PolicyRelationId, RowExclusiveLock); + + /* First pass -- check for conflict */ + + /* Add key - policy's relation id. */ + ScanKeyInit(&skey[0], + Anum_pg_policy_polrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(table_id)); + + /* Add key - policy's name. */ + ScanKeyInit(&skey[1], + Anum_pg_policy_polname, + BTEqualStrategyNumber, F_NAMEEQ, + CStringGetDatum(stmt->newname)); + + sscan = systable_beginscan(pg_policy_rel, + PolicyPolrelidPolnameIndexId, true, NULL, 2, + skey); + + if (HeapTupleIsValid(systable_getnext(sscan))) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("policy \"%s\" for table \"%s\" already exists", + stmt->newname, RelationGetRelationName(target_table)))); + + systable_endscan(sscan); + + /* Second pass -- find existing policy and update */ + /* Add key - policy's relation id. */ + ScanKeyInit(&skey[0], + Anum_pg_policy_polrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(table_id)); + + /* Add key - policy's name. 
*/ + ScanKeyInit(&skey[1], + Anum_pg_policy_polname, + BTEqualStrategyNumber, F_NAMEEQ, + CStringGetDatum(stmt->subname)); + + sscan = systable_beginscan(pg_policy_rel, + PolicyPolrelidPolnameIndexId, true, NULL, 2, + skey); + + policy_tuple = systable_getnext(sscan); + + /* Complain if we did not find the policy */ + if (!HeapTupleIsValid(policy_tuple)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("policy \"%s\" for table \"%s\" does not exist", + stmt->subname, RelationGetRelationName(target_table)))); + + opoloid = ((Form_pg_policy) GETSTRUCT(policy_tuple))->oid; + + policy_tuple = heap_copytuple(policy_tuple); + + namestrcpy(&((Form_pg_policy) GETSTRUCT(policy_tuple))->polname, + stmt->newname); + + CatalogTupleUpdate(pg_policy_rel, &policy_tuple->t_self, policy_tuple); + + InvokeObjectPostAlterHook(PolicyRelationId, opoloid, 0); + + ObjectAddressSet(address, PolicyRelationId, opoloid); + + /* + * Invalidate relation's relcache entry so that other backends (and this + * one too!) are sent SI message to make them rebuild relcache entries. + * (Ideally this should happen automatically...) + */ + CacheInvalidateRelcache(target_table); + + /* Clean up. */ + systable_endscan(sscan); + table_close(pg_policy_rel, RowExclusiveLock); + relation_close(target_table, NoLock); + + return address; +} + +/* + * get_relation_policy_oid - Look up a policy by name to find its OID + * + * If missing_ok is false, throw an error if policy not found. If + * true, just return InvalidOid. + */ +Oid +get_relation_policy_oid(Oid relid, const char *policy_name, bool missing_ok) +{ + Relation pg_policy_rel; + ScanKeyData skey[2]; + SysScanDesc sscan; + HeapTuple policy_tuple; + Oid policy_oid; + + pg_policy_rel = table_open(PolicyRelationId, AccessShareLock); + + /* Add key - policy's relation id. */ + ScanKeyInit(&skey[0], + Anum_pg_policy_polrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(relid)); + + /* Add key - policy's name. 
*/ + ScanKeyInit(&skey[1], + Anum_pg_policy_polname, + BTEqualStrategyNumber, F_NAMEEQ, + CStringGetDatum(policy_name)); + + sscan = systable_beginscan(pg_policy_rel, + PolicyPolrelidPolnameIndexId, true, NULL, 2, + skey); + + policy_tuple = systable_getnext(sscan); + + if (!HeapTupleIsValid(policy_tuple)) + { + if (!missing_ok) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("policy \"%s\" for table \"%s\" does not exist", + policy_name, get_rel_name(relid)))); + + policy_oid = InvalidOid; + } + else + policy_oid = ((Form_pg_policy) GETSTRUCT(policy_tuple))->oid; + + /* Clean up. */ + systable_endscan(sscan); + table_close(pg_policy_rel, AccessShareLock); + + return policy_oid; +} + +/* + * relation_has_policies - Determine if relation has any policies + */ +bool +relation_has_policies(Relation rel) +{ + Relation catalog; + ScanKeyData skey; + SysScanDesc sscan; + HeapTuple policy_tuple; + bool ret = false; + + catalog = table_open(PolicyRelationId, AccessShareLock); + ScanKeyInit(&skey, + Anum_pg_policy_polrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(rel))); + sscan = systable_beginscan(catalog, PolicyPolrelidPolnameIndexId, true, + NULL, 1, &skey); + policy_tuple = systable_getnext(sscan); + if (HeapTupleIsValid(policy_tuple)) + ret = true; + + systable_endscan(sscan); + table_close(catalog, AccessShareLock); + + return ret; +} diff --git a/src/backend/commands/portalcmds.c b/src/backend/commands/portalcmds.c new file mode 100644 index 0000000..9902c5c --- /dev/null +++ b/src/backend/commands/portalcmds.c @@ -0,0 +1,496 @@ +/*------------------------------------------------------------------------- + * + * portalcmds.c + * Utility commands affecting portals (that is, SQL cursor commands) + * + * Note: see also tcop/pquery.c, which implements portal operations for + * the FE/BE protocol. This module uses pquery.c for some operations. 
+ * And both modules depend on utils/mmgr/portalmem.c, which controls + * storage management for portals (but doesn't run any queries in them). + * + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/portalcmds.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include + +#include "access/xact.h" +#include "commands/portalcmds.h" +#include "executor/executor.h" +#include "executor/tstoreReceiver.h" +#include "miscadmin.h" +#include "rewrite/rewriteHandler.h" +#include "tcop/pquery.h" +#include "tcop/tcopprot.h" +#include "utils/memutils.h" +#include "utils/snapmgr.h" + + +/* + * PerformCursorOpen + * Execute SQL DECLARE CURSOR command. + */ +void +PerformCursorOpen(ParseState *pstate, DeclareCursorStmt *cstmt, ParamListInfo params, + bool isTopLevel) +{ + Query *query = castNode(Query, cstmt->query); + List *rewritten; + PlannedStmt *plan; + Portal portal; + MemoryContext oldContext; + char *queryString; + + /* + * Disallow empty-string cursor name (conflicts with protocol-level + * unnamed portal). + */ + if (!cstmt->portalname || cstmt->portalname[0] == '\0') + ereport(ERROR, + (errcode(ERRCODE_INVALID_CURSOR_NAME), + errmsg("invalid cursor name: must not be empty"))); + + /* + * If this is a non-holdable cursor, we require that this statement has + * been executed inside a transaction block (or else, it would have no + * user-visible effect). + */ + if (!(cstmt->options & CURSOR_OPT_HOLD)) + RequireTransactionBlock(isTopLevel, "DECLARE CURSOR"); + else if (InSecurityRestrictedOperation()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("cannot create a cursor WITH HOLD within security-restricted operation"))); + + /* + * Parse analysis was done already, but we still have to run the rule + * rewriter. 
We do not do AcquireRewriteLocks: we assume the query either + * came straight from the parser, or suitable locks were acquired by + * plancache.c. + */ + rewritten = QueryRewrite(query); + + /* SELECT should never rewrite to more or less than one query */ + if (list_length(rewritten) != 1) + elog(ERROR, "non-SELECT statement in DECLARE CURSOR"); + + query = linitial_node(Query, rewritten); + + if (query->commandType != CMD_SELECT) + elog(ERROR, "non-SELECT statement in DECLARE CURSOR"); + + /* Plan the query, applying the specified options */ + plan = pg_plan_query(query, pstate->p_sourcetext, cstmt->options, params); + + /* + * Create a portal and copy the plan and query string into its memory. + */ + portal = CreatePortal(cstmt->portalname, false, false); + + oldContext = MemoryContextSwitchTo(portal->portalContext); + + plan = copyObject(plan); + + queryString = pstrdup(pstate->p_sourcetext); + + PortalDefineQuery(portal, + NULL, + queryString, + CMDTAG_SELECT, /* cursor's query is always a SELECT */ + list_make1(plan), + NULL); + + /*---------- + * Also copy the outer portal's parameter list into the inner portal's + * memory context. We want to pass down the parameter values in case we + * had a command like + * DECLARE c CURSOR FOR SELECT ... WHERE foo = $1 + * This will have been parsed using the outer parameter set and the + * parameter value needs to be preserved for use when the cursor is + * executed. + *---------- + */ + params = copyParamList(params); + + MemoryContextSwitchTo(oldContext); + + /* + * Set up options for portal. + * + * If the user didn't specify a SCROLL type, allow or disallow scrolling + * based on whether it would require any additional runtime overhead to do + * so. Also, we disallow scrolling for FOR UPDATE cursors. 
+ */ + portal->cursorOptions = cstmt->options; + if (!(portal->cursorOptions & (CURSOR_OPT_SCROLL | CURSOR_OPT_NO_SCROLL))) + { + if (plan->rowMarks == NIL && + ExecSupportsBackwardScan(plan->planTree)) + portal->cursorOptions |= CURSOR_OPT_SCROLL; + else + portal->cursorOptions |= CURSOR_OPT_NO_SCROLL; + } + + /* + * Start execution, inserting parameters if any. + */ + PortalStart(portal, params, 0, GetActiveSnapshot()); + + Assert(portal->strategy == PORTAL_ONE_SELECT); + + /* + * We're done; the query won't actually be run until PerformPortalFetch is + * called. + */ +} + +/* + * PerformPortalFetch + * Execute SQL FETCH or MOVE command. + * + * stmt: parsetree node for command + * dest: where to send results + * qc: where to store a command completion status data. + * + * qc may be NULL if caller doesn't want status data. + */ +void +PerformPortalFetch(FetchStmt *stmt, + DestReceiver *dest, + QueryCompletion *qc) +{ + Portal portal; + uint64 nprocessed; + + /* + * Disallow empty-string cursor name (conflicts with protocol-level + * unnamed portal). + */ + if (!stmt->portalname || stmt->portalname[0] == '\0') + ereport(ERROR, + (errcode(ERRCODE_INVALID_CURSOR_NAME), + errmsg("invalid cursor name: must not be empty"))); + + /* get the portal from the portal name */ + portal = GetPortalByName(stmt->portalname); + if (!PortalIsValid(portal)) + { + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_CURSOR), + errmsg("cursor \"%s\" does not exist", stmt->portalname))); + return; /* keep compiler happy */ + } + + /* Adjust dest if needed. MOVE wants destination DestNone */ + if (stmt->ismove) + dest = None_Receiver; + + /* Do it */ + nprocessed = PortalRunFetch(portal, + stmt->direction, + stmt->howMany, + dest); + + /* Return command status if wanted */ + if (qc) + SetQueryCompletion(qc, stmt->ismove ? CMDTAG_MOVE : CMDTAG_FETCH, + nprocessed); +} + +/* + * PerformPortalClose + * Close a cursor. 
+ */ +void +PerformPortalClose(const char *name) +{ + Portal portal; + + /* NULL means CLOSE ALL */ + if (name == NULL) + { + PortalHashTableDeleteAll(); + return; + } + + /* + * Disallow empty-string cursor name (conflicts with protocol-level + * unnamed portal). + */ + if (name[0] == '\0') + ereport(ERROR, + (errcode(ERRCODE_INVALID_CURSOR_NAME), + errmsg("invalid cursor name: must not be empty"))); + + /* + * get the portal from the portal name + */ + portal = GetPortalByName(name); + if (!PortalIsValid(portal)) + { + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_CURSOR), + errmsg("cursor \"%s\" does not exist", name))); + return; /* keep compiler happy */ + } + + /* + * Note: PortalCleanup is called as a side-effect, if not already done. + */ + PortalDrop(portal, false); +} + +/* + * PortalCleanup + * + * Clean up a portal when it's dropped. This is the standard cleanup hook + * for portals. + * + * Note: if portal->status is PORTAL_FAILED, we are probably being called + * during error abort, and must be careful to avoid doing anything that + * is likely to fail again. + */ +void +PortalCleanup(Portal portal) +{ + QueryDesc *queryDesc; + + /* + * sanity checks + */ + AssertArg(PortalIsValid(portal)); + AssertArg(portal->cleanup == PortalCleanup); + + /* + * Shut down executor, if still running. We skip this during error abort, + * since other mechanisms will take care of releasing executor resources, + * and we can't be sure that ExecutorEnd itself wouldn't fail. + */ + queryDesc = portal->queryDesc; + if (queryDesc) + { + /* + * Reset the queryDesc before anything else. This prevents us from + * trying to shut down the executor twice, in case of an error below. + * The transaction abort mechanisms will take care of resource cleanup + * in such a case. 
+ */ + portal->queryDesc = NULL; + + if (portal->status != PORTAL_FAILED) + { + ResourceOwner saveResourceOwner; + + /* We must make the portal's resource owner current */ + saveResourceOwner = CurrentResourceOwner; + if (portal->resowner) + CurrentResourceOwner = portal->resowner; + + ExecutorFinish(queryDesc); + ExecutorEnd(queryDesc); + FreeQueryDesc(queryDesc); + + CurrentResourceOwner = saveResourceOwner; + } + } +} + +/* + * PersistHoldablePortal + * + * Prepare the specified Portal for access outside of the current + * transaction. When this function returns, all future accesses to the + * portal must be done via the Tuplestore (not by invoking the + * executor). + */ +void +PersistHoldablePortal(Portal portal) +{ + QueryDesc *queryDesc = portal->queryDesc; + Portal saveActivePortal; + ResourceOwner saveResourceOwner; + MemoryContext savePortalContext; + MemoryContext oldcxt; + + /* + * If we're preserving a holdable portal, we had better be inside the + * transaction that originally created it. + */ + Assert(portal->createSubid != InvalidSubTransactionId); + Assert(queryDesc != NULL); + + /* + * Caller must have created the tuplestore already ... but not a snapshot. + */ + Assert(portal->holdContext != NULL); + Assert(portal->holdStore != NULL); + Assert(portal->holdSnapshot == NULL); + + /* + * Before closing down the executor, we must copy the tupdesc into + * long-term memory, since it was created in executor memory. + */ + oldcxt = MemoryContextSwitchTo(portal->holdContext); + + portal->tupDesc = CreateTupleDescCopy(portal->tupDesc); + + MemoryContextSwitchTo(oldcxt); + + /* + * Check for improper portal use, and mark portal active. + */ + MarkPortalActive(portal); + + /* + * Set up global portal context pointers. 
+ */ + saveActivePortal = ActivePortal; + saveResourceOwner = CurrentResourceOwner; + savePortalContext = PortalContext; + PG_TRY(); + { + ScanDirection direction = ForwardScanDirection; + + ActivePortal = portal; + if (portal->resowner) + CurrentResourceOwner = portal->resowner; + PortalContext = portal->portalContext; + + MemoryContextSwitchTo(PortalContext); + + PushActiveSnapshot(queryDesc->snapshot); + + /* + * If the portal is marked scrollable, we need to store the entire + * result set in the tuplestore, so that subsequent backward FETCHs + * can be processed. Otherwise, store only the not-yet-fetched rows. + * (The latter is not only more efficient, but avoids semantic + * problems if the query's output isn't stable.) + * + * In the no-scroll case, tuple indexes in the tuplestore will not + * match the cursor's nominal position (portalPos). Currently this + * causes no difficulty because we only navigate in the tuplestore by + * relative position, except for the tuplestore_skiptuples call below + * and the tuplestore_rescan call in DoPortalRewind, both of which are + * disabled for no-scroll cursors. But someday we might need to track + * the offset between the holdStore and the cursor's nominal position + * explicitly. + */ + if (portal->cursorOptions & CURSOR_OPT_SCROLL) + { + ExecutorRewind(queryDesc); + } + else + { + /* + * If we already reached end-of-query, set the direction to + * NoMovement to avoid trying to fetch any tuples. (This check + * exists because not all plan node types are robust about being + * called again if they've already returned NULL once.) We'll + * still set up an empty tuplestore, though, to keep this from + * being a special case later. + */ + if (portal->atEnd) + direction = NoMovementScanDirection; + } + + /* + * Change the destination to output to the tuplestore. Note we tell + * the tuplestore receiver to detoast all data passed through it; this + * makes it safe to not keep a snapshot associated with the data. 
+ */ + queryDesc->dest = CreateDestReceiver(DestTuplestore); + SetTuplestoreDestReceiverParams(queryDesc->dest, + portal->holdStore, + portal->holdContext, + true, + NULL, + NULL); + + /* Fetch the result set into the tuplestore */ + ExecutorRun(queryDesc, direction, 0L, false); + + queryDesc->dest->rDestroy(queryDesc->dest); + queryDesc->dest = NULL; + + /* + * Now shut down the inner executor. + */ + portal->queryDesc = NULL; /* prevent double shutdown */ + ExecutorFinish(queryDesc); + ExecutorEnd(queryDesc); + FreeQueryDesc(queryDesc); + + /* + * Set the position in the result set. + */ + MemoryContextSwitchTo(portal->holdContext); + + if (portal->atEnd) + { + /* + * Just force the tuplestore forward to its end. The size of the + * skip request here is arbitrary. + */ + while (tuplestore_skiptuples(portal->holdStore, 1000000, true)) + /* continue */ ; + } + else + { + tuplestore_rescan(portal->holdStore); + + /* + * In the no-scroll case, the start of the tuplestore is exactly + * where we want to be, so no repositioning is wanted. + */ + if (portal->cursorOptions & CURSOR_OPT_SCROLL) + { + if (!tuplestore_skiptuples(portal->holdStore, + portal->portalPos, + true)) + elog(ERROR, "unexpected end of tuple stream"); + } + } + } + PG_CATCH(); + { + /* Uncaught error while executing portal: mark it dead */ + MarkPortalFailed(portal); + + /* Restore global vars and propagate error */ + ActivePortal = saveActivePortal; + CurrentResourceOwner = saveResourceOwner; + PortalContext = savePortalContext; + + PG_RE_THROW(); + } + PG_END_TRY(); + + MemoryContextSwitchTo(oldcxt); + + /* Mark portal not active */ + portal->status = PORTAL_READY; + + ActivePortal = saveActivePortal; + CurrentResourceOwner = saveResourceOwner; + PortalContext = savePortalContext; + + PopActiveSnapshot(); + + /* + * We can now release any subsidiary memory of the portal's context; we'll + * never use it again. 
The executor already dropped its context, but this + * will clean up anything that glommed onto the portal's context via + * PortalContext. + */ + MemoryContextDeleteChildren(portal->portalContext); +} diff --git a/src/backend/commands/prepare.c b/src/backend/commands/prepare.c new file mode 100644 index 0000000..fc5c7f9 --- /dev/null +++ b/src/backend/commands/prepare.c @@ -0,0 +1,729 @@ +/*------------------------------------------------------------------------- + * + * prepare.c + * Prepareable SQL statements via PREPARE, EXECUTE and DEALLOCATE + * + * This module also implements storage of prepared statements that are + * accessed via the extended FE/BE query protocol. + * + * + * Copyright (c) 2002-2022, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/commands/prepare.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include + +#include "access/xact.h" +#include "catalog/pg_type.h" +#include "commands/createas.h" +#include "commands/prepare.h" +#include "funcapi.h" +#include "miscadmin.h" +#include "nodes/nodeFuncs.h" +#include "parser/analyze.h" +#include "parser/parse_coerce.h" +#include "parser/parse_collate.h" +#include "parser/parse_expr.h" +#include "parser/parse_type.h" +#include "rewrite/rewriteHandler.h" +#include "tcop/pquery.h" +#include "tcop/utility.h" +#include "utils/builtins.h" +#include "utils/snapmgr.h" +#include "utils/timestamp.h" + + +/* + * The hash table in which prepared queries are stored. This is + * per-backend: query plans are not shared between backends. + * The keys for this hash table are the arguments to PREPARE and EXECUTE + * (statement names); the entries are PreparedStatement structs. 
+ */ +static HTAB *prepared_queries = NULL; + +static void InitQueryHashTable(void); +static ParamListInfo EvaluateParams(ParseState *pstate, + PreparedStatement *pstmt, List *params, + EState *estate); +static Datum build_regtype_array(Oid *param_types, int num_params); + +/* + * Implements the 'PREPARE' utility statement. + */ +void +PrepareQuery(ParseState *pstate, PrepareStmt *stmt, + int stmt_location, int stmt_len) +{ + RawStmt *rawstmt; + CachedPlanSource *plansource; + Oid *argtypes = NULL; + int nargs; + List *query_list; + + /* + * Disallow empty-string statement name (conflicts with protocol-level + * unnamed statement). + */ + if (!stmt->name || stmt->name[0] == '\0') + ereport(ERROR, + (errcode(ERRCODE_INVALID_PSTATEMENT_DEFINITION), + errmsg("invalid statement name: must not be empty"))); + + /* + * Need to wrap the contained statement in a RawStmt node to pass it to + * parse analysis. + */ + rawstmt = makeNode(RawStmt); + rawstmt->stmt = stmt->query; + rawstmt->stmt_location = stmt_location; + rawstmt->stmt_len = stmt_len; + + /* + * Create the CachedPlanSource before we do parse analysis, since it needs + * to see the unmodified raw parse tree. + */ + plansource = CreateCachedPlan(rawstmt, pstate->p_sourcetext, + CreateCommandTag(stmt->query)); + + /* Transform list of TypeNames to array of type OIDs */ + nargs = list_length(stmt->argtypes); + + if (nargs) + { + int i; + ListCell *l; + + argtypes = (Oid *) palloc(nargs * sizeof(Oid)); + i = 0; + + foreach(l, stmt->argtypes) + { + TypeName *tn = lfirst(l); + Oid toid = typenameTypeId(pstate, tn); + + argtypes[i++] = toid; + } + } + + /* + * Analyze the statement using these parameter types (any parameters + * passed in from above us will not be visible to it), allowing + * information about unknown parameters to be deduced from context. + * Rewrite the query. The result could be 0, 1, or many queries. 
+ */ + query_list = pg_analyze_and_rewrite_varparams(rawstmt, pstate->p_sourcetext, + &argtypes, &nargs, NULL); + + /* Finish filling in the CachedPlanSource */ + CompleteCachedPlan(plansource, + query_list, + NULL, + argtypes, + nargs, + NULL, + NULL, + CURSOR_OPT_PARALLEL_OK, /* allow parallel mode */ + true); /* fixed result */ + + /* + * Save the results. + */ + StorePreparedStatement(stmt->name, + plansource, + true); +} + +/* + * ExecuteQuery --- implement the 'EXECUTE' utility statement. + * + * This code also supports CREATE TABLE ... AS EXECUTE. That case is + * indicated by passing a non-null intoClause. The DestReceiver is already + * set up correctly for CREATE TABLE AS, but we still have to make a few + * other adjustments here. + */ +void +ExecuteQuery(ParseState *pstate, + ExecuteStmt *stmt, IntoClause *intoClause, + ParamListInfo params, + DestReceiver *dest, QueryCompletion *qc) +{ + PreparedStatement *entry; + CachedPlan *cplan; + List *plan_list; + ParamListInfo paramLI = NULL; + EState *estate = NULL; + Portal portal; + char *query_string; + int eflags; + long count; + + /* Look it up in the hash table */ + entry = FetchPreparedStatement(stmt->name, true); + + /* Shouldn't find a non-fixed-result cached plan */ + if (!entry->plansource->fixed_result) + elog(ERROR, "EXECUTE does not support variable-result cached plans"); + + /* Evaluate parameters, if any */ + if (entry->plansource->num_params > 0) + { + /* + * Need an EState to evaluate parameters; must not delete it till end + * of query, in case parameters are pass-by-reference. Note that the + * passed-in "params" could possibly be referenced in the parameter + * expressions. 
+ */ + estate = CreateExecutorState(); + estate->es_param_list_info = params; + paramLI = EvaluateParams(pstate, entry, stmt->params, estate); + } + + /* Create a new portal to run the query in */ + portal = CreateNewPortal(); + /* Don't display the portal in pg_cursors, it is for internal use only */ + portal->visible = false; + + /* Copy the plan's saved query string into the portal's memory */ + query_string = MemoryContextStrdup(portal->portalContext, + entry->plansource->query_string); + + /* Replan if needed, and increment plan refcount for portal */ + cplan = GetCachedPlan(entry->plansource, paramLI, NULL, NULL); + plan_list = cplan->stmt_list; + + /* + * DO NOT add any logic that could possibly throw an error between + * GetCachedPlan and PortalDefineQuery, or you'll leak the plan refcount. + */ + PortalDefineQuery(portal, + NULL, + query_string, + entry->plansource->commandTag, + plan_list, + cplan); + + /* + * For CREATE TABLE ... AS EXECUTE, we must verify that the prepared + * statement is one that produces tuples. Currently we insist that it be + * a plain old SELECT. In future we might consider supporting other + * things such as INSERT ... RETURNING, but there are a couple of issues + * to be settled first, notably how WITH NO DATA should be handled in such + * a case (do we really want to suppress execution?) and how to pass down + * the OID-determining eflags (PortalStart won't handle them in such a + * case, and for that matter it's not clear the executor will either). + * + * For CREATE TABLE ... AS EXECUTE, we also have to ensure that the proper + * eflags and fetch count are passed to PortalStart/PortalRun. 
+ */ + if (intoClause) + { + PlannedStmt *pstmt; + + if (list_length(plan_list) != 1) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("prepared statement is not a SELECT"))); + pstmt = linitial_node(PlannedStmt, plan_list); + if (pstmt->commandType != CMD_SELECT) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("prepared statement is not a SELECT"))); + + /* Set appropriate eflags */ + eflags = GetIntoRelEFlags(intoClause); + + /* And tell PortalRun whether to run to completion or not */ + if (intoClause->skipData) + count = 0; + else + count = FETCH_ALL; + } + else + { + /* Plain old EXECUTE */ + eflags = 0; + count = FETCH_ALL; + } + + /* + * Run the portal as appropriate. + */ + PortalStart(portal, paramLI, eflags, GetActiveSnapshot()); + + (void) PortalRun(portal, count, false, true, dest, dest, qc); + + PortalDrop(portal, false); + + if (estate) + FreeExecutorState(estate); + + /* No need to pfree other memory, MemoryContext will be reset */ +} + +/* + * EvaluateParams: evaluate a list of parameters. + * + * pstate: parse state + * pstmt: statement we are getting parameters for. + * params: list of given parameter expressions (raw parser output!) + * estate: executor state to use. + * + * Returns a filled-in ParamListInfo -- this can later be passed to + * CreateQueryDesc(), which allows the executor to make use of the parameters + * during query execution. 
+ */ +static ParamListInfo +EvaluateParams(ParseState *pstate, PreparedStatement *pstmt, List *params, + EState *estate) +{ + Oid *param_types = pstmt->plansource->param_types; + int num_params = pstmt->plansource->num_params; + int nparams = list_length(params); + ParamListInfo paramLI; + List *exprstates; + ListCell *l; + int i; + + if (nparams != num_params) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("wrong number of parameters for prepared statement \"%s\"", + pstmt->stmt_name), + errdetail("Expected %d parameters but got %d.", + num_params, nparams))); + + /* Quick exit if no parameters */ + if (num_params == 0) + return NULL; + + /* + * We have to run parse analysis for the expressions. Since the parser is + * not cool about scribbling on its input, copy first. + */ + params = copyObject(params); + + i = 0; + foreach(l, params) + { + Node *expr = lfirst(l); + Oid expected_type_id = param_types[i]; + Oid given_type_id; + + expr = transformExpr(pstate, expr, EXPR_KIND_EXECUTE_PARAMETER); + + given_type_id = exprType(expr); + + expr = coerce_to_target_type(pstate, expr, given_type_id, + expected_type_id, -1, + COERCION_ASSIGNMENT, + COERCE_IMPLICIT_CAST, + -1); + + if (expr == NULL) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("parameter $%d of type %s cannot be coerced to the expected type %s", + i + 1, + format_type_be(given_type_id), + format_type_be(expected_type_id)), + errhint("You will need to rewrite or cast the expression."), + parser_errposition(pstate, exprLocation(lfirst(l))))); + + /* Take care of collations in the finished expression. 
*/ + assign_expr_collations(pstate, expr); + + lfirst(l) = expr; + i++; + } + + /* Prepare the expressions for execution */ + exprstates = ExecPrepareExprList(params, estate); + + paramLI = makeParamList(num_params); + + i = 0; + foreach(l, exprstates) + { + ExprState *n = (ExprState *) lfirst(l); + ParamExternData *prm = ¶mLI->params[i]; + + prm->ptype = param_types[i]; + prm->pflags = PARAM_FLAG_CONST; + prm->value = ExecEvalExprSwitchContext(n, + GetPerTupleExprContext(estate), + &prm->isnull); + + i++; + } + + return paramLI; +} + + +/* + * Initialize query hash table upon first use. + */ +static void +InitQueryHashTable(void) +{ + HASHCTL hash_ctl; + + hash_ctl.keysize = NAMEDATALEN; + hash_ctl.entrysize = sizeof(PreparedStatement); + + prepared_queries = hash_create("Prepared Queries", + 32, + &hash_ctl, + HASH_ELEM | HASH_STRINGS); +} + +/* + * Store all the data pertaining to a query in the hash table using + * the specified key. The passed CachedPlanSource should be "unsaved" + * in case we get an error here; we'll save it once we've created the hash + * table entry. 
+ */ +void +StorePreparedStatement(const char *stmt_name, + CachedPlanSource *plansource, + bool from_sql) +{ + PreparedStatement *entry; + TimestampTz cur_ts = GetCurrentStatementStartTimestamp(); + bool found; + + /* Initialize the hash table, if necessary */ + if (!prepared_queries) + InitQueryHashTable(); + + /* Add entry to hash table */ + entry = (PreparedStatement *) hash_search(prepared_queries, + stmt_name, + HASH_ENTER, + &found); + + /* Shouldn't get a duplicate entry */ + if (found) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_PSTATEMENT), + errmsg("prepared statement \"%s\" already exists", + stmt_name))); + + /* Fill in the hash table entry */ + entry->plansource = plansource; + entry->from_sql = from_sql; + entry->prepare_time = cur_ts; + + /* Now it's safe to move the CachedPlanSource to permanent memory */ + SaveCachedPlan(plansource); +} + +/* + * Lookup an existing query in the hash table. If the query does not + * actually exist, throw ereport(ERROR) or return NULL per second parameter. + * + * Note: this does not force the referenced plancache entry to be valid, + * since not all callers care. + */ +PreparedStatement * +FetchPreparedStatement(const char *stmt_name, bool throwError) +{ + PreparedStatement *entry; + + /* + * If the hash table hasn't been initialized, it can't be storing + * anything, therefore it couldn't possibly store our plan. + */ + if (prepared_queries) + entry = (PreparedStatement *) hash_search(prepared_queries, + stmt_name, + HASH_FIND, + NULL); + else + entry = NULL; + + if (!entry && throwError) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_PSTATEMENT), + errmsg("prepared statement \"%s\" does not exist", + stmt_name))); + + return entry; +} + +/* + * Given a prepared statement, determine the result tupledesc it will + * produce. Returns NULL if the execution will not return tuples. + * + * Note: the result is created or copied into current memory context. 
+ */ +TupleDesc +FetchPreparedStatementResultDesc(PreparedStatement *stmt) +{ + /* + * Since we don't allow prepared statements' result tupdescs to change, + * there's no need to worry about revalidating the cached plan here. + */ + Assert(stmt->plansource->fixed_result); + if (stmt->plansource->resultDesc) + return CreateTupleDescCopy(stmt->plansource->resultDesc); + else + return NULL; +} + +/* + * Given a prepared statement that returns tuples, extract the query + * targetlist. Returns NIL if the statement doesn't have a determinable + * targetlist. + * + * Note: this is pretty ugly, but since it's only used in corner cases like + * Describe Statement on an EXECUTE command, we don't worry too much about + * efficiency. + */ +List * +FetchPreparedStatementTargetList(PreparedStatement *stmt) +{ + List *tlist; + + /* Get the plan's primary targetlist */ + tlist = CachedPlanGetTargetList(stmt->plansource, NULL); + + /* Copy into caller's context in case plan gets invalidated */ + return copyObject(tlist); +} + +/* + * Implements the 'DEALLOCATE' utility statement: deletes the + * specified plan from storage. + */ +void +DeallocateQuery(DeallocateStmt *stmt) +{ + if (stmt->name) + DropPreparedStatement(stmt->name, true); + else + DropAllPreparedStatements(); +} + +/* + * Internal version of DEALLOCATE + * + * If showError is false, dropping a nonexistent statement is a no-op. + */ +void +DropPreparedStatement(const char *stmt_name, bool showError) +{ + PreparedStatement *entry; + + /* Find the query's hash table entry; raise error if wanted */ + entry = FetchPreparedStatement(stmt_name, showError); + + if (entry) + { + /* Release the plancache entry */ + DropCachedPlan(entry->plansource); + + /* Now we can remove the hash table entry */ + hash_search(prepared_queries, entry->stmt_name, HASH_REMOVE, NULL); + } +} + +/* + * Drop all cached statements. 
+ */ +void +DropAllPreparedStatements(void) +{ + HASH_SEQ_STATUS seq; + PreparedStatement *entry; + + /* nothing cached */ + if (!prepared_queries) + return; + + /* walk over cache */ + hash_seq_init(&seq, prepared_queries); + while ((entry = hash_seq_search(&seq)) != NULL) + { + /* Release the plancache entry */ + DropCachedPlan(entry->plansource); + + /* Now we can remove the hash table entry */ + hash_search(prepared_queries, entry->stmt_name, HASH_REMOVE, NULL); + } +} + +/* + * Implements the 'EXPLAIN EXECUTE' utility statement. + * + * "into" is NULL unless we are doing EXPLAIN CREATE TABLE AS EXECUTE, + * in which case executing the query should result in creating that table. + * + * Note: the passed-in queryString is that of the EXPLAIN EXECUTE, + * not the original PREPARE; we get the latter string from the plancache. + */ +void +ExplainExecuteQuery(ExecuteStmt *execstmt, IntoClause *into, ExplainState *es, + const char *queryString, ParamListInfo params, + QueryEnvironment *queryEnv) +{ + PreparedStatement *entry; + const char *query_string; + CachedPlan *cplan; + List *plan_list; + ListCell *p; + ParamListInfo paramLI = NULL; + EState *estate = NULL; + instr_time planstart; + instr_time planduration; + BufferUsage bufusage_start, + bufusage; + + if (es->buffers) + bufusage_start = pgBufferUsage; + INSTR_TIME_SET_CURRENT(planstart); + + /* Look it up in the hash table */ + entry = FetchPreparedStatement(execstmt->name, true); + + /* Shouldn't find a non-fixed-result cached plan */ + if (!entry->plansource->fixed_result) + elog(ERROR, "EXPLAIN EXECUTE does not support variable-result cached plans"); + + query_string = entry->plansource->query_string; + + /* Evaluate parameters, if any */ + if (entry->plansource->num_params) + { + ParseState *pstate; + + pstate = make_parsestate(NULL); + pstate->p_sourcetext = queryString; + + /* + * Need an EState to evaluate parameters; must not delete it till end + * of query, in case parameters are pass-by-reference. 
Note that the + * passed-in "params" could possibly be referenced in the parameter + * expressions. + */ + estate = CreateExecutorState(); + estate->es_param_list_info = params; + + paramLI = EvaluateParams(pstate, entry, execstmt->params, estate); + } + + /* Replan if needed, and acquire a transient refcount */ + cplan = GetCachedPlan(entry->plansource, paramLI, + CurrentResourceOwner, queryEnv); + + INSTR_TIME_SET_CURRENT(planduration); + INSTR_TIME_SUBTRACT(planduration, planstart); + + /* calc differences of buffer counters. */ + if (es->buffers) + { + memset(&bufusage, 0, sizeof(BufferUsage)); + BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start); + } + + plan_list = cplan->stmt_list; + + /* Explain each query */ + foreach(p, plan_list) + { + PlannedStmt *pstmt = lfirst_node(PlannedStmt, p); + + if (pstmt->commandType != CMD_UTILITY) + ExplainOnePlan(pstmt, into, es, query_string, paramLI, queryEnv, + &planduration, (es->buffers ? &bufusage : NULL)); + else + ExplainOneUtility(pstmt->utilityStmt, into, es, query_string, + paramLI, queryEnv); + + /* No need for CommandCounterIncrement, as ExplainOnePlan did it */ + + /* Separate plans with an appropriate separator */ + if (lnext(plan_list, p) != NULL) + ExplainSeparatePlans(es); + } + + if (estate) + FreeExecutorState(estate); + + ReleaseCachedPlan(cplan, CurrentResourceOwner); +} + +/* + * This set returning function reads all the prepared statements and + * returns a set of (name, statement, prepare_time, param_types, from_sql, + * generic_plans, custom_plans). + */ +Datum +pg_prepared_statement(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + + /* + * We put all the tuples into a tuplestore in one scan of the hashtable. + * This avoids any issue of the hashtable possibly changing between calls. 
+ */ + InitMaterializedSRF(fcinfo, 0); + + /* hash table might be uninitialized */ + if (prepared_queries) + { + HASH_SEQ_STATUS hash_seq; + PreparedStatement *prep_stmt; + + hash_seq_init(&hash_seq, prepared_queries); + while ((prep_stmt = hash_seq_search(&hash_seq)) != NULL) + { + Datum values[7]; + bool nulls[7]; + + MemSet(nulls, 0, sizeof(nulls)); + + values[0] = CStringGetTextDatum(prep_stmt->stmt_name); + values[1] = CStringGetTextDatum(prep_stmt->plansource->query_string); + values[2] = TimestampTzGetDatum(prep_stmt->prepare_time); + values[3] = build_regtype_array(prep_stmt->plansource->param_types, + prep_stmt->plansource->num_params); + values[4] = BoolGetDatum(prep_stmt->from_sql); + values[5] = Int64GetDatumFast(prep_stmt->plansource->num_generic_plans); + values[6] = Int64GetDatumFast(prep_stmt->plansource->num_custom_plans); + + tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, + values, nulls); + } + } + + return (Datum) 0; +} + +/* + * This utility function takes a C array of Oids, and returns a Datum + * pointing to a one-dimensional Postgres array of regtypes. An empty + * array is returned as a zero-element array, not NULL. + */ +static Datum +build_regtype_array(Oid *param_types, int num_params) +{ + Datum *tmp_ary; + ArrayType *result; + int i; + + tmp_ary = (Datum *) palloc(num_params * sizeof(Datum)); + + for (i = 0; i < num_params; i++) + tmp_ary[i] = ObjectIdGetDatum(param_types[i]); + + /* XXX: this hardcodes assumptions about the regtype type */ + result = construct_array(tmp_ary, num_params, REGTYPEOID, + 4, true, TYPALIGN_INT); + return PointerGetDatum(result); +} diff --git a/src/backend/commands/proclang.c b/src/backend/commands/proclang.c new file mode 100644 index 0000000..4a093f4 --- /dev/null +++ b/src/backend/commands/proclang.c @@ -0,0 +1,239 @@ +/*------------------------------------------------------------------------- + * + * proclang.c + * PostgreSQL LANGUAGE support code. 
+ * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/commands/proclang.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/table.h" +#include "catalog/catalog.h" +#include "catalog/dependency.h" +#include "catalog/indexing.h" +#include "catalog/objectaccess.h" +#include "catalog/pg_language.h" +#include "catalog/pg_namespace.h" +#include "catalog/pg_proc.h" +#include "catalog/pg_type.h" +#include "commands/defrem.h" +#include "commands/proclang.h" +#include "miscadmin.h" +#include "parser/parse_func.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" +#include "utils/syscache.h" + + +/* + * CREATE LANGUAGE + */ +ObjectAddress +CreateProceduralLanguage(CreatePLangStmt *stmt) +{ + const char *languageName = stmt->plname; + Oid languageOwner = GetUserId(); + Oid handlerOid, + inlineOid, + valOid; + Oid funcrettype; + Oid funcargtypes[1]; + Relation rel; + TupleDesc tupDesc; + Datum values[Natts_pg_language]; + bool nulls[Natts_pg_language]; + bool replaces[Natts_pg_language]; + NameData langname; + HeapTuple oldtup; + HeapTuple tup; + Oid langoid; + bool is_update; + ObjectAddress myself, + referenced; + ObjectAddresses *addrs; + + /* + * Check permission + */ + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to create custom procedural language"))); + + /* + * Lookup the PL handler function and check that it is of the expected + * return type + */ + Assert(stmt->plhandler); + handlerOid = LookupFuncName(stmt->plhandler, 0, NULL, false); + funcrettype = get_func_rettype(handlerOid); + if (funcrettype != LANGUAGE_HANDLEROID) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("function %s must return type %s", + NameListToString(stmt->plhandler), 
"language_handler"))); + + /* validate the inline function */ + if (stmt->plinline) + { + funcargtypes[0] = INTERNALOID; + inlineOid = LookupFuncName(stmt->plinline, 1, funcargtypes, false); + /* return value is ignored, so we don't check the type */ + } + else + inlineOid = InvalidOid; + + /* validate the validator function */ + if (stmt->plvalidator) + { + funcargtypes[0] = OIDOID; + valOid = LookupFuncName(stmt->plvalidator, 1, funcargtypes, false); + /* return value is ignored, so we don't check the type */ + } + else + valOid = InvalidOid; + + /* ok to create it */ + rel = table_open(LanguageRelationId, RowExclusiveLock); + tupDesc = RelationGetDescr(rel); + + /* Prepare data to be inserted */ + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + memset(replaces, true, sizeof(replaces)); + + namestrcpy(&langname, languageName); + values[Anum_pg_language_lanname - 1] = NameGetDatum(&langname); + values[Anum_pg_language_lanowner - 1] = ObjectIdGetDatum(languageOwner); + values[Anum_pg_language_lanispl - 1] = BoolGetDatum(true); + values[Anum_pg_language_lanpltrusted - 1] = BoolGetDatum(stmt->pltrusted); + values[Anum_pg_language_lanplcallfoid - 1] = ObjectIdGetDatum(handlerOid); + values[Anum_pg_language_laninline - 1] = ObjectIdGetDatum(inlineOid); + values[Anum_pg_language_lanvalidator - 1] = ObjectIdGetDatum(valOid); + nulls[Anum_pg_language_lanacl - 1] = true; + + /* Check for pre-existing definition */ + oldtup = SearchSysCache1(LANGNAME, PointerGetDatum(languageName)); + + if (HeapTupleIsValid(oldtup)) + { + Form_pg_language oldform = (Form_pg_language) GETSTRUCT(oldtup); + + /* There is one; okay to replace it? 
*/ + if (!stmt->replace) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("language \"%s\" already exists", languageName))); + + /* This is currently pointless, since we already checked superuser */ +#ifdef NOT_USED + if (!pg_language_ownercheck(oldform->oid, languageOwner)) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_LANGUAGE, + languageName); +#endif + + /* + * Do not change existing oid, ownership or permissions. Note + * dependency-update code below has to agree with this decision. + */ + replaces[Anum_pg_language_oid - 1] = false; + replaces[Anum_pg_language_lanowner - 1] = false; + replaces[Anum_pg_language_lanacl - 1] = false; + + /* Okay, do it... */ + tup = heap_modify_tuple(oldtup, tupDesc, values, nulls, replaces); + CatalogTupleUpdate(rel, &tup->t_self, tup); + + langoid = oldform->oid; + ReleaseSysCache(oldtup); + is_update = true; + } + else + { + /* Creating a new language */ + langoid = GetNewOidWithIndex(rel, LanguageOidIndexId, + Anum_pg_language_oid); + values[Anum_pg_language_oid - 1] = ObjectIdGetDatum(langoid); + tup = heap_form_tuple(tupDesc, values, nulls); + CatalogTupleInsert(rel, tup); + is_update = false; + } + + /* + * Create dependencies for the new language. If we are updating an + * existing language, first delete any existing pg_depend entries. + * (However, since we are not changing ownership or permissions, the + * shared dependencies do *not* need to change, and we leave them alone.) 
+ */ + myself.classId = LanguageRelationId; + myself.objectId = langoid; + myself.objectSubId = 0; + + if (is_update) + deleteDependencyRecordsFor(myself.classId, myself.objectId, true); + + /* dependency on owner of language */ + if (!is_update) + recordDependencyOnOwner(myself.classId, myself.objectId, + languageOwner); + + /* dependency on extension */ + recordDependencyOnCurrentExtension(&myself, is_update); + + addrs = new_object_addresses(); + + /* dependency on the PL handler function */ + ObjectAddressSet(referenced, ProcedureRelationId, handlerOid); + add_exact_object_address(&referenced, addrs); + + /* dependency on the inline handler function, if any */ + if (OidIsValid(inlineOid)) + { + ObjectAddressSet(referenced, ProcedureRelationId, inlineOid); + add_exact_object_address(&referenced, addrs); + } + + /* dependency on the validator function, if any */ + if (OidIsValid(valOid)) + { + ObjectAddressSet(referenced, ProcedureRelationId, valOid); + add_exact_object_address(&referenced, addrs); + } + + record_object_address_dependencies(&myself, addrs, DEPENDENCY_NORMAL); + free_object_addresses(addrs); + + /* Post creation hook for new procedural language */ + InvokeObjectPostCreateHook(LanguageRelationId, myself.objectId, 0); + + table_close(rel, RowExclusiveLock); + + return myself; +} + +/* + * get_language_oid - given a language name, look up the OID + * + * If missing_ok is false, throw an error if language name not found. If + * true, just return InvalidOid. 
+ */ +Oid +get_language_oid(const char *langname, bool missing_ok) +{ + Oid oid; + + oid = GetSysCacheOid1(LANGNAME, Anum_pg_language_oid, + CStringGetDatum(langname)); + if (!OidIsValid(oid) && !missing_ok) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("language \"%s\" does not exist", langname))); + return oid; +} diff --git a/src/backend/commands/publicationcmds.c b/src/backend/commands/publicationcmds.c new file mode 100644 index 0000000..473c72e --- /dev/null +++ b/src/backend/commands/publicationcmds.c @@ -0,0 +1,2006 @@ +/*------------------------------------------------------------------------- + * + * publicationcmds.c + * publication manipulation + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/commands/publicationcmds.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/genam.h" +#include "access/htup_details.h" +#include "access/table.h" +#include "access/xact.h" +#include "catalog/catalog.h" +#include "catalog/indexing.h" +#include "catalog/namespace.h" +#include "catalog/objectaccess.h" +#include "catalog/objectaddress.h" +#include "catalog/partition.h" +#include "catalog/pg_inherits.h" +#include "catalog/pg_namespace.h" +#include "catalog/pg_proc.h" +#include "catalog/pg_publication.h" +#include "catalog/pg_publication_namespace.h" +#include "catalog/pg_publication_rel.h" +#include "catalog/pg_type.h" +#include "commands/dbcommands.h" +#include "commands/defrem.h" +#include "commands/event_trigger.h" +#include "commands/publicationcmds.h" +#include "funcapi.h" +#include "miscadmin.h" +#include "nodes/nodeFuncs.h" +#include "parser/parse_clause.h" +#include "parser/parse_collate.h" +#include "parser/parse_relation.h" +#include "storage/lmgr.h" +#include "utils/acl.h" +#include "utils/array.h" +#include 
"utils/builtins.h" +#include "utils/catcache.h" +#include "utils/fmgroids.h" +#include "utils/inval.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" +#include "utils/syscache.h" +#include "utils/varlena.h" + + +/* + * Information used to validate the columns in the row filter expression. See + * contain_invalid_rfcolumn_walker for details. + */ +typedef struct rf_context +{ + Bitmapset *bms_replident; /* bitset of replica identity columns */ + bool pubviaroot; /* true if we are validating the parent + * relation's row filter */ + Oid relid; /* relid of the relation */ + Oid parentid; /* relid of the parent relation */ +} rf_context; + +static List *OpenTableList(List *tables); +static void CloseTableList(List *rels); +static void LockSchemaList(List *schemalist); +static void PublicationAddTables(Oid pubid, List *rels, bool if_not_exists, + AlterPublicationStmt *stmt); +static void PublicationDropTables(Oid pubid, List *rels, bool missing_ok); +static void PublicationAddSchemas(Oid pubid, List *schemas, bool if_not_exists, + AlterPublicationStmt *stmt); +static void PublicationDropSchemas(Oid pubid, List *schemas, bool missing_ok); + + +static void +parse_publication_options(ParseState *pstate, + List *options, + bool *publish_given, + PublicationActions *pubactions, + bool *publish_via_partition_root_given, + bool *publish_via_partition_root) +{ + ListCell *lc; + + *publish_given = false; + *publish_via_partition_root_given = false; + + /* defaults */ + pubactions->pubinsert = true; + pubactions->pubupdate = true; + pubactions->pubdelete = true; + pubactions->pubtruncate = true; + *publish_via_partition_root = false; + + /* Parse options */ + foreach(lc, options) + { + DefElem *defel = (DefElem *) lfirst(lc); + + if (strcmp(defel->defname, "publish") == 0) + { + char *publish; + List *publish_list; + ListCell *lc; + + if (*publish_given) + errorConflictingDefElem(defel, pstate); + + /* + * If publish option was given only the explicitly listed actions + * 
should be published. + */ + pubactions->pubinsert = false; + pubactions->pubupdate = false; + pubactions->pubdelete = false; + pubactions->pubtruncate = false; + + *publish_given = true; + publish = defGetString(defel); + + if (!SplitIdentifierString(publish, ',', &publish_list)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid list syntax in parameter \"%s\"", + "publish"))); + + /* Process the option list. */ + foreach(lc, publish_list) + { + char *publish_opt = (char *) lfirst(lc); + + if (strcmp(publish_opt, "insert") == 0) + pubactions->pubinsert = true; + else if (strcmp(publish_opt, "update") == 0) + pubactions->pubupdate = true; + else if (strcmp(publish_opt, "delete") == 0) + pubactions->pubdelete = true; + else if (strcmp(publish_opt, "truncate") == 0) + pubactions->pubtruncate = true; + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unrecognized value for publication option \"%s\": \"%s\"", + "publish", publish_opt))); + } + } + else if (strcmp(defel->defname, "publish_via_partition_root") == 0) + { + if (*publish_via_partition_root_given) + errorConflictingDefElem(defel, pstate); + *publish_via_partition_root_given = true; + *publish_via_partition_root = defGetBoolean(defel); + } + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unrecognized publication parameter: \"%s\"", defel->defname))); + } +} + +/* + * Convert the PublicationObjSpecType list into schema oid list and + * PublicationTable list. 
 */
static void
ObjectsInPublicationToOids(List *pubobjspec_list, ParseState *pstate,
						   List **rels, List **schemas)
{
	ListCell   *cell;
	PublicationObjSpec *pubobj;

	/* Nothing to collect for an empty object list. */
	if (!pubobjspec_list)
		return;

	foreach(cell, pubobjspec_list)
	{
		Oid			schemaid;
		List	   *search_path;

		pubobj = (PublicationObjSpec *) lfirst(cell);

		switch (pubobj->pubobjtype)
		{
			case PUBLICATIONOBJ_TABLE:
				/* Tables are returned as-is for later OpenTableList(). */
				*rels = lappend(*rels, pubobj->pubtable);
				break;
			case PUBLICATIONOBJ_TABLES_IN_SCHEMA:
				schemaid = get_namespace_oid(pubobj->name, false);

				/* Filter out duplicates if user specifies "sch1, sch1" */
				*schemas = list_append_unique_oid(*schemas, schemaid);
				break;
			case PUBLICATIONOBJ_TABLES_IN_CUR_SCHEMA:
				search_path = fetch_search_path(false);
				if (search_path == NIL) /* nothing valid in search_path? */
					ereport(ERROR,
							errcode(ERRCODE_UNDEFINED_SCHEMA),
							errmsg("no schema has been selected for CURRENT_SCHEMA"));

				/* CURRENT_SCHEMA resolves to the first valid search_path entry. */
				schemaid = linitial_oid(search_path);
				list_free(search_path);

				/* Filter out duplicates if user specifies "sch1, sch1" */
				*schemas = list_append_unique_oid(*schemas, schemaid);
				break;
			default:
				/* shouldn't happen */
				elog(ERROR, "invalid publication object type %d", pubobj->pubobjtype);
				break;
		}
	}
}

/*
 * Returns true if any of the columns used in the row filter WHERE expression is
 * not part of REPLICA IDENTITY, false otherwise.
 *
 * Walker for expression_tree_walker(); "context" carries the replica identity
 * column bitmap plus parent/child relation OIDs for attnum translation.
 */
static bool
contain_invalid_rfcolumn_walker(Node *node, rf_context *context)
{
	if (node == NULL)
		return false;

	if (IsA(node, Var))
	{
		Var		   *var = (Var *) node;
		AttrNumber	attnum = var->varattno;

		/*
		 * If pubviaroot is true, we are validating the row filter of the
		 * parent table, but the bitmap contains the replica identity
		 * information of the child table. So, get the column number of the
		 * child table as parent and child column order could be different.
		 */
		if (context->pubviaroot)
		{
			char	   *colname = get_attname(context->parentid, attnum, false);

			attnum = get_attnum(context->relid, colname);
		}

		/*
		 * bms_replident stores attnums offset by
		 * FirstLowInvalidHeapAttributeNumber (system-column convention), so
		 * apply the same offset before the membership test.
		 */
		if (!bms_is_member(attnum - FirstLowInvalidHeapAttributeNumber,
						   context->bms_replident))
			return true;
	}

	/* Recurse into the rest of the expression tree. */
	return expression_tree_walker(node, contain_invalid_rfcolumn_walker,
								  (void *) context);
}

/*
 * Check if all columns referenced in the filter expression are part of the
 * REPLICA IDENTITY index or not.
 *
 * Returns true if any invalid column is found.
 */
bool
pub_rf_contains_invalid_column(Oid pubid, Relation relation, List *ancestors,
							   bool pubviaroot)
{
	HeapTuple	rftuple;
	Oid			relid = RelationGetRelid(relation);
	Oid			publish_as_relid = RelationGetRelid(relation);
	bool		result = false;
	Datum		rfdatum;
	bool		rfisnull;

	/*
	 * FULL means all columns are in the REPLICA IDENTITY, so all columns are
	 * allowed in the row filter and we can skip the validation.
	 */
	if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
		return false;

	/*
	 * For a partition, if pubviaroot is true, find the topmost ancestor that
	 * is published via this publication as we need to use its row filter
	 * expression to filter the partition's changes.
	 *
	 * Note that even though the row filter used is for an ancestor, the
	 * REPLICA IDENTITY used will be for the actual child table.
	 */
	if (pubviaroot && relation->rd_rel->relispartition)
	{
		publish_as_relid
			= GetTopMostAncestorInPublication(pubid, ancestors, NULL);

		if (!OidIsValid(publish_as_relid))
			publish_as_relid = relid;
	}

	rftuple = SearchSysCache2(PUBLICATIONRELMAP,
							  ObjectIdGetDatum(publish_as_relid),
							  ObjectIdGetDatum(pubid));

	/* Relation is not directly in this publication: nothing to validate. */
	if (!HeapTupleIsValid(rftuple))
		return false;

	rfdatum = SysCacheGetAttr(PUBLICATIONRELMAP, rftuple,
							  Anum_pg_publication_rel_prqual,
							  &rfisnull);

	if (!rfisnull)
	{
		rf_context	context = {0};
		Node	   *rfnode;
		Bitmapset  *bms = NULL;

		context.pubviaroot = pubviaroot;
		context.parentid = publish_as_relid;
		context.relid = relid;

		/* Remember columns that are part of the REPLICA IDENTITY */
		bms = RelationGetIndexAttrBitmap(relation,
										 INDEX_ATTR_BITMAP_IDENTITY_KEY);

		context.bms_replident = bms;
		rfnode = stringToNode(TextDatumGetCString(rfdatum));
		result = contain_invalid_rfcolumn_walker(rfnode, &context);
	}

	ReleaseSysCache(rftuple);

	return result;
}

/*
 * Check if all columns referenced in the REPLICA IDENTITY are covered by
 * the column list.
 *
 * Returns true if any replica identity column is not covered by column list.
 */
bool
pub_collist_contains_invalid_column(Oid pubid, Relation relation, List *ancestors,
									bool pubviaroot)
{
	HeapTuple	tuple;
	Oid			relid = RelationGetRelid(relation);
	Oid			publish_as_relid = RelationGetRelid(relation);
	bool		result = false;
	Datum		datum;
	bool		isnull;

	/*
	 * For a partition, if pubviaroot is true, find the topmost ancestor that
	 * is published via this publication as we need to use its column list for
	 * the changes.
	 *
	 * Note that even though the column list used is for an ancestor, the
	 * REPLICA IDENTITY used will be for the actual child table.
	 */
	if (pubviaroot && relation->rd_rel->relispartition)
	{
		publish_as_relid = GetTopMostAncestorInPublication(pubid, ancestors, NULL);

		if (!OidIsValid(publish_as_relid))
			publish_as_relid = relid;
	}

	tuple = SearchSysCache2(PUBLICATIONRELMAP,
							ObjectIdGetDatum(publish_as_relid),
							ObjectIdGetDatum(pubid));

	/* Relation is not directly in this publication: nothing to validate. */
	if (!HeapTupleIsValid(tuple))
		return false;

	datum = SysCacheGetAttr(PUBLICATIONRELMAP, tuple,
							Anum_pg_publication_rel_prattrs,
							&isnull);

	if (!isnull)
	{
		int			x;
		Bitmapset  *idattrs;
		Bitmapset  *columns = NULL;

		/* With REPLICA IDENTITY FULL, no column list is allowed. */
		if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
			result = true;

		/* Transform the column list datum to a bitmapset. */
		columns = pub_collist_to_bitmapset(NULL, datum, NULL);

		/* Remember columns that are part of the REPLICA IDENTITY */
		idattrs = RelationGetIndexAttrBitmap(relation,
											 INDEX_ATTR_BITMAP_IDENTITY_KEY);

		/*
		 * Attnums in the bitmap returned by RelationGetIndexAttrBitmap are
		 * offset (to handle system columns the usual way), while column list
		 * does not use offset, so we can't do bms_is_subset(). Instead, we
		 * have to loop over the idattrs and check all of them are in the
		 * list.
		 */
		x = -1;
		while ((x = bms_next_member(idattrs, x)) >= 0)
		{
			AttrNumber	attnum = (x + FirstLowInvalidHeapAttributeNumber);

			/*
			 * If pubviaroot is true, we are validating the column list of the
			 * parent table, but the bitmap contains the replica identity
			 * information of the child table. The parent/child attnums may
			 * not match, so translate them to the parent - get the attname
			 * from the child, and look it up in the parent.
			 */
			if (pubviaroot)
			{
				/* attribute name in the child table */
				char	   *colname = get_attname(relid, attnum, false);

				/*
				 * Determine the attnum for the attribute name in parent (we
				 * are using the column list defined on the parent).
				 */
				attnum = get_attnum(publish_as_relid, colname);
			}

			/* replica identity column, not covered by the column list */
			if (!bms_is_member(attnum, columns))
			{
				result = true;
				break;
			}
		}

		bms_free(idattrs);
		bms_free(columns);
	}

	ReleaseSysCache(tuple);

	return result;
}

/*
 * check_functions_in_node callback: reject any function that is either not
 * immutable or is user-defined (OID at or above FirstNormalObjectId).
 */
static bool
contain_mutable_or_user_functions_checker(Oid func_id, void *context)
{
	return (func_volatile(func_id) != PROVOLATILE_IMMUTABLE ||
			func_id >= FirstNormalObjectId);
}

/*
 * The row filter walker checks if the row filter expression is a "simple
 * expression".
 *
 * It allows only simple or compound expressions such as:
 * - (Var Op Const)
 * - (Var Op Var)
 * - (Var Op Const) AND/OR (Var Op Const)
 * - etc
 * (where Var is a column of the table this filter belongs to)
 *
 * The simple expression has the following restrictions:
 * - User-defined operators are not allowed;
 * - User-defined functions are not allowed;
 * - User-defined types are not allowed;
 * - User-defined collations are not allowed;
 * - Non-immutable built-in functions are not allowed;
 * - System columns are not allowed.
 *
 * NOTES
 *
 * We don't allow user-defined functions/operators/types/collations because
 * (a) if a user drops a user-defined object used in a row filter expression or
 * if there is any other error while using it, the logical decoding
 * infrastructure won't be able to recover from such an error even if the
 * object is recreated again because a historic snapshot is used to evaluate
 * the row filter;
 * (b) a user-defined function can be used to access tables that could have
 * unpleasant results because a historic snapshot is used. That's why only
 * immutable built-in functions are allowed in row filter expressions.
 *
 * We don't allow system columns because currently, we don't have that
 * information in the tuple passed to downstream. Also, as we don't replicate
 * those to subscribers, there doesn't seem to be a need for a filter on those
 * columns.
 *
 * We can allow other node types after more analysis and testing.
 */
static bool
check_simple_rowfilter_expr_walker(Node *node, ParseState *pstate)
{
	char	   *errdetail_msg = NULL;

	if (node == NULL)
		return false;

	switch (nodeTag(node))
	{
		case T_Var:
			/* System columns are not allowed. */
			if (((Var *) node)->varattno < InvalidAttrNumber)
				errdetail_msg = _("System columns are not allowed.");
			break;
		case T_OpExpr:
		case T_DistinctExpr:
		case T_NullIfExpr:
			/* OK, except user-defined operators are not allowed. */
			if (((OpExpr *) node)->opno >= FirstNormalObjectId)
				errdetail_msg = _("User-defined operators are not allowed.");
			break;
		case T_ScalarArrayOpExpr:
			/* OK, except user-defined operators are not allowed. */
			if (((ScalarArrayOpExpr *) node)->opno >= FirstNormalObjectId)
				errdetail_msg = _("User-defined operators are not allowed.");

			/*
			 * We don't need to check the hashfuncid and negfuncid of
			 * ScalarArrayOpExpr as those functions are only built for a
			 * subquery.
			 */
			break;
		case T_RowCompareExpr:
			{
				ListCell   *opid;

				/* OK, except user-defined operators are not allowed. */
				foreach(opid, ((RowCompareExpr *) node)->opnos)
				{
					if (lfirst_oid(opid) >= FirstNormalObjectId)
					{
						errdetail_msg = _("User-defined operators are not allowed.");
						break;
					}
				}
			}
			break;
		case T_Const:
		case T_FuncExpr:
		case T_BoolExpr:
		case T_RelabelType:
		case T_CollateExpr:
		case T_CaseExpr:
		case T_CaseTestExpr:
		case T_ArrayExpr:
		case T_RowExpr:
		case T_CoalesceExpr:
		case T_MinMaxExpr:
		case T_XmlExpr:
		case T_NullTest:
		case T_BooleanTest:
		case T_List:
			/* OK, supported */
			break;
		default:
			errdetail_msg = _("Only columns, constants, built-in operators, built-in data types, built-in collations, and immutable built-in functions are allowed.");
			break;
	}

	/*
	 * For all the supported nodes, if we haven't already found a problem,
	 * check the types, functions, and collations used in it. We check List
	 * by walking through each element.
	 */
	if (!errdetail_msg && !IsA(node, List))
	{
		if (exprType(node) >= FirstNormalObjectId)
			errdetail_msg = _("User-defined types are not allowed.");
		else if (check_functions_in_node(node, contain_mutable_or_user_functions_checker,
										 (void *) pstate))
			errdetail_msg = _("User-defined or built-in mutable functions are not allowed.");
		else if (exprCollation(node) >= FirstNormalObjectId ||
				 exprInputCollation(node) >= FirstNormalObjectId)
			errdetail_msg = _("User-defined collations are not allowed.");
	}

	/*
	 * If we found a problem in this node, throw error now. Otherwise keep
	 * going.
	 */
	if (errdetail_msg)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("invalid publication WHERE expression"),
				 errdetail_internal("%s", errdetail_msg),
				 parser_errposition(pstate, exprLocation(node))));

	return expression_tree_walker(node, check_simple_rowfilter_expr_walker,
								  (void *) pstate);
}

/*
 * Check if the row filter expression is a "simple expression".
 *
 * See check_simple_rowfilter_expr_walker for details.
 */
static bool
check_simple_rowfilter_expr(Node *node, ParseState *pstate)
{
	/* Thin entry point; the walker does all validation and error reporting. */
	return check_simple_rowfilter_expr_walker(node, pstate);
}

/*
 * Transform the publication WHERE expression for all the relations in the list,
 * ensuring it is coerced to boolean and necessary collation information is
 * added if required, and add a new nsitem/RTE for the associated relation to
 * the ParseState's namespace list.
 *
 * Also check the publication row filter expression and throw an error if
 * anything not permitted or unexpected is encountered.
 *
 * On success, each entry's whereClause is replaced with its transformed form.
 */
static void
TransformPubWhereClauses(List *tables, const char *queryString,
						 bool pubviaroot)
{
	ListCell   *lc;

	foreach(lc, tables)
	{
		ParseNamespaceItem *nsitem;
		Node	   *whereclause = NULL;
		ParseState *pstate;
		PublicationRelInfo *pri = (PublicationRelInfo *) lfirst(lc);

		/* Tables without a WHERE clause need no transformation. */
		if (pri->whereClause == NULL)
			continue;

		/*
		 * If the publication doesn't publish changes via the root partitioned
		 * table, the partition's row filter will be used. So disallow using
		 * WHERE clause on partitioned table in this case.
		 */
		if (!pubviaroot &&
			pri->relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("cannot use publication WHERE clause for relation \"%s\"",
							RelationGetRelationName(pri->relation)),
					 errdetail("WHERE clause cannot be used for a partitioned table when %s is false.",
							   "publish_via_partition_root")));

		/*
		 * A fresh pstate is required so that we only have "this" table in its
		 * rangetable
		 */
		pstate = make_parsestate(NULL);
		pstate->p_sourcetext = queryString;
		nsitem = addRangeTableEntryForRelation(pstate, pri->relation,
											   AccessShareLock, NULL,
											   false, false);
		addNSItemToQuery(pstate, nsitem, false, true, true);

		/* Coerce to boolean; copyObject keeps the raw clause intact. */
		whereclause = transformWhereClause(pstate,
										   copyObject(pri->whereClause),
										   EXPR_KIND_WHERE,
										   "PUBLICATION WHERE");

		/* Fix up collation information */
		assign_expr_collations(pstate, whereclause);

		/*
		 * We allow only simple expressions in row filters. See
		 * check_simple_rowfilter_expr_walker.
		 */
		check_simple_rowfilter_expr(whereclause, pstate);

		free_parsestate(pstate);

		pri->whereClause = whereclause;
	}
}


/*
 * Given a list of tables that are going to be added to a publication,
 * verify that they fulfill the necessary preconditions, namely: no tables
 * have a column list if any schema is published; and partitioned tables do
 * not have column lists if publish_via_partition_root is not set.
 *
 * 'publish_schema' indicates that the publication contains any TABLES IN
 * SCHEMA elements (newly added in this command, or preexisting).
 * 'pubviaroot' is the value of publish_via_partition_root.
 */
static void
CheckPubRelationColumnList(char *pubname, List *tables,
						   bool publish_schema, bool pubviaroot)
{
	ListCell   *lc;

	foreach(lc, tables)
	{
		PublicationRelInfo *pri = (PublicationRelInfo *) lfirst(lc);

		/* Only tables with a column list need checking. */
		if (pri->columns == NIL)
			continue;

		/*
		 * Disallow specifying column list if any schema is in the
		 * publication.
		 *
		 * XXX We could instead just forbid the case when the publication
		 * tries to publish the table with a column list and a schema for that
		 * table. However, if we do that then we need a restriction during
		 * ALTER TABLE ... SET SCHEMA to prevent such a case which doesn't
		 * seem to be a good idea.
		 */
		if (publish_schema)
			ereport(ERROR,
					errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					errmsg("cannot use column list for relation \"%s.%s\" in publication \"%s\"",
						   get_namespace_name(RelationGetNamespace(pri->relation)),
						   RelationGetRelationName(pri->relation), pubname),
					errdetail("Column lists cannot be specified in publications containing FOR TABLES IN SCHEMA elements."));

		/*
		 * If the publication doesn't publish changes via the root partitioned
		 * table, the partition's column list will be used. So disallow using
		 * a column list on the partitioned table in this case.
		 */
		if (!pubviaroot &&
			pri->relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("cannot use column list for relation \"%s.%s\" in publication \"%s\"",
							get_namespace_name(RelationGetNamespace(pri->relation)),
							RelationGetRelationName(pri->relation), pubname),
					 errdetail("Column lists cannot be specified for partitioned tables when %s is false.",
							   "publish_via_partition_root")));
	}
}

/*
 * Create new publication.
 */
ObjectAddress
CreatePublication(ParseState *pstate, CreatePublicationStmt *stmt)
{
	Relation	rel;
	ObjectAddress myself;
	Oid			puboid;
	bool		nulls[Natts_pg_publication];
	Datum		values[Natts_pg_publication];
	HeapTuple	tup;
	bool		publish_given;
	PublicationActions pubactions;
	bool		publish_via_partition_root_given;
	bool		publish_via_partition_root;
	AclResult	aclresult;
	List	   *relations = NIL;
	List	   *schemaidlist = NIL;

	/* must have CREATE privilege on database */
	aclresult = pg_database_aclcheck(MyDatabaseId, GetUserId(), ACL_CREATE);
	if (aclresult != ACLCHECK_OK)
		aclcheck_error(aclresult, OBJECT_DATABASE,
					   get_database_name(MyDatabaseId));

	/* FOR ALL TABLES requires superuser */
	if (stmt->for_all_tables && !superuser())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("must be superuser to create FOR ALL TABLES publication")));

	rel = table_open(PublicationRelationId, RowExclusiveLock);

	/* Check if name is used */
	puboid = GetSysCacheOid1(PUBLICATIONNAME, Anum_pg_publication_oid,
							 CStringGetDatum(stmt->pubname));
	if (OidIsValid(puboid))
		ereport(ERROR,
				(errcode(ERRCODE_DUPLICATE_OBJECT),
				 errmsg("publication \"%s\" already exists",
						stmt->pubname)));

	/* Form a tuple. */
	memset(values, 0, sizeof(values));
	memset(nulls, false, sizeof(nulls));

	values[Anum_pg_publication_pubname - 1] =
		DirectFunctionCall1(namein, CStringGetDatum(stmt->pubname));
	values[Anum_pg_publication_pubowner - 1] = ObjectIdGetDatum(GetUserId());

	/* Parse WITH (...) options; errors out on bad/duplicate options. */
	parse_publication_options(pstate,
							  stmt->options,
							  &publish_given, &pubactions,
							  &publish_via_partition_root_given,
							  &publish_via_partition_root);

	puboid = GetNewOidWithIndex(rel, PublicationObjectIndexId,
								Anum_pg_publication_oid);
	values[Anum_pg_publication_oid - 1] = ObjectIdGetDatum(puboid);
	values[Anum_pg_publication_puballtables - 1] =
		BoolGetDatum(stmt->for_all_tables);
	values[Anum_pg_publication_pubinsert - 1] =
		BoolGetDatum(pubactions.pubinsert);
	values[Anum_pg_publication_pubupdate - 1] =
		BoolGetDatum(pubactions.pubupdate);
	values[Anum_pg_publication_pubdelete - 1] =
		BoolGetDatum(pubactions.pubdelete);
	values[Anum_pg_publication_pubtruncate - 1] =
		BoolGetDatum(pubactions.pubtruncate);
	values[Anum_pg_publication_pubviaroot - 1] =
		BoolGetDatum(publish_via_partition_root);

	tup = heap_form_tuple(RelationGetDescr(rel), values, nulls);

	/* Insert tuple into catalog. */
	CatalogTupleInsert(rel, tup);
	heap_freetuple(tup);

	recordDependencyOnOwner(PublicationRelationId, puboid, GetUserId());

	ObjectAddressSet(myself, PublicationRelationId, puboid);

	/* Make the changes visible. */
	CommandCounterIncrement();

	/* Associate objects with the publication. */
	if (stmt->for_all_tables)
	{
		/* Invalidate relcache so that publication info is rebuilt. */
		CacheInvalidateRelcacheAll();
	}
	else
	{
		/* Split object list into explicit tables and schema OIDs. */
		ObjectsInPublicationToOids(stmt->pubobjects, pstate, &relations,
								   &schemaidlist);

		/* FOR TABLES IN SCHEMA requires superuser */
		if (schemaidlist != NIL && !superuser())
			ereport(ERROR,
					errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
					errmsg("must be superuser to create FOR TABLES IN SCHEMA publication"));

		if (list_length(relations) > 0)
		{
			List	   *rels;

			rels = OpenTableList(relations);
			TransformPubWhereClauses(rels, pstate->p_sourcetext,
									 publish_via_partition_root);

			CheckPubRelationColumnList(stmt->pubname, rels,
									   schemaidlist != NIL,
									   publish_via_partition_root);

			PublicationAddTables(puboid, rels, true, NULL);
			CloseTableList(rels);
		}

		if (list_length(schemaidlist) > 0)
		{
			/*
			 * Schema lock is held until the publication is created to prevent
			 * concurrent schema deletion.
			 */
			LockSchemaList(schemaidlist);
			PublicationAddSchemas(puboid, schemaidlist, true, NULL);
		}
	}

	table_close(rel, RowExclusiveLock);

	InvokeObjectPostCreateHook(PublicationRelationId, puboid, 0);

	/* Publication is created regardless; just warn that it can't work yet. */
	if (wal_level != WAL_LEVEL_LOGICAL)
		ereport(WARNING,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("wal_level is insufficient to publish logical changes"),
				 errhint("Set wal_level to \"logical\" before creating subscriptions.")));

	return myself;
}

/*
 * Change options of a publication.
 */
static void
AlterPublicationOptions(ParseState *pstate, AlterPublicationStmt *stmt,
						Relation rel, HeapTuple tup)
{
	bool		nulls[Natts_pg_publication];
	bool		replaces[Natts_pg_publication];
	Datum		values[Natts_pg_publication];
	bool		publish_given;
	PublicationActions pubactions;
	bool		publish_via_partition_root_given;
	bool		publish_via_partition_root;
	ObjectAddress obj;
	Form_pg_publication pubform;
	List	   *root_relids = NIL;
	ListCell   *lc;

	parse_publication_options(pstate,
							  stmt->options,
							  &publish_given, &pubactions,
							  &publish_via_partition_root_given,
							  &publish_via_partition_root);

	pubform = (Form_pg_publication) GETSTRUCT(tup);

	/*
	 * If the publication doesn't publish changes via the root partitioned
	 * table, the partition's row filter and column list will be used. So
	 * disallow using WHERE clause and column lists on partitioned table in
	 * this case.
	 */
	if (!pubform->puballtables && publish_via_partition_root_given &&
		!publish_via_partition_root)
	{
		/*
		 * Lock the publication so nobody else can do anything with it. This
		 * prevents concurrent alter to add partitioned table(s) with WHERE
		 * clause(s) and/or column lists which we don't allow when not
		 * publishing via root.
		 */
		LockDatabaseObject(PublicationRelationId, pubform->oid, 0,
						   AccessShareLock);

		root_relids = GetPublicationRelations(pubform->oid,
											  PUBLICATION_PART_ROOT);

		foreach(lc, root_relids)
		{
			Oid			relid = lfirst_oid(lc);
			HeapTuple	rftuple;
			char		relkind;
			char	   *relname;
			bool		has_rowfilter;
			bool		has_collist;

			/*
			 * Beware: we don't have lock on the relations, so cope silently
			 * with the cache lookups returning NULL.
			 */

			rftuple = SearchSysCache2(PUBLICATIONRELMAP,
									  ObjectIdGetDatum(relid),
									  ObjectIdGetDatum(pubform->oid));
			if (!HeapTupleIsValid(rftuple))
				continue;
			has_rowfilter = !heap_attisnull(rftuple, Anum_pg_publication_rel_prqual, NULL);
			has_collist = !heap_attisnull(rftuple, Anum_pg_publication_rel_prattrs, NULL);
			if (!has_rowfilter && !has_collist)
			{
				ReleaseSysCache(rftuple);
				continue;
			}

			relkind = get_rel_relkind(relid);
			if (relkind != RELKIND_PARTITIONED_TABLE)
			{
				ReleaseSysCache(rftuple);
				continue;
			}
			relname = get_rel_name(relid);
			if (relname == NULL)	/* table concurrently dropped */
			{
				ReleaseSysCache(rftuple);
				continue;
			}

			if (has_rowfilter)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
						 errmsg("cannot set parameter \"%s\" to false for publication \"%s\"",
								"publish_via_partition_root",
								stmt->pubname),
						 errdetail("The publication contains a WHERE clause for partitioned table \"%s\", which is not allowed when \"%s\" is false.",
								   relname, "publish_via_partition_root")));
			Assert(has_collist);
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("cannot set parameter \"%s\" to false for publication \"%s\"",
							"publish_via_partition_root",
							stmt->pubname),
					 errdetail("The publication contains a column list for partitioned table \"%s\", which is not allowed when \"%s\" is false.",
							   relname, "publish_via_partition_root")));
		}
	}

	/* Everything ok, form a new tuple. */
	memset(values, 0, sizeof(values));
	memset(nulls, false, sizeof(nulls));
	memset(replaces, false, sizeof(replaces));

	if (publish_given)
	{
		values[Anum_pg_publication_pubinsert - 1] = BoolGetDatum(pubactions.pubinsert);
		replaces[Anum_pg_publication_pubinsert - 1] = true;

		values[Anum_pg_publication_pubupdate - 1] = BoolGetDatum(pubactions.pubupdate);
		replaces[Anum_pg_publication_pubupdate - 1] = true;

		values[Anum_pg_publication_pubdelete - 1] = BoolGetDatum(pubactions.pubdelete);
		replaces[Anum_pg_publication_pubdelete - 1] = true;

		values[Anum_pg_publication_pubtruncate - 1] = BoolGetDatum(pubactions.pubtruncate);
		replaces[Anum_pg_publication_pubtruncate - 1] = true;
	}

	if (publish_via_partition_root_given)
	{
		values[Anum_pg_publication_pubviaroot - 1] = BoolGetDatum(publish_via_partition_root);
		replaces[Anum_pg_publication_pubviaroot - 1] = true;
	}

	tup = heap_modify_tuple(tup, RelationGetDescr(rel), values, nulls,
							replaces);

	/* Update the catalog. */
	CatalogTupleUpdate(rel, &tup->t_self, tup);

	CommandCounterIncrement();

	/* Re-fetch the form pointer: heap_modify_tuple returned a new tuple. */
	pubform = (Form_pg_publication) GETSTRUCT(tup);

	/* Invalidate the relcache. */
	if (pubform->puballtables)
	{
		CacheInvalidateRelcacheAll();
	}
	else
	{
		List	   *relids = NIL;
		List	   *schemarelids = NIL;

		/*
		 * For any partitioned tables contained in the publication, we must
		 * invalidate all partitions contained in the respective partition
		 * trees, not just those explicitly mentioned in the publication.
		 */
		if (root_relids == NIL)
			relids = GetPublicationRelations(pubform->oid,
											 PUBLICATION_PART_ALL);
		else
		{
			/*
			 * We already got tables explicitly mentioned in the publication.
			 * Now get all partitions for the partitioned table in the list.
			 */
			foreach(lc, root_relids)
				relids = GetPubPartitionOptionRelations(relids,
														PUBLICATION_PART_ALL,
														lfirst_oid(lc));
		}

		schemarelids = GetAllSchemaPublicationRelations(pubform->oid,
														PUBLICATION_PART_ALL);
		relids = list_concat_unique_oid(relids, schemarelids);

		InvalidatePublicationRels(relids);
	}

	ObjectAddressSet(obj, PublicationRelationId, pubform->oid);
	EventTriggerCollectSimpleCommand(obj, InvalidObjectAddress,
									 (Node *) stmt);

	InvokeObjectPostAlterHook(PublicationRelationId, pubform->oid, 0);
}

/*
 * Invalidate the relations.
 */
void
InvalidatePublicationRels(List *relids)
{
	/*
	 * We don't want to send too many individual messages, at some point it's
	 * cheaper to just reset whole relcache.
	 */
	if (list_length(relids) < MAX_RELCACHE_INVAL_MSGS)
	{
		ListCell   *lc;

		foreach(lc, relids)
			CacheInvalidateRelcacheByRelid(lfirst_oid(lc));
	}
	else
		CacheInvalidateRelcacheAll();
}

/*
 * Add or remove table to/from publication.
 */
static void
AlterPublicationTables(AlterPublicationStmt *stmt, HeapTuple tup,
					   List *tables, const char *queryString,
					   bool publish_schema)
{
	List	   *rels = NIL;
	Form_pg_publication pubform = (Form_pg_publication) GETSTRUCT(tup);
	Oid			pubid = pubform->oid;

	/*
	 * Nothing to do if no objects, except in SET: for that it is quite
	 * possible that user has not specified any tables in which case we need
	 * to remove all the existing tables.
	 */
	if (!tables && stmt->action != AP_SetObjects)
		return;

	rels = OpenTableList(tables);

	if (stmt->action == AP_AddObjects)
	{
		TransformPubWhereClauses(rels, queryString, pubform->pubviaroot);

		/* A preexisting TABLES IN SCHEMA element also forbids column lists. */
		publish_schema |= is_schema_publication(pubid);

		CheckPubRelationColumnList(stmt->pubname, rels, publish_schema,
								   pubform->pubviaroot);

		PublicationAddTables(pubid, rels, false, stmt);
	}
	else if (stmt->action == AP_DropObjects)
		PublicationDropTables(pubid, rels, false);
	else						/* AP_SetObjects */
	{
		List	   *oldrelids = GetPublicationRelations(pubid,
														PUBLICATION_PART_ROOT);
		List	   *delrels = NIL;
		ListCell   *oldlc;

		TransformPubWhereClauses(rels, queryString, pubform->pubviaroot);

		CheckPubRelationColumnList(stmt->pubname, rels, publish_schema,
								   pubform->pubviaroot);

		/*
		 * To recreate the relation list for the publication, look for
		 * existing relations that do not need to be dropped.
		 */
		foreach(oldlc, oldrelids)
		{
			Oid			oldrelid = lfirst_oid(oldlc);
			ListCell   *newlc;
			PublicationRelInfo *oldrel;
			bool		found = false;
			HeapTuple	rftuple;
			Node	   *oldrelwhereclause = NULL;
			Bitmapset  *oldcolumns = NULL;

			/* look up the cache for the old relmap */
			rftuple = SearchSysCache2(PUBLICATIONRELMAP,
									  ObjectIdGetDatum(oldrelid),
									  ObjectIdGetDatum(pubid));

			/*
			 * See if the existing relation currently has a WHERE clause or a
			 * column list. We need to compare those too.
			 */
			if (HeapTupleIsValid(rftuple))
			{
				bool		isnull = true;
				Datum		whereClauseDatum;
				Datum		columnListDatum;

				/* Load the WHERE clause for this table. */
				whereClauseDatum = SysCacheGetAttr(PUBLICATIONRELMAP, rftuple,
												   Anum_pg_publication_rel_prqual,
												   &isnull);
				if (!isnull)
					oldrelwhereclause = stringToNode(TextDatumGetCString(whereClauseDatum));

				/* Transform the int2vector column list to a bitmap. */
				columnListDatum = SysCacheGetAttr(PUBLICATIONRELMAP, rftuple,
												  Anum_pg_publication_rel_prattrs,
												  &isnull);

				if (!isnull)
					oldcolumns = pub_collist_to_bitmapset(NULL, columnListDatum, NULL);

				ReleaseSysCache(rftuple);
			}

			foreach(newlc, rels)
			{
				PublicationRelInfo *newpubrel;
				Oid			newrelid;
				Bitmapset  *newcolumns = NULL;

				newpubrel = (PublicationRelInfo *) lfirst(newlc);
				newrelid = RelationGetRelid(newpubrel->relation);

				/*
				 * If the new publication has column list, transform it to a
				 * bitmap too.
				 */
				if (newpubrel->columns)
				{
					ListCell   *lc;

					foreach(lc, newpubrel->columns)
					{
						char	   *colname = strVal(lfirst(lc));
						AttrNumber	attnum = get_attnum(newrelid, colname);

						newcolumns = bms_add_member(newcolumns, attnum);
					}
				}

				/*
				 * Check if any of the new set of relations matches with the
				 * existing relations in the publication. Additionally, if the
				 * relation has an associated WHERE clause, check the WHERE
				 * expressions also match. Same for the column list. Drop the
				 * rest.
				 */
				if (RelationGetRelid(newpubrel->relation) == oldrelid)
				{
					if (equal(oldrelwhereclause, newpubrel->whereClause) &&
						bms_equal(oldcolumns, newcolumns))
					{
						found = true;
						break;
					}
				}
			}

			/*
			 * Add the non-matched relations to a list so that they can be
			 * dropped.
			 */
			if (!found)
			{
				oldrel = palloc(sizeof(PublicationRelInfo));
				oldrel->whereClause = NULL;
				oldrel->columns = NIL;
				oldrel->relation = table_open(oldrelid,
											  ShareUpdateExclusiveLock);
				delrels = lappend(delrels, oldrel);
			}
		}

		/* And drop them. */
		PublicationDropTables(pubid, delrels, true);

		/*
		 * Don't bother calculating the difference for adding, we'll catch and
		 * skip existing ones when doing catalog update.
		 */
		PublicationAddTables(pubid, rels, true, stmt);

		CloseTableList(delrels);
	}

	CloseTableList(rels);
}

/*
 * Alter the publication schemas.
 *
 * Add or remove schemas to/from publication.
 */
static void
AlterPublicationSchemas(AlterPublicationStmt *stmt,
						HeapTuple tup, List *schemaidlist)
{
	Form_pg_publication pubform = (Form_pg_publication) GETSTRUCT(tup);

	/*
	 * Nothing to do if no objects, except in SET: for that it is quite
	 * possible that user has not specified any schemas in which case we need
	 * to remove all the existing schemas.
	 */
	if (!schemaidlist && stmt->action != AP_SetObjects)
		return;

	/*
	 * Schema lock is held until the publication is altered to prevent
	 * concurrent schema deletion.
	 */
	LockSchemaList(schemaidlist);
	if (stmt->action == AP_AddObjects)
	{
		ListCell   *lc;
		List	   *reloids;

		reloids = GetPublicationRelations(pubform->oid, PUBLICATION_PART_ROOT);

		foreach(lc, reloids)
		{
			HeapTuple	coltuple;

			coltuple = SearchSysCache2(PUBLICATIONRELMAP,
									   ObjectIdGetDatum(lfirst_oid(lc)),
									   ObjectIdGetDatum(pubform->oid));

			if (!HeapTupleIsValid(coltuple))
				continue;

			/*
			 * Disallow adding schema if column list is already part of the
			 * publication. See CheckPubRelationColumnList.
			 */
			if (!heap_attisnull(coltuple, Anum_pg_publication_rel_prattrs, NULL))
				ereport(ERROR,
						errcode(ERRCODE_INVALID_PARAMETER_VALUE),
						errmsg("cannot add schema to publication \"%s\"",
							   stmt->pubname),
						errdetail("Schemas cannot be added if any tables that specify a column list are already part of the publication."));

			ReleaseSysCache(coltuple);
		}

		PublicationAddSchemas(pubform->oid, schemaidlist, false, stmt);
	}
	else if (stmt->action == AP_DropObjects)
		PublicationDropSchemas(pubform->oid, schemaidlist, false);
	else						/* AP_SetObjects */
	{
		List	   *oldschemaids = GetPublicationSchemas(pubform->oid);
		List	   *delschemas = NIL;

		/* Identify which schemas should be dropped */
		delschemas = list_difference_oid(oldschemaids, schemaidlist);

		/*
		 * Schema lock is held until the publication is altered to prevent
		 * concurrent schema deletion.
		 */
		LockSchemaList(delschemas);

		/* And drop them */
		PublicationDropSchemas(pubform->oid, delschemas, true);

		/*
		 * Don't bother calculating the difference for adding, we'll catch and
		 * skip existing ones when doing catalog update.
		 */
		PublicationAddSchemas(pubform->oid, schemaidlist, true, stmt);
	}
}

/*
 * Check if relations and schemas can be in a given publication and throw
 * appropriate error if not.
 */
static void
CheckAlterPublication(AlterPublicationStmt *stmt, HeapTuple tup,
					  List *tables, List *schemaidlist)
{
	Form_pg_publication pubform = (Form_pg_publication) GETSTRUCT(tup);

	/* Adding or setting schemas requires superuser. */
	if ((stmt->action == AP_AddObjects || stmt->action == AP_SetObjects) &&
		schemaidlist && !superuser())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("must be superuser to add or set schemas")));

	/*
	 * Check that user is allowed to manipulate the publication tables in
	 * schema
	 */
	if (schemaidlist && pubform->puballtables)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("publication \"%s\" is defined as FOR ALL TABLES",
						NameStr(pubform->pubname)),
				 errdetail("Schemas cannot be added to or dropped from FOR ALL TABLES publications.")));

	/* Check that user is allowed to manipulate the publication tables. */
	if (tables && pubform->puballtables)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("publication \"%s\" is defined as FOR ALL TABLES",
						NameStr(pubform->pubname)),
				 errdetail("Tables cannot be added to or dropped from FOR ALL TABLES publications.")));
}

/*
 * Alter the existing publication.
 *
 * This is dispatcher function for AlterPublicationOptions,
 * AlterPublicationSchemas and AlterPublicationTables.
+ */ +void +AlterPublication(ParseState *pstate, AlterPublicationStmt *stmt) +{ + Relation rel; + HeapTuple tup; + Form_pg_publication pubform; + + rel = table_open(PublicationRelationId, RowExclusiveLock); + + tup = SearchSysCacheCopy1(PUBLICATIONNAME, + CStringGetDatum(stmt->pubname)); + + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("publication \"%s\" does not exist", + stmt->pubname))); + + pubform = (Form_pg_publication) GETSTRUCT(tup); + + /* must be owner */ + if (!pg_publication_ownercheck(pubform->oid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_PUBLICATION, + stmt->pubname); + + if (stmt->options) + AlterPublicationOptions(pstate, stmt, rel, tup); + else + { + List *relations = NIL; + List *schemaidlist = NIL; + Oid pubid = pubform->oid; + + ObjectsInPublicationToOids(stmt->pubobjects, pstate, &relations, + &schemaidlist); + + CheckAlterPublication(stmt, tup, relations, schemaidlist); + + heap_freetuple(tup); + + /* Lock the publication so nobody else can do anything with it. */ + LockDatabaseObject(PublicationRelationId, pubid, 0, + AccessExclusiveLock); + + /* + * It is possible that by the time we acquire the lock on publication, + * concurrent DDL has removed it. We can test this by checking the + * existence of publication. We get the tuple again to avoid the risk + * of any publication option getting changed. + */ + tup = SearchSysCacheCopy1(PUBLICATIONOID, ObjectIdGetDatum(pubid)); + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("publication \"%s\" does not exist", + stmt->pubname)); + + AlterPublicationTables(stmt, tup, relations, pstate->p_sourcetext, + schemaidlist != NIL); + AlterPublicationSchemas(stmt, tup, schemaidlist); + } + + /* Cleanup. */ + heap_freetuple(tup); + table_close(rel, RowExclusiveLock); +} + +/* + * Remove relation from publication by mapping OID. 
+ */ +void +RemovePublicationRelById(Oid proid) +{ + Relation rel; + HeapTuple tup; + Form_pg_publication_rel pubrel; + List *relids = NIL; + + rel = table_open(PublicationRelRelationId, RowExclusiveLock); + + tup = SearchSysCache1(PUBLICATIONREL, ObjectIdGetDatum(proid)); + + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for publication table %u", + proid); + + pubrel = (Form_pg_publication_rel) GETSTRUCT(tup); + + /* + * Invalidate relcache so that publication info is rebuilt. + * + * For the partitioned tables, we must invalidate all partitions contained + * in the respective partition hierarchies, not just the one explicitly + * mentioned in the publication. This is required because we implicitly + * publish the child tables when the parent table is published. + */ + relids = GetPubPartitionOptionRelations(relids, PUBLICATION_PART_ALL, + pubrel->prrelid); + + InvalidatePublicationRels(relids); + + CatalogTupleDelete(rel, &tup->t_self); + + ReleaseSysCache(tup); + + table_close(rel, RowExclusiveLock); +} + +/* + * Remove the publication by mapping OID. + */ +void +RemovePublicationById(Oid pubid) +{ + Relation rel; + HeapTuple tup; + Form_pg_publication pubform; + + rel = table_open(PublicationRelationId, RowExclusiveLock); + + tup = SearchSysCache1(PUBLICATIONOID, ObjectIdGetDatum(pubid)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for publication %u", pubid); + + pubform = (Form_pg_publication) GETSTRUCT(tup); + + /* Invalidate relcache so that publication info is rebuilt. */ + if (pubform->puballtables) + CacheInvalidateRelcacheAll(); + + CatalogTupleDelete(rel, &tup->t_self); + + ReleaseSysCache(tup); + + table_close(rel, RowExclusiveLock); +} + +/* + * Remove schema from publication by mapping OID. 
+ */ +void +RemovePublicationSchemaById(Oid psoid) +{ + Relation rel; + HeapTuple tup; + List *schemaRels = NIL; + Form_pg_publication_namespace pubsch; + + rel = table_open(PublicationNamespaceRelationId, RowExclusiveLock); + + tup = SearchSysCache1(PUBLICATIONNAMESPACE, ObjectIdGetDatum(psoid)); + + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for publication schema %u", psoid); + + pubsch = (Form_pg_publication_namespace) GETSTRUCT(tup); + + /* + * Invalidate relcache so that publication info is rebuilt. See + * RemovePublicationRelById for why we need to consider all the + * partitions. + */ + schemaRels = GetSchemaPublicationRelations(pubsch->pnnspid, + PUBLICATION_PART_ALL); + InvalidatePublicationRels(schemaRels); + + CatalogTupleDelete(rel, &tup->t_self); + + ReleaseSysCache(tup); + + table_close(rel, RowExclusiveLock); +} + +/* + * Open relations specified by a PublicationTable list. + * The returned tables are locked in ShareUpdateExclusiveLock mode in order to + * add them to a publication. + */ +static List * +OpenTableList(List *tables) +{ + List *relids = NIL; + List *rels = NIL; + ListCell *lc; + List *relids_with_rf = NIL; + List *relids_with_collist = NIL; + + /* + * Open, share-lock, and check all the explicitly-specified relations + */ + foreach(lc, tables) + { + PublicationTable *t = lfirst_node(PublicationTable, lc); + bool recurse = t->relation->inh; + Relation rel; + Oid myrelid; + PublicationRelInfo *pub_rel; + + /* Allow query cancel in case this takes a long time */ + CHECK_FOR_INTERRUPTS(); + + rel = table_openrv(t->relation, ShareUpdateExclusiveLock); + myrelid = RelationGetRelid(rel); + + /* + * Filter out duplicates if user specifies "foo, foo". + * + * Note that this algorithm is known to not be very efficient (O(N^2)) + * but given that it only works on list of tables given to us by user + * it's deemed acceptable. 
+ */ + if (list_member_oid(relids, myrelid)) + { + /* Disallow duplicate tables if there are any with row filters. */ + if (t->whereClause || list_member_oid(relids_with_rf, myrelid)) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("conflicting or redundant WHERE clauses for table \"%s\"", + RelationGetRelationName(rel)))); + + /* Disallow duplicate tables if there are any with column lists. */ + if (t->columns || list_member_oid(relids_with_collist, myrelid)) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("conflicting or redundant column lists for table \"%s\"", + RelationGetRelationName(rel)))); + + table_close(rel, ShareUpdateExclusiveLock); + continue; + } + + pub_rel = palloc(sizeof(PublicationRelInfo)); + pub_rel->relation = rel; + pub_rel->whereClause = t->whereClause; + pub_rel->columns = t->columns; + rels = lappend(rels, pub_rel); + relids = lappend_oid(relids, myrelid); + + if (t->whereClause) + relids_with_rf = lappend_oid(relids_with_rf, myrelid); + + if (t->columns) + relids_with_collist = lappend_oid(relids_with_collist, myrelid); + + /* + * Add children of this rel, if requested, so that they too are added + * to the publication. A partitioned table can't have any inheritance + * children other than its partitions, which need not be explicitly + * added to the publication. + */ + if (recurse && rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) + { + List *children; + ListCell *child; + + children = find_all_inheritors(myrelid, ShareUpdateExclusiveLock, + NULL); + + foreach(child, children) + { + Oid childrelid = lfirst_oid(child); + + /* Allow query cancel in case this takes a long time */ + CHECK_FOR_INTERRUPTS(); + + /* + * Skip duplicates if user specified both parent and child + * tables. + */ + if (list_member_oid(relids, childrelid)) + { + /* + * We don't allow to specify row filter for both parent + * and child table at the same time as it is not very + * clear which one should be given preference. 
+ */ + if (childrelid != myrelid && + (t->whereClause || list_member_oid(relids_with_rf, childrelid))) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("conflicting or redundant WHERE clauses for table \"%s\"", + RelationGetRelationName(rel)))); + + /* + * We don't allow to specify column list for both parent + * and child table at the same time as it is not very + * clear which one should be given preference. + */ + if (childrelid != myrelid && + (t->columns || list_member_oid(relids_with_collist, childrelid))) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("conflicting or redundant column lists for table \"%s\"", + RelationGetRelationName(rel)))); + + continue; + } + + /* find_all_inheritors already got lock */ + rel = table_open(childrelid, NoLock); + pub_rel = palloc(sizeof(PublicationRelInfo)); + pub_rel->relation = rel; + /* child inherits WHERE clause from parent */ + pub_rel->whereClause = t->whereClause; + + /* child inherits column list from parent */ + pub_rel->columns = t->columns; + rels = lappend(rels, pub_rel); + relids = lappend_oid(relids, childrelid); + + if (t->whereClause) + relids_with_rf = lappend_oid(relids_with_rf, childrelid); + + if (t->columns) + relids_with_collist = lappend_oid(relids_with_collist, childrelid); + } + } + } + + list_free(relids); + list_free(relids_with_rf); + + return rels; +} + +/* + * Close all relations in the list. + */ +static void +CloseTableList(List *rels) +{ + ListCell *lc; + + foreach(lc, rels) + { + PublicationRelInfo *pub_rel; + + pub_rel = (PublicationRelInfo *) lfirst(lc); + table_close(pub_rel->relation, NoLock); + } + + list_free_deep(rels); +} + +/* + * Lock the schemas specified in the schema list in AccessShareLock mode in + * order to prevent concurrent schema deletion. 
+ */ +static void +LockSchemaList(List *schemalist) +{ + ListCell *lc; + + foreach(lc, schemalist) + { + Oid schemaid = lfirst_oid(lc); + + /* Allow query cancel in case this takes a long time */ + CHECK_FOR_INTERRUPTS(); + LockDatabaseObject(NamespaceRelationId, schemaid, 0, AccessShareLock); + + /* + * It is possible that by the time we acquire the lock on schema, + * concurrent DDL has removed it. We can test this by checking the + * existence of schema. + */ + if (!SearchSysCacheExists1(NAMESPACEOID, ObjectIdGetDatum(schemaid))) + ereport(ERROR, + errcode(ERRCODE_UNDEFINED_SCHEMA), + errmsg("schema with OID %u does not exist", schemaid)); + } +} + +/* + * Add listed tables to the publication. + */ +static void +PublicationAddTables(Oid pubid, List *rels, bool if_not_exists, + AlterPublicationStmt *stmt) +{ + ListCell *lc; + + Assert(!stmt || !stmt->for_all_tables); + + foreach(lc, rels) + { + PublicationRelInfo *pub_rel = (PublicationRelInfo *) lfirst(lc); + Relation rel = pub_rel->relation; + ObjectAddress obj; + + /* Must be owner of the table or superuser. */ + if (!pg_class_ownercheck(RelationGetRelid(rel), GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(rel->rd_rel->relkind), + RelationGetRelationName(rel)); + + obj = publication_add_relation(pubid, pub_rel, if_not_exists); + if (stmt) + { + EventTriggerCollectSimpleCommand(obj, InvalidObjectAddress, + (Node *) stmt); + + InvokeObjectPostCreateHook(PublicationRelRelationId, + obj.objectId, 0); + } + } +} + +/* + * Remove listed tables from the publication. 
+ */ +static void +PublicationDropTables(Oid pubid, List *rels, bool missing_ok) +{ + ObjectAddress obj; + ListCell *lc; + Oid prid; + + foreach(lc, rels) + { + PublicationRelInfo *pubrel = (PublicationRelInfo *) lfirst(lc); + Relation rel = pubrel->relation; + Oid relid = RelationGetRelid(rel); + + if (pubrel->columns) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("column list must not be specified in ALTER PUBLICATION ... DROP")); + + prid = GetSysCacheOid2(PUBLICATIONRELMAP, Anum_pg_publication_rel_oid, + ObjectIdGetDatum(relid), + ObjectIdGetDatum(pubid)); + if (!OidIsValid(prid)) + { + if (missing_ok) + continue; + + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("relation \"%s\" is not part of the publication", + RelationGetRelationName(rel)))); + } + + if (pubrel->whereClause) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot use a WHERE clause when removing a table from a publication"))); + + ObjectAddressSet(obj, PublicationRelRelationId, prid); + performDeletion(&obj, DROP_CASCADE, 0); + } +} + +/* + * Add listed schemas to the publication. + */ +static void +PublicationAddSchemas(Oid pubid, List *schemas, bool if_not_exists, + AlterPublicationStmt *stmt) +{ + ListCell *lc; + + Assert(!stmt || !stmt->for_all_tables); + + foreach(lc, schemas) + { + Oid schemaid = lfirst_oid(lc); + ObjectAddress obj; + + obj = publication_add_schema(pubid, schemaid, if_not_exists); + if (stmt) + { + EventTriggerCollectSimpleCommand(obj, InvalidObjectAddress, + (Node *) stmt); + + InvokeObjectPostCreateHook(PublicationNamespaceRelationId, + obj.objectId, 0); + } + } +} + +/* + * Remove listed schemas from the publication. 
+ */ +static void +PublicationDropSchemas(Oid pubid, List *schemas, bool missing_ok) +{ + ObjectAddress obj; + ListCell *lc; + Oid psid; + + foreach(lc, schemas) + { + Oid schemaid = lfirst_oid(lc); + + psid = GetSysCacheOid2(PUBLICATIONNAMESPACEMAP, + Anum_pg_publication_namespace_oid, + ObjectIdGetDatum(schemaid), + ObjectIdGetDatum(pubid)); + if (!OidIsValid(psid)) + { + if (missing_ok) + continue; + + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("tables from schema \"%s\" are not part of the publication", + get_namespace_name(schemaid)))); + } + + ObjectAddressSet(obj, PublicationNamespaceRelationId, psid); + performDeletion(&obj, DROP_CASCADE, 0); + } +} + +/* + * Internal workhorse for changing a publication owner + */ +static void +AlterPublicationOwner_internal(Relation rel, HeapTuple tup, Oid newOwnerId) +{ + Form_pg_publication form; + + form = (Form_pg_publication) GETSTRUCT(tup); + + if (form->pubowner == newOwnerId) + return; + + if (!superuser()) + { + AclResult aclresult; + + /* Must be owner */ + if (!pg_publication_ownercheck(form->oid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_PUBLICATION, + NameStr(form->pubname)); + + /* Must be able to become new owner */ + check_is_member_of_role(GetUserId(), newOwnerId); + + /* New owner must have CREATE privilege on database */ + aclresult = pg_database_aclcheck(MyDatabaseId, newOwnerId, ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_DATABASE, + get_database_name(MyDatabaseId)); + + if (form->puballtables && !superuser_arg(newOwnerId)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to change owner of publication \"%s\"", + NameStr(form->pubname)), + errhint("The owner of a FOR ALL TABLES publication must be a superuser."))); + + if (!superuser_arg(newOwnerId) && is_schema_publication(form->oid)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to change owner of 
publication \"%s\"", + NameStr(form->pubname)), + errhint("The owner of a FOR TABLES IN SCHEMA publication must be a superuser."))); + } + + form->pubowner = newOwnerId; + CatalogTupleUpdate(rel, &tup->t_self, tup); + + /* Update owner dependency reference */ + changeDependencyOnOwner(PublicationRelationId, + form->oid, + newOwnerId); + + InvokeObjectPostAlterHook(PublicationRelationId, + form->oid, 0); +} + +/* + * Change publication owner -- by name + */ +ObjectAddress +AlterPublicationOwner(const char *name, Oid newOwnerId) +{ + Oid subid; + HeapTuple tup; + Relation rel; + ObjectAddress address; + Form_pg_publication pubform; + + rel = table_open(PublicationRelationId, RowExclusiveLock); + + tup = SearchSysCacheCopy1(PUBLICATIONNAME, CStringGetDatum(name)); + + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("publication \"%s\" does not exist", name))); + + pubform = (Form_pg_publication) GETSTRUCT(tup); + subid = pubform->oid; + + AlterPublicationOwner_internal(rel, tup, newOwnerId); + + ObjectAddressSet(address, PublicationRelationId, subid); + + heap_freetuple(tup); + + table_close(rel, RowExclusiveLock); + + return address; +} + +/* + * Change publication owner -- by OID + */ +void +AlterPublicationOwner_oid(Oid subid, Oid newOwnerId) +{ + HeapTuple tup; + Relation rel; + + rel = table_open(PublicationRelationId, RowExclusiveLock); + + tup = SearchSysCacheCopy1(PUBLICATIONOID, ObjectIdGetDatum(subid)); + + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("publication with OID %u does not exist", subid))); + + AlterPublicationOwner_internal(rel, tup, newOwnerId); + + heap_freetuple(tup); + + table_close(rel, RowExclusiveLock); +} diff --git a/src/backend/commands/schemacmds.c b/src/backend/commands/schemacmds.c new file mode 100644 index 0000000..1a9132c --- /dev/null +++ b/src/backend/commands/schemacmds.c @@ -0,0 +1,441 @@ 
+/*------------------------------------------------------------------------- + * + * schemacmds.c + * schema creation/manipulation commands + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/schemacmds.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/htup_details.h" +#include "access/table.h" +#include "access/xact.h" +#include "catalog/catalog.h" +#include "catalog/dependency.h" +#include "catalog/indexing.h" +#include "catalog/namespace.h" +#include "catalog/objectaccess.h" +#include "catalog/pg_authid.h" +#include "catalog/pg_namespace.h" +#include "commands/dbcommands.h" +#include "commands/event_trigger.h" +#include "commands/schemacmds.h" +#include "miscadmin.h" +#include "parser/parse_utilcmd.h" +#include "parser/scansup.h" +#include "tcop/utility.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/rel.h" +#include "utils/syscache.h" + +static void AlterSchemaOwner_internal(HeapTuple tup, Relation rel, Oid newOwnerId); + +/* + * CREATE SCHEMA + * + * Note: caller should pass in location information for the whole + * CREATE SCHEMA statement, which in turn we pass down as the location + * of the component commands. This comports with our general plan of + * reporting location/len for the whole command even when executing + * a subquery. 
+ */ +Oid +CreateSchemaCommand(CreateSchemaStmt *stmt, const char *queryString, + int stmt_location, int stmt_len) +{ + const char *schemaName = stmt->schemaname; + Oid namespaceId; + List *parsetree_list; + ListCell *parsetree_item; + Oid owner_uid; + Oid saved_uid; + int save_sec_context; + int save_nestlevel; + char *nsp = namespace_search_path; + AclResult aclresult; + ObjectAddress address; + StringInfoData pathbuf; + + GetUserIdAndSecContext(&saved_uid, &save_sec_context); + + /* + * Who is supposed to own the new schema? + */ + if (stmt->authrole) + owner_uid = get_rolespec_oid(stmt->authrole, false); + else + owner_uid = saved_uid; + + /* fill schema name with the user name if not specified */ + if (!schemaName) + { + HeapTuple tuple; + + tuple = SearchSysCache1(AUTHOID, ObjectIdGetDatum(owner_uid)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for role %u", owner_uid); + schemaName = + pstrdup(NameStr(((Form_pg_authid) GETSTRUCT(tuple))->rolname)); + ReleaseSysCache(tuple); + } + + /* + * To create a schema, must have schema-create privilege on the current + * database and must be able to become the target role (this does not + * imply that the target role itself must have create-schema privilege). + * The latter provision guards against "giveaway" attacks. Note that a + * superuser will always have both of these privileges a fortiori. 
+ */ + aclresult = pg_database_aclcheck(MyDatabaseId, saved_uid, ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_DATABASE, + get_database_name(MyDatabaseId)); + + check_is_member_of_role(saved_uid, owner_uid); + + /* Additional check to protect reserved schema names */ + if (!allowSystemTableMods && IsReservedName(schemaName)) + ereport(ERROR, + (errcode(ERRCODE_RESERVED_NAME), + errmsg("unacceptable schema name \"%s\"", schemaName), + errdetail("The prefix \"pg_\" is reserved for system schemas."))); + + /* + * If if_not_exists was given and the schema already exists, bail out. + * (Note: we needn't check this when not if_not_exists, because + * NamespaceCreate will complain anyway.) We could do this before making + * the permissions checks, but since CREATE TABLE IF NOT EXISTS makes its + * creation-permission check first, we do likewise. + */ + if (stmt->if_not_exists) + { + namespaceId = get_namespace_oid(schemaName, true); + if (OidIsValid(namespaceId)) + { + /* + * If we are in an extension script, insist that the pre-existing + * object be a member of the extension, to avoid security risks. + */ + ObjectAddressSet(address, NamespaceRelationId, namespaceId); + checkMembershipInCurrentExtension(&address); + + /* OK to skip */ + ereport(NOTICE, + (errcode(ERRCODE_DUPLICATE_SCHEMA), + errmsg("schema \"%s\" already exists, skipping", + schemaName))); + return InvalidOid; + } + } + + /* + * If the requested authorization is different from the current user, + * temporarily set the current user so that the object(s) will be created + * with the correct ownership. + * + * (The setting will be restored at the end of this routine, or in case of + * error, transaction abort will clean things up.) 
+ */ + if (saved_uid != owner_uid) + SetUserIdAndSecContext(owner_uid, + save_sec_context | SECURITY_LOCAL_USERID_CHANGE); + + /* Create the schema's namespace */ + namespaceId = NamespaceCreate(schemaName, owner_uid, false); + + /* Advance cmd counter to make the namespace visible */ + CommandCounterIncrement(); + + /* + * Prepend the new schema to the current search path. + * + * We use the equivalent of a function SET option to allow the setting to + * persist for exactly the duration of the schema creation. guc.c also + * takes care of undoing the setting on error. + */ + save_nestlevel = NewGUCNestLevel(); + + initStringInfo(&pathbuf); + appendStringInfoString(&pathbuf, quote_identifier(schemaName)); + + while (scanner_isspace(*nsp)) + nsp++; + + if (*nsp != '\0') + appendStringInfo(&pathbuf, ", %s", nsp); + + (void) set_config_option("search_path", pathbuf.data, + PGC_USERSET, PGC_S_SESSION, + GUC_ACTION_SAVE, true, 0, false); + + /* + * Report the new schema to possibly interested event triggers. Note we + * must do this here and not in ProcessUtilitySlow because otherwise the + * objects created below are reported before the schema, which would be + * wrong. + */ + ObjectAddressSet(address, NamespaceRelationId, namespaceId); + EventTriggerCollectSimpleCommand(address, InvalidObjectAddress, + (Node *) stmt); + + /* + * Examine the list of commands embedded in the CREATE SCHEMA command, and + * reorganize them into a sequentially executable order with no forward + * references. Note that the result is still a list of raw parsetrees --- + * we cannot, in general, run parse analysis on one statement until we + * have actually executed the prior ones. + */ + parsetree_list = transformCreateSchemaStmtElements(stmt->schemaElts, + schemaName); + + /* + * Execute each command contained in the CREATE SCHEMA. 
Since the grammar + * allows only utility commands in CREATE SCHEMA, there is no need to pass + * them through parse_analyze_*() or the rewriter; we can just hand them + * straight to ProcessUtility. + */ + foreach(parsetree_item, parsetree_list) + { + Node *stmt = (Node *) lfirst(parsetree_item); + PlannedStmt *wrapper; + + /* need to make a wrapper PlannedStmt */ + wrapper = makeNode(PlannedStmt); + wrapper->commandType = CMD_UTILITY; + wrapper->canSetTag = false; + wrapper->utilityStmt = stmt; + wrapper->stmt_location = stmt_location; + wrapper->stmt_len = stmt_len; + + /* do this step */ + ProcessUtility(wrapper, + queryString, + false, + PROCESS_UTILITY_SUBCOMMAND, + NULL, + NULL, + None_Receiver, + NULL); + + /* make sure later steps can see the object created here */ + CommandCounterIncrement(); + } + + /* + * Restore the GUC variable search_path we set above. + */ + AtEOXact_GUC(true, save_nestlevel); + + /* Reset current user and security context */ + SetUserIdAndSecContext(saved_uid, save_sec_context); + + return namespaceId; +} + + +/* + * Rename schema + */ +ObjectAddress +RenameSchema(const char *oldname, const char *newname) +{ + Oid nspOid; + HeapTuple tup; + Relation rel; + AclResult aclresult; + ObjectAddress address; + Form_pg_namespace nspform; + + rel = table_open(NamespaceRelationId, RowExclusiveLock); + + tup = SearchSysCacheCopy1(NAMESPACENAME, CStringGetDatum(oldname)); + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_SCHEMA), + errmsg("schema \"%s\" does not exist", oldname))); + + nspform = (Form_pg_namespace) GETSTRUCT(tup); + nspOid = nspform->oid; + + /* make sure the new name doesn't exist */ + if (OidIsValid(get_namespace_oid(newname, true))) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_SCHEMA), + errmsg("schema \"%s\" already exists", newname))); + + /* must be owner */ + if (!pg_namespace_ownercheck(nspOid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_SCHEMA, + oldname); + + /* must have 
CREATE privilege on database */ + aclresult = pg_database_aclcheck(MyDatabaseId, GetUserId(), ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_DATABASE, + get_database_name(MyDatabaseId)); + + if (!allowSystemTableMods && IsReservedName(newname)) + ereport(ERROR, + (errcode(ERRCODE_RESERVED_NAME), + errmsg("unacceptable schema name \"%s\"", newname), + errdetail("The prefix \"pg_\" is reserved for system schemas."))); + + /* rename */ + namestrcpy(&nspform->nspname, newname); + CatalogTupleUpdate(rel, &tup->t_self, tup); + + InvokeObjectPostAlterHook(NamespaceRelationId, nspOid, 0); + + ObjectAddressSet(address, NamespaceRelationId, nspOid); + + table_close(rel, NoLock); + heap_freetuple(tup); + + return address; +} + +void +AlterSchemaOwner_oid(Oid oid, Oid newOwnerId) +{ + HeapTuple tup; + Relation rel; + + rel = table_open(NamespaceRelationId, RowExclusiveLock); + + tup = SearchSysCache1(NAMESPACEOID, ObjectIdGetDatum(oid)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for schema %u", oid); + + AlterSchemaOwner_internal(tup, rel, newOwnerId); + + ReleaseSysCache(tup); + + table_close(rel, RowExclusiveLock); +} + + +/* + * Change schema owner + */ +ObjectAddress +AlterSchemaOwner(const char *name, Oid newOwnerId) +{ + Oid nspOid; + HeapTuple tup; + Relation rel; + ObjectAddress address; + Form_pg_namespace nspform; + + rel = table_open(NamespaceRelationId, RowExclusiveLock); + + tup = SearchSysCache1(NAMESPACENAME, CStringGetDatum(name)); + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_SCHEMA), + errmsg("schema \"%s\" does not exist", name))); + + nspform = (Form_pg_namespace) GETSTRUCT(tup); + nspOid = nspform->oid; + + AlterSchemaOwner_internal(tup, rel, newOwnerId); + + ObjectAddressSet(address, NamespaceRelationId, nspOid); + + ReleaseSysCache(tup); + + table_close(rel, RowExclusiveLock); + + return address; +} + +static void +AlterSchemaOwner_internal(HeapTuple tup, Relation rel, 
Oid newOwnerId) +{ + Form_pg_namespace nspForm; + + Assert(tup->t_tableOid == NamespaceRelationId); + Assert(RelationGetRelid(rel) == NamespaceRelationId); + + nspForm = (Form_pg_namespace) GETSTRUCT(tup); + + /* + * If the new owner is the same as the existing owner, consider the + * command to have succeeded. This is for dump restoration purposes. + */ + if (nspForm->nspowner != newOwnerId) + { + Datum repl_val[Natts_pg_namespace]; + bool repl_null[Natts_pg_namespace]; + bool repl_repl[Natts_pg_namespace]; + Acl *newAcl; + Datum aclDatum; + bool isNull; + HeapTuple newtuple; + AclResult aclresult; + + /* Otherwise, must be owner of the existing object */ + if (!pg_namespace_ownercheck(nspForm->oid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_SCHEMA, + NameStr(nspForm->nspname)); + + /* Must be able to become new owner */ + check_is_member_of_role(GetUserId(), newOwnerId); + + /* + * must have create-schema rights + * + * NOTE: This is different from other alter-owner checks in that the + * current user is checked for create privileges instead of the + * destination owner. This is consistent with the CREATE case for + * schemas. Because superusers will always have this right, we need + * no special case for them. + */ + aclresult = pg_database_aclcheck(MyDatabaseId, GetUserId(), + ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_DATABASE, + get_database_name(MyDatabaseId)); + + memset(repl_null, false, sizeof(repl_null)); + memset(repl_repl, false, sizeof(repl_repl)); + + repl_repl[Anum_pg_namespace_nspowner - 1] = true; + repl_val[Anum_pg_namespace_nspowner - 1] = ObjectIdGetDatum(newOwnerId); + + /* + * Determine the modified ACL for the new owner. This is only + * necessary when the ACL is non-null. 
+ */ + aclDatum = SysCacheGetAttr(NAMESPACENAME, tup, + Anum_pg_namespace_nspacl, + &isNull); + if (!isNull) + { + newAcl = aclnewowner(DatumGetAclP(aclDatum), + nspForm->nspowner, newOwnerId); + repl_repl[Anum_pg_namespace_nspacl - 1] = true; + repl_val[Anum_pg_namespace_nspacl - 1] = PointerGetDatum(newAcl); + } + + newtuple = heap_modify_tuple(tup, RelationGetDescr(rel), repl_val, repl_null, repl_repl); + + CatalogTupleUpdate(rel, &newtuple->t_self, newtuple); + + heap_freetuple(newtuple); + + /* Update owner dependency reference */ + changeDependencyOnOwner(NamespaceRelationId, nspForm->oid, + newOwnerId); + } + + InvokeObjectPostAlterHook(NamespaceRelationId, + nspForm->oid, 0); +} diff --git a/src/backend/commands/seclabel.c b/src/backend/commands/seclabel.c new file mode 100644 index 0000000..7ae19b9 --- /dev/null +++ b/src/backend/commands/seclabel.c @@ -0,0 +1,581 @@ +/* ------------------------------------------------------------------------- + * + * seclabel.c + * routines to support security label feature. 
 *
 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * -------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/genam.h"
#include "access/htup_details.h"
#include "access/relation.h"
#include "access/table.h"
#include "catalog/catalog.h"
#include "catalog/indexing.h"
#include "catalog/pg_seclabel.h"
#include "catalog/pg_shseclabel.h"
#include "commands/seclabel.h"
#include "miscadmin.h"
#include "utils/builtins.h"
#include "utils/fmgroids.h"
#include "utils/memutils.h"
#include "utils/rel.h"

/*
 * One entry per loaded security label provider; see
 * register_label_provider().
 */
typedef struct
{
	const char *provider_name;	/* name given in SECURITY LABEL FOR ... */
	check_object_relabel_type hook; /* validation callback; may ERROR */
} LabelProvider;

/* All registered providers (LabelProvider *), kept in TopMemoryContext */
static List *label_provider_list = NIL;

/*
 * SecLabelSupportsObjectType
 *
 * Report whether the SECURITY LABEL command supports objects of the
 * given type.
 */
static bool
SecLabelSupportsObjectType(ObjectType objtype)
{
	switch (objtype)
	{
		case OBJECT_AGGREGATE:
		case OBJECT_COLUMN:
		case OBJECT_DATABASE:
		case OBJECT_DOMAIN:
		case OBJECT_EVENT_TRIGGER:
		case OBJECT_FOREIGN_TABLE:
		case OBJECT_FUNCTION:
		case OBJECT_LANGUAGE:
		case OBJECT_LARGEOBJECT:
		case OBJECT_MATVIEW:
		case OBJECT_PROCEDURE:
		case OBJECT_PUBLICATION:
		case OBJECT_ROLE:
		case OBJECT_ROUTINE:
		case OBJECT_SCHEMA:
		case OBJECT_SEQUENCE:
		case OBJECT_SUBSCRIPTION:
		case OBJECT_TABLE:
		case OBJECT_TABLESPACE:
		case OBJECT_TYPE:
		case OBJECT_VIEW:
			return true;

		case OBJECT_ACCESS_METHOD:
		case OBJECT_AMOP:
		case OBJECT_AMPROC:
		case OBJECT_ATTRIBUTE:
		case OBJECT_CAST:
		case OBJECT_COLLATION:
		case OBJECT_CONVERSION:
		case OBJECT_DEFAULT:
		case OBJECT_DEFACL:
		case OBJECT_DOMCONSTRAINT:
		case OBJECT_EXTENSION:
		case OBJECT_FDW:
		case OBJECT_FOREIGN_SERVER:
		case OBJECT_INDEX:
		case OBJECT_OPCLASS:
		case OBJECT_OPERATOR:
		case OBJECT_OPFAMILY:
		case OBJECT_PARAMETER_ACL:
		case OBJECT_POLICY:
		case OBJECT_PUBLICATION_NAMESPACE:
		case OBJECT_PUBLICATION_REL:
		case OBJECT_RULE:
		case OBJECT_STATISTIC_EXT:
		case OBJECT_TABCONSTRAINT:
		case OBJECT_TRANSFORM:
		case OBJECT_TRIGGER:
		case OBJECT_TSCONFIGURATION:
		case OBJECT_TSDICTIONARY:
		case OBJECT_TSPARSER:
		case OBJECT_TSTEMPLATE:
		case OBJECT_USER_MAPPING:
			return false;

			/*
			 * There's intentionally no default: case here; we want the
			 * compiler to warn if a new ObjectType hasn't been handled above.
			 */
	}

	/* Shouldn't get here, but if we do, say "no support" */
	return false;
}

/*
 * ExecSecLabelStmt --
 *
 * Apply a security label to a database object.
 *
 * Returns the ObjectAddress of the object to which the policy was applied.
 */
ObjectAddress
ExecSecLabelStmt(SecLabelStmt *stmt)
{
	LabelProvider *provider = NULL;
	ObjectAddress address;
	Relation	relation;
	ListCell   *lc;

	/*
	 * Find the named label provider, or if none specified, check whether
	 * there's exactly one, and if so use it.
	 */
	if (stmt->provider == NULL)
	{
		if (label_provider_list == NIL)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("no security label providers have been loaded")));
		if (list_length(label_provider_list) != 1)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("must specify provider when multiple security label providers have been loaded")));
		provider = (LabelProvider *) linitial(label_provider_list);
	}
	else
	{
		foreach(lc, label_provider_list)
		{
			LabelProvider *lp = lfirst(lc);

			if (strcmp(stmt->provider, lp->provider_name) == 0)
			{
				provider = lp;
				break;
			}
		}
		if (provider == NULL)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("security label provider \"%s\" is not loaded",
							stmt->provider)));
	}

	if (!SecLabelSupportsObjectType(stmt->objtype))
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("security labels are not supported for this type of object")));

	/*
	 * Translate the parser representation which identifies this object into
	 * an ObjectAddress.  get_object_address() will throw an error if the
	 * object does not exist, and will also acquire a lock on the target to
	 * guard against concurrent modifications.
	 */
	address = get_object_address(stmt->objtype, stmt->object,
								 &relation, ShareUpdateExclusiveLock, false);

	/* Require ownership of the target object. */
	check_object_ownership(GetUserId(), stmt->objtype, address,
						   stmt->object, relation);

	/* Perform other integrity checks as needed. */
	switch (stmt->objtype)
	{
		case OBJECT_COLUMN:

			/*
			 * Allow security labels only on columns of tables, views,
			 * materialized views, composite types, and foreign tables (which
			 * are the only relkinds for which pg_dump will dump labels).
			 */
			if (relation->rd_rel->relkind != RELKIND_RELATION &&
				relation->rd_rel->relkind != RELKIND_VIEW &&
				relation->rd_rel->relkind != RELKIND_MATVIEW &&
				relation->rd_rel->relkind != RELKIND_COMPOSITE_TYPE &&
				relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE &&
				relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
				ereport(ERROR,
						(errcode(ERRCODE_WRONG_OBJECT_TYPE),
						 errmsg("cannot set security label on relation \"%s\"",
								RelationGetRelationName(relation)),
						 errdetail_relkind_not_supported(relation->rd_rel->relkind)));
			break;
		default:
			break;
	}

	/* Provider gets control here, may throw ERROR to veto new label. */
	provider->hook(&address, stmt->label);

	/* Apply new label. */
	SetSecurityLabel(&address, provider->provider_name, stmt->label);

	/*
	 * If get_object_address() opened the relation for us, we close it to keep
	 * the reference count correct - but we retain any locks acquired by
	 * get_object_address() until commit time, to guard against concurrent
	 * activity.
	 */
	if (relation != NULL)
		relation_close(relation, NoLock);

	return address;
}

/*
 * GetSharedSecurityLabel returns the security label for a shared object for
 * a given provider, or NULL if there is no such label.
 */
static char *
GetSharedSecurityLabel(const ObjectAddress *object, const char *provider)
{
	Relation	pg_shseclabel;
	ScanKeyData keys[3];
	SysScanDesc scan;
	HeapTuple	tuple;
	Datum		datum;
	bool		isnull;
	char	   *seclabel = NULL;

	ScanKeyInit(&keys[0],
				Anum_pg_shseclabel_objoid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(object->objectId));
	ScanKeyInit(&keys[1],
				Anum_pg_shseclabel_classoid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(object->classId));
	ScanKeyInit(&keys[2],
				Anum_pg_shseclabel_provider,
				BTEqualStrategyNumber, F_TEXTEQ,
				CStringGetTextDatum(provider));

	pg_shseclabel = table_open(SharedSecLabelRelationId, AccessShareLock);

	/*
	 * Use the index only once the critical shared relcaches have been built;
	 * before that, systable_beginscan falls back to a sequential scan.
	 */
	scan = systable_beginscan(pg_shseclabel, SharedSecLabelObjectIndexId,
							  criticalSharedRelcachesBuilt, NULL, 3, keys);

	tuple = systable_getnext(scan);
	if (HeapTupleIsValid(tuple))
	{
		datum = heap_getattr(tuple, Anum_pg_shseclabel_label,
							 RelationGetDescr(pg_shseclabel), &isnull);
		if (!isnull)
			seclabel = TextDatumGetCString(datum);
	}
	systable_endscan(scan);

	table_close(pg_shseclabel, AccessShareLock);

	return seclabel;
}

/*
 * GetSecurityLabel returns the security label for a shared or database object
 * for a given provider, or NULL if there is no such label.
 */
char *
GetSecurityLabel(const ObjectAddress *object, const char *provider)
{
	Relation	pg_seclabel;
	ScanKeyData keys[4];
	SysScanDesc scan;
	HeapTuple	tuple;
	Datum		datum;
	bool		isnull;
	char	   *seclabel = NULL;

	/* Shared objects have their own security label catalog. */
	if (IsSharedRelation(object->classId))
		return GetSharedSecurityLabel(object, provider);

	/* Must be an unshared object, so examine pg_seclabel. */
	ScanKeyInit(&keys[0],
				Anum_pg_seclabel_objoid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(object->objectId));
	ScanKeyInit(&keys[1],
				Anum_pg_seclabel_classoid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(object->classId));
	ScanKeyInit(&keys[2],
				Anum_pg_seclabel_objsubid,
				BTEqualStrategyNumber, F_INT4EQ,
				Int32GetDatum(object->objectSubId));
	ScanKeyInit(&keys[3],
				Anum_pg_seclabel_provider,
				BTEqualStrategyNumber, F_TEXTEQ,
				CStringGetTextDatum(provider));

	pg_seclabel = table_open(SecLabelRelationId, AccessShareLock);

	scan = systable_beginscan(pg_seclabel, SecLabelObjectIndexId, true,
							  NULL, 4, keys);

	tuple = systable_getnext(scan);
	if (HeapTupleIsValid(tuple))
	{
		datum = heap_getattr(tuple, Anum_pg_seclabel_label,
							 RelationGetDescr(pg_seclabel), &isnull);
		if (!isnull)
			seclabel = TextDatumGetCString(datum);
	}
	systable_endscan(scan);

	table_close(pg_seclabel, AccessShareLock);

	return seclabel;
}

/*
 * SetSharedSecurityLabel is a helper function of SetSecurityLabel to
 * handle shared database objects.  A NULL label deletes any existing entry.
 */
static void
SetSharedSecurityLabel(const ObjectAddress *object,
					   const char *provider, const char *label)
{
	Relation	pg_shseclabel;
	ScanKeyData keys[4];
	SysScanDesc scan;
	HeapTuple	oldtup;
	HeapTuple	newtup = NULL;
	Datum		values[Natts_pg_shseclabel];
	bool		nulls[Natts_pg_shseclabel];
	bool		replaces[Natts_pg_shseclabel];

	/* Prepare to form or update a tuple, if necessary. */
	memset(nulls, false, sizeof(nulls));
	memset(replaces, false, sizeof(replaces));
	values[Anum_pg_shseclabel_objoid - 1] = ObjectIdGetDatum(object->objectId);
	values[Anum_pg_shseclabel_classoid - 1] = ObjectIdGetDatum(object->classId);
	values[Anum_pg_shseclabel_provider - 1] = CStringGetTextDatum(provider);
	if (label != NULL)
		values[Anum_pg_shseclabel_label - 1] = CStringGetTextDatum(label);

	/* Use the index to search for a matching old tuple */
	ScanKeyInit(&keys[0],
				Anum_pg_shseclabel_objoid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(object->objectId));
	ScanKeyInit(&keys[1],
				Anum_pg_shseclabel_classoid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(object->classId));
	ScanKeyInit(&keys[2],
				Anum_pg_shseclabel_provider,
				BTEqualStrategyNumber, F_TEXTEQ,
				CStringGetTextDatum(provider));

	pg_shseclabel = table_open(SharedSecLabelRelationId, RowExclusiveLock);

	scan = systable_beginscan(pg_shseclabel, SharedSecLabelObjectIndexId, true,
							  NULL, 3, keys);

	oldtup = systable_getnext(scan);
	if (HeapTupleIsValid(oldtup))
	{
		if (label == NULL)
			CatalogTupleDelete(pg_shseclabel, &oldtup->t_self);
		else
		{
			replaces[Anum_pg_shseclabel_label - 1] = true;
			newtup = heap_modify_tuple(oldtup, RelationGetDescr(pg_shseclabel),
									   values, nulls, replaces);
			CatalogTupleUpdate(pg_shseclabel, &oldtup->t_self, newtup);
		}
	}
	systable_endscan(scan);

	/* If we didn't find an old tuple, insert a new one */
	if (newtup == NULL && label != NULL)
	{
		newtup = heap_form_tuple(RelationGetDescr(pg_shseclabel),
								 values, nulls);
		CatalogTupleInsert(pg_shseclabel, newtup);
	}

	if (newtup != NULL)
		heap_freetuple(newtup);

	table_close(pg_shseclabel, RowExclusiveLock);
}

/*
 * SetSecurityLabel attempts to set the security label for the specified
 * provider on the specified object to the given value.  NULL means that any
 * existing label should be deleted.
 */
void
SetSecurityLabel(const ObjectAddress *object,
				 const char *provider, const char *label)
{
	Relation	pg_seclabel;
	ScanKeyData keys[4];
	SysScanDesc scan;
	HeapTuple	oldtup;
	HeapTuple	newtup = NULL;
	Datum		values[Natts_pg_seclabel];
	bool		nulls[Natts_pg_seclabel];
	bool		replaces[Natts_pg_seclabel];

	/* Shared objects have their own security label catalog. */
	if (IsSharedRelation(object->classId))
	{
		SetSharedSecurityLabel(object, provider, label);
		return;
	}

	/* Prepare to form or update a tuple, if necessary. */
	memset(nulls, false, sizeof(nulls));
	memset(replaces, false, sizeof(replaces));
	values[Anum_pg_seclabel_objoid - 1] = ObjectIdGetDatum(object->objectId);
	values[Anum_pg_seclabel_classoid - 1] = ObjectIdGetDatum(object->classId);
	values[Anum_pg_seclabel_objsubid - 1] = Int32GetDatum(object->objectSubId);
	values[Anum_pg_seclabel_provider - 1] = CStringGetTextDatum(provider);
	if (label != NULL)
		values[Anum_pg_seclabel_label - 1] = CStringGetTextDatum(label);

	/* Use the index to search for a matching old tuple */
	ScanKeyInit(&keys[0],
				Anum_pg_seclabel_objoid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(object->objectId));
	ScanKeyInit(&keys[1],
				Anum_pg_seclabel_classoid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(object->classId));
	ScanKeyInit(&keys[2],
				Anum_pg_seclabel_objsubid,
				BTEqualStrategyNumber, F_INT4EQ,
				Int32GetDatum(object->objectSubId));
	ScanKeyInit(&keys[3],
				Anum_pg_seclabel_provider,
				BTEqualStrategyNumber, F_TEXTEQ,
				CStringGetTextDatum(provider));

	pg_seclabel = table_open(SecLabelRelationId, RowExclusiveLock);

	scan = systable_beginscan(pg_seclabel, SecLabelObjectIndexId, true,
							  NULL, 4, keys);

	oldtup = systable_getnext(scan);
	if (HeapTupleIsValid(oldtup))
	{
		if (label == NULL)
			CatalogTupleDelete(pg_seclabel, &oldtup->t_self);
		else
		{
			replaces[Anum_pg_seclabel_label - 1] = true;
			newtup = heap_modify_tuple(oldtup, RelationGetDescr(pg_seclabel),
									   values, nulls, replaces);
			CatalogTupleUpdate(pg_seclabel, &oldtup->t_self, newtup);
		}
	}
	systable_endscan(scan);

	/* If we didn't find an old tuple, insert a new one */
	if (newtup == NULL && label != NULL)
	{
		newtup = heap_form_tuple(RelationGetDescr(pg_seclabel),
								 values, nulls);
		CatalogTupleInsert(pg_seclabel, newtup);
	}

	/* Update indexes, if necessary */
	if (newtup != NULL)
		heap_freetuple(newtup);

	table_close(pg_seclabel, RowExclusiveLock);
}

/*
 * DeleteSharedSecurityLabel is a helper function of DeleteSecurityLabel
 * to handle shared database objects.
 */
void
DeleteSharedSecurityLabel(Oid objectId, Oid classId)
{
	Relation	pg_shseclabel;
	ScanKeyData skey[2];
	SysScanDesc scan;
	HeapTuple	oldtup;

	ScanKeyInit(&skey[0],
				Anum_pg_shseclabel_objoid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(objectId));
	ScanKeyInit(&skey[1],
				Anum_pg_shseclabel_classoid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(classId));

	pg_shseclabel = table_open(SharedSecLabelRelationId, RowExclusiveLock);

	scan = systable_beginscan(pg_shseclabel, SharedSecLabelObjectIndexId, true,
							  NULL, 2, skey);
	while (HeapTupleIsValid(oldtup = systable_getnext(scan)))
		CatalogTupleDelete(pg_shseclabel, &oldtup->t_self);
	systable_endscan(scan);

	table_close(pg_shseclabel, RowExclusiveLock);
}

/*
 * DeleteSecurityLabel removes all security labels for an object (and any
 * sub-objects, if applicable).
 */
void
DeleteSecurityLabel(const ObjectAddress *object)
{
	Relation	pg_seclabel;
	ScanKeyData skey[3];
	SysScanDesc scan;
	HeapTuple	oldtup;
	int			nkeys;

	/* Shared objects have their own security label catalog. */
	if (IsSharedRelation(object->classId))
	{
		Assert(object->objectSubId == 0);
		DeleteSharedSecurityLabel(object->objectId, object->classId);
		return;
	}

	ScanKeyInit(&skey[0],
				Anum_pg_seclabel_objoid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(object->objectId));
	ScanKeyInit(&skey[1],
				Anum_pg_seclabel_classoid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(object->classId));
	if (object->objectSubId != 0)
	{
		ScanKeyInit(&skey[2],
					Anum_pg_seclabel_objsubid,
					BTEqualStrategyNumber, F_INT4EQ,
					Int32GetDatum(object->objectSubId));
		nkeys = 3;
	}
	else
		nkeys = 2;

	pg_seclabel = table_open(SecLabelRelationId, RowExclusiveLock);

	scan = systable_beginscan(pg_seclabel, SecLabelObjectIndexId, true,
							  NULL, nkeys, skey);
	while (HeapTupleIsValid(oldtup = systable_getnext(scan)))
		CatalogTupleDelete(pg_seclabel, &oldtup->t_self);
	systable_endscan(scan);

	table_close(pg_seclabel, RowExclusiveLock);
}

/*
 * register_label_provider
 *
 * Register a security label provider under the given name.  The entry is
 * copied into TopMemoryContext so it survives for the life of the backend.
 */
void
register_label_provider(const char *provider_name, check_object_relabel_type hook)
{
	LabelProvider *provider;
	MemoryContext oldcxt;

	oldcxt = MemoryContextSwitchTo(TopMemoryContext);
	provider = palloc(sizeof(LabelProvider));
	provider->provider_name = pstrdup(provider_name);
	provider->hook = hook;
	label_provider_list = lappend(label_provider_list, provider);
	MemoryContextSwitchTo(oldcxt);
}
diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c
new file mode 100644
index 0000000..acaf660
--- /dev/null
+++ b/src/backend/commands/sequence.c
@@ -0,0 +1,1917 @@
/*-------------------------------------------------------------------------
 *
 * sequence.c
 *	  PostgreSQL sequences support code.
 *
 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/commands/sequence.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/bufmask.h"
#include "access/htup_details.h"
#include "access/multixact.h"
#include "access/relation.h"
#include "access/table.h"
#include "access/transam.h"
#include "access/xact.h"
#include "access/xlog.h"
#include "access/xloginsert.h"
#include "access/xlogutils.h"
#include "catalog/dependency.h"
#include "catalog/indexing.h"
#include "catalog/namespace.h"
#include "catalog/objectaccess.h"
#include "catalog/pg_sequence.h"
#include "catalog/pg_type.h"
#include "catalog/storage_xlog.h"
#include "commands/defrem.h"
#include "commands/sequence.h"
#include "commands/tablecmds.h"
#include "funcapi.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "parser/parse_type.h"
#include "storage/lmgr.h"
#include "storage/proc.h"
#include "storage/smgr.h"
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/resowner.h"
#include "utils/syscache.h"
#include "utils/varlena.h"


/*
 * We don't want to log each fetching of a value from a sequence,
 * so we pre-log a few fetches in advance.  In the event of
 * crash we can lose (skip over) as many values as we pre-logged.
 */
#define SEQ_LOG_VALS	32

/*
 * The "special area" of a sequence's buffer page looks like this.
 */
#define SEQ_MAGIC	  0x1717

typedef struct sequence_magic
{
	uint32		magic;			/* always SEQ_MAGIC for a valid page */
} sequence_magic;

/*
 * We store a SeqTable item for every sequence we have touched in the current
 * session.  This is needed to hold onto nextval/currval state.  (We can't
 * rely on the relcache, since it's only, well, a cache, and may decide to
 * discard entries.)
 */
typedef struct SeqTableData
{
	Oid			relid;			/* pg_class OID of this sequence (hash key) */
	Oid			filenode;		/* last seen relfilenode of this sequence */
	LocalTransactionId lxid;	/* xact in which we last did a seq op */
	bool		last_valid;		/* do we have a valid "last" value? */
	int64		last;			/* value last returned by nextval */
	int64		cached;			/* last value already cached for nextval */
	/* if last != cached, we have not used up all the cached values */
	int64		increment;		/* copy of sequence's increment field */
	/* note that increment is zero until we first do nextval_internal() */
} SeqTableData;

typedef SeqTableData *SeqTable;

static HTAB *seqhashtab = NULL; /* hash table for SeqTable items */

/*
 * last_used_seq is updated by nextval() to point to the last used
 * sequence.
 */
static SeqTableData *last_used_seq = NULL;

static void fill_seq_with_data(Relation rel, HeapTuple tuple);
static void fill_seq_fork_with_data(Relation rel, HeapTuple tuple, ForkNumber forkNum);
static Relation lock_and_open_sequence(SeqTable seq);
static void create_seq_hashtable(void);
static void init_sequence(Oid relid, SeqTable *p_elm, Relation *p_rel);
static Form_pg_sequence_data read_seq_tuple(Relation rel,
											Buffer *buf, HeapTuple seqdatatuple);
static void init_params(ParseState *pstate, List *options, bool for_identity,
						bool isInit,
						Form_pg_sequence seqform,
						Form_pg_sequence_data seqdataform,
						bool *need_seq_rewrite,
						List **owned_by);
static void do_setval(Oid relid, int64 next, bool iscalled);
static void process_owned_by(Relation seqrel, List *owned_by, bool for_identity);


/*
 * DefineSequence
 *				Creates a new sequence relation
 */
ObjectAddress
DefineSequence(ParseState *pstate, CreateSeqStmt *seq)
{
	FormData_pg_sequence seqform;
	FormData_pg_sequence_data seqdataform;
	bool		need_seq_rewrite;
	List	   *owned_by;
	CreateStmt *stmt = makeNode(CreateStmt);
	Oid			seqoid;
	ObjectAddress address;
	Relation	rel;
	HeapTuple	tuple;
	TupleDesc	tupDesc;
	Datum		value[SEQ_COL_LASTCOL];
	bool		null[SEQ_COL_LASTCOL];
	Datum		pgs_values[Natts_pg_sequence];
	bool		pgs_nulls[Natts_pg_sequence];
	int			i;

	/*
	 * If if_not_exists was given and a relation with the same name already
	 * exists, bail out.  (Note: we needn't check this when not if_not_exists,
	 * because DefineRelation will complain anyway.)
	 */
	if (seq->if_not_exists)
	{
		RangeVarGetAndCheckCreationNamespace(seq->sequence, NoLock, &seqoid);
		if (OidIsValid(seqoid))
		{
			/*
			 * If we are in an extension script, insist that the pre-existing
			 * object be a member of the extension, to avoid security risks.
			 */
			ObjectAddressSet(address, RelationRelationId, seqoid);
			checkMembershipInCurrentExtension(&address);

			/* OK to skip */
			ereport(NOTICE,
					(errcode(ERRCODE_DUPLICATE_TABLE),
					 errmsg("relation \"%s\" already exists, skipping",
							seq->sequence->relname)));
			return InvalidObjectAddress;
		}
	}

	/* Check and set all option values */
	init_params(pstate, seq->options, seq->for_identity, true,
				&seqform, &seqdataform,
				&need_seq_rewrite, &owned_by);

	/*
	 * Create relation (and fill value[] and null[] for the tuple)
	 */
	stmt->tableElts = NIL;
	for (i = SEQ_COL_FIRSTCOL; i <= SEQ_COL_LASTCOL; i++)
	{
		ColumnDef  *coldef = makeNode(ColumnDef);

		coldef->inhcount = 0;
		coldef->is_local = true;
		coldef->is_not_null = true;
		coldef->is_from_type = false;
		coldef->storage = 0;
		coldef->raw_default = NULL;
		coldef->cooked_default = NULL;
		coldef->collClause = NULL;
		coldef->collOid = InvalidOid;
		coldef->constraints = NIL;
		coldef->location = -1;

		null[i - 1] = false;

		switch (i)
		{
			case SEQ_COL_LASTVAL:
				coldef->typeName = makeTypeNameFromOid(INT8OID, -1);
				coldef->colname = "last_value";
				value[i - 1] = Int64GetDatumFast(seqdataform.last_value);
				break;
			case SEQ_COL_LOG:
				coldef->typeName = makeTypeNameFromOid(INT8OID, -1);
				coldef->colname = "log_cnt";
				value[i - 1] = Int64GetDatum((int64) 0);
				break;
			case SEQ_COL_CALLED:
				coldef->typeName = makeTypeNameFromOid(BOOLOID, -1);
				coldef->colname = "is_called";
				value[i - 1] = BoolGetDatum(false);
				break;
		}
		stmt->tableElts = lappend(stmt->tableElts, coldef);
	}

	stmt->relation = seq->sequence;
	stmt->inhRelations = NIL;
	stmt->constraints = NIL;
	stmt->options = NIL;
	stmt->oncommit = ONCOMMIT_NOOP;
	stmt->tablespacename = NULL;
	stmt->if_not_exists = seq->if_not_exists;

	address = DefineRelation(stmt, RELKIND_SEQUENCE, seq->ownerId, NULL, NULL);
	seqoid = address.objectId;
	Assert(seqoid != InvalidOid);

	rel = table_open(seqoid, AccessExclusiveLock);
	tupDesc = RelationGetDescr(rel);

	/* now initialize the sequence's data */
	tuple = heap_form_tuple(tupDesc, value, null);
	fill_seq_with_data(rel, tuple);

	/* process OWNED BY if given */
	if (owned_by)
		process_owned_by(rel, owned_by, seq->for_identity);

	table_close(rel, NoLock);

	/* fill in pg_sequence */
	rel = table_open(SequenceRelationId, RowExclusiveLock);
	tupDesc = RelationGetDescr(rel);

	memset(pgs_nulls, 0, sizeof(pgs_nulls));

	pgs_values[Anum_pg_sequence_seqrelid - 1] = ObjectIdGetDatum(seqoid);
	pgs_values[Anum_pg_sequence_seqtypid - 1] = ObjectIdGetDatum(seqform.seqtypid);
	pgs_values[Anum_pg_sequence_seqstart - 1] = Int64GetDatumFast(seqform.seqstart);
	pgs_values[Anum_pg_sequence_seqincrement - 1] = Int64GetDatumFast(seqform.seqincrement);
	pgs_values[Anum_pg_sequence_seqmax - 1] = Int64GetDatumFast(seqform.seqmax);
	pgs_values[Anum_pg_sequence_seqmin - 1] = Int64GetDatumFast(seqform.seqmin);
	pgs_values[Anum_pg_sequence_seqcache - 1] = Int64GetDatumFast(seqform.seqcache);
	pgs_values[Anum_pg_sequence_seqcycle - 1] = BoolGetDatum(seqform.seqcycle);

	tuple = heap_form_tuple(tupDesc, pgs_values, pgs_nulls);
	CatalogTupleInsert(rel, tuple);

	heap_freetuple(tuple);
	table_close(rel, RowExclusiveLock);

	return address;
}

/*
 * Reset a sequence to
 its initial value.
 *
 * The change is made transactionally, so that on failure of the current
 * transaction, the sequence will be restored to its previous state.
 * We do that by creating a whole new relfilenode for the sequence; so this
 * works much like the rewriting forms of ALTER TABLE.
 *
 * Caller is assumed to have acquired AccessExclusiveLock on the sequence,
 * which must not be released until end of transaction.  Caller is also
 * responsible for permissions checking.
 */
void
ResetSequence(Oid seq_relid)
{
	Relation	seq_rel;
	SeqTable	elm;
	Form_pg_sequence_data seq;
	Buffer		buf;
	HeapTupleData seqdatatuple;
	HeapTuple	tuple;
	HeapTuple	pgstuple;
	Form_pg_sequence pgsform;
	int64		startv;

	/*
	 * Read the old sequence.  This does a bit more work than really
	 * necessary, but it's simple, and we do want to double-check that it's
	 * indeed a sequence.
	 */
	init_sequence(seq_relid, &elm, &seq_rel);
	(void) read_seq_tuple(seq_rel, &buf, &seqdatatuple);

	pgstuple = SearchSysCache1(SEQRELID, ObjectIdGetDatum(seq_relid));
	if (!HeapTupleIsValid(pgstuple))
		elog(ERROR, "cache lookup failed for sequence %u", seq_relid);
	pgsform = (Form_pg_sequence) GETSTRUCT(pgstuple);
	startv = pgsform->seqstart;
	ReleaseSysCache(pgstuple);

	/*
	 * Copy the existing sequence tuple.
	 */
	tuple = heap_copytuple(&seqdatatuple);

	/* Now we're done with the old page */
	UnlockReleaseBuffer(buf);

	/*
	 * Modify the copied tuple to execute the restart (compare the RESTART
	 * action in AlterSequence)
	 */
	seq = (Form_pg_sequence_data) GETSTRUCT(tuple);
	seq->last_value = startv;
	seq->is_called = false;
	seq->log_cnt = 0;

	/*
	 * Create a new storage file for the sequence.
	 */
	RelationSetNewRelfilenode(seq_rel, seq_rel->rd_rel->relpersistence);

	/*
	 * Ensure sequence's relfrozenxid is at 0, since it won't contain any
	 * unfrozen XIDs.  Same with relminmxid, since a sequence will never
	 * contain multixacts.
	 */
	Assert(seq_rel->rd_rel->relfrozenxid == InvalidTransactionId);
	Assert(seq_rel->rd_rel->relminmxid == InvalidMultiXactId);

	/*
	 * Insert the modified tuple into the new storage file.
	 */
	fill_seq_with_data(seq_rel, tuple);

	/* Clear local cache so that we don't think we have cached numbers */
	/* Note that we do not change the currval() state */
	elm->cached = elm->last;

	relation_close(seq_rel, NoLock);
}

/*
 * Initialize a sequence's relation with the specified tuple as content
 *
 * This handles unlogged sequences by writing to both the main and the init
 * fork as necessary.
 */
static void
fill_seq_with_data(Relation rel, HeapTuple tuple)
{
	fill_seq_fork_with_data(rel, tuple, MAIN_FORKNUM);

	if (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED)
	{
		SMgrRelation srel;

		srel = smgropen(rel->rd_node, InvalidBackendId);
		smgrcreate(srel, INIT_FORKNUM, false);
		log_smgrcreate(&rel->rd_node, INIT_FORKNUM);
		fill_seq_fork_with_data(rel, tuple, INIT_FORKNUM);
		FlushRelationBuffers(rel);
		smgrclose(srel);
	}
}

/*
 * Initialize a sequence's relation fork with the specified tuple as content
 */
static void
fill_seq_fork_with_data(Relation rel, HeapTuple tuple, ForkNumber forkNum)
{
	Buffer		buf;
	Page		page;
	sequence_magic *sm;
	OffsetNumber offnum;

	/* Initialize first page of relation with special magic number */

	buf = ReadBufferExtended(rel, forkNum, P_NEW, RBM_NORMAL, NULL);
	Assert(BufferGetBlockNumber(buf) == 0);

	page = BufferGetPage(buf);

	PageInit(page, BufferGetPageSize(buf), sizeof(sequence_magic));
	sm = (sequence_magic *) PageGetSpecialPointer(page);
	sm->magic = SEQ_MAGIC;

	/* Now insert sequence tuple */

	LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);

	/*
	 * Since VACUUM does not process sequences, we have to force the tuple to
	 * have xmin = FrozenTransactionId now.  Otherwise it would become
	 * invisible to SELECTs after 2G transactions.  It is okay to do this
	 * because if the current transaction aborts, no other xact will ever
	 * examine the sequence tuple anyway.
	 */
	HeapTupleHeaderSetXmin(tuple->t_data, FrozenTransactionId);
	HeapTupleHeaderSetXminFrozen(tuple->t_data);
	HeapTupleHeaderSetCmin(tuple->t_data, FirstCommandId);
	HeapTupleHeaderSetXmax(tuple->t_data, InvalidTransactionId);
	tuple->t_data->t_infomask |= HEAP_XMAX_INVALID;
	ItemPointerSet(&tuple->t_data->t_ctid, 0, FirstOffsetNumber);

	/* check the comment above nextval_internal()'s equivalent call. */
	if (RelationNeedsWAL(rel))
		GetTopTransactionId();

	START_CRIT_SECTION();

	MarkBufferDirty(buf);

	offnum = PageAddItem(page, (Item) tuple->t_data, tuple->t_len,
						 InvalidOffsetNumber, false, false);
	if (offnum != FirstOffsetNumber)
		elog(ERROR, "failed to add sequence tuple to page");

	/* XLOG stuff */
	if (RelationNeedsWAL(rel) || forkNum == INIT_FORKNUM)
	{
		xl_seq_rec	xlrec;
		XLogRecPtr	recptr;

		XLogBeginInsert();
		XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);

		xlrec.node = rel->rd_node;

		XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec));
		XLogRegisterData((char *) tuple->t_data, tuple->t_len);

		recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);

		PageSetLSN(page, recptr);
	}

	END_CRIT_SECTION();

	UnlockReleaseBuffer(buf);
}

/*
 * AlterSequence
 *
 * Modify the definition of a sequence relation
 */
ObjectAddress
AlterSequence(ParseState *pstate, AlterSeqStmt *stmt)
{
	Oid			relid;
	SeqTable	elm;
	Relation	seqrel;
	Buffer		buf;
	HeapTupleData datatuple;
	Form_pg_sequence seqform;
	Form_pg_sequence_data newdataform;
	bool		need_seq_rewrite;
	List	   *owned_by;
	ObjectAddress address;
	Relation	rel;
	HeapTuple	seqtuple;
	HeapTuple	newdatatuple;

	/* Open and lock sequence, and check for ownership along the way. */
	relid = RangeVarGetRelidExtended(stmt->sequence,
									 ShareRowExclusiveLock,
									 stmt->missing_ok ? RVR_MISSING_OK : 0,
									 RangeVarCallbackOwnsRelation,
									 NULL);
	if (relid == InvalidOid)
	{
		ereport(NOTICE,
				(errmsg("relation \"%s\" does not exist, skipping",
						stmt->sequence->relname)));
		return InvalidObjectAddress;
	}

	init_sequence(relid, &elm, &seqrel);

	rel = table_open(SequenceRelationId, RowExclusiveLock);
	seqtuple = SearchSysCacheCopy1(SEQRELID,
								   ObjectIdGetDatum(relid));
	if (!HeapTupleIsValid(seqtuple))
		elog(ERROR, "cache lookup failed for sequence %u",
			 relid);

	seqform = (Form_pg_sequence) GETSTRUCT(seqtuple);

	/* lock page's buffer and read tuple into new sequence structure */
	(void) read_seq_tuple(seqrel, &buf, &datatuple);

	/* copy the existing sequence data tuple, so it can be modified locally */
	newdatatuple = heap_copytuple(&datatuple);
	newdataform = (Form_pg_sequence_data) GETSTRUCT(newdatatuple);

	UnlockReleaseBuffer(buf);

	/* Check and set new values */
	init_params(pstate, stmt->options, stmt->for_identity, false,
				seqform, newdataform,
				&need_seq_rewrite, &owned_by);

	/* Clear local cache so that we don't think we have cached numbers */
	/* Note that we do not change the currval() state */
	elm->cached = elm->last;

	/* If needed, rewrite the sequence relation itself */
	if (need_seq_rewrite)
	{
		/* check the comment above nextval_internal()'s equivalent call. */
		if (RelationNeedsWAL(seqrel))
			GetTopTransactionId();

		/*
		 * Create a new storage file for the sequence, making the state
		 * changes transactional.
		 */
		RelationSetNewRelfilenode(seqrel, seqrel->rd_rel->relpersistence);

		/*
		 * Ensure sequence's relfrozenxid is at 0, since it won't contain any
		 * unfrozen XIDs.  Same with relminmxid, since a sequence will never
		 * contain multixacts.
		 */
		Assert(seqrel->rd_rel->relfrozenxid == InvalidTransactionId);
		Assert(seqrel->rd_rel->relminmxid == InvalidMultiXactId);

		/*
		 * Insert the modified tuple into the new storage file.
		 */
		fill_seq_with_data(seqrel, newdatatuple);
	}

	/* process OWNED BY if given */
	if (owned_by)
		process_owned_by(seqrel, owned_by, stmt->for_identity);

	/* update the pg_sequence tuple (we could skip this in some cases...) */
	CatalogTupleUpdate(rel, &seqtuple->t_self, seqtuple);

	InvokeObjectPostAlterHook(RelationRelationId, relid, 0);

	ObjectAddressSet(address, RelationRelationId, relid);

	table_close(rel, RowExclusiveLock);
	relation_close(seqrel, NoLock);

	return address;
}

/*
 * SequenceChangePersistence
 *
 * Rewrite a sequence's storage with a new relfilenode of the given
 * persistence, carrying the current sequence tuple over to the new file.
 */
void
SequenceChangePersistence(Oid relid, char newrelpersistence)
{
	SeqTable	elm;
	Relation	seqrel;
	Buffer		buf;
	HeapTupleData seqdatatuple;

	init_sequence(relid, &elm, &seqrel);

	/* check the comment above nextval_internal()'s equivalent call. */
	if (RelationNeedsWAL(seqrel))
		GetTopTransactionId();

	(void) read_seq_tuple(seqrel, &buf, &seqdatatuple);
	RelationSetNewRelfilenode(seqrel, newrelpersistence);
	fill_seq_with_data(seqrel, &seqdatatuple);
	UnlockReleaseBuffer(buf);

	relation_close(seqrel, NoLock);
}

/*
 * DeleteSequenceTuple
 *
 * Remove the pg_sequence catalog row for the given sequence relation.
 */
void
DeleteSequenceTuple(Oid relid)
{
	Relation	rel;
	HeapTuple	tuple;

	rel = table_open(SequenceRelationId, RowExclusiveLock);

	tuple = SearchSysCache1(SEQRELID, ObjectIdGetDatum(relid));
	if (!HeapTupleIsValid(tuple))
		elog(ERROR, "cache lookup failed for sequence %u", relid);

	CatalogTupleDelete(rel, &tuple->t_self);

	ReleaseSysCache(tuple);
	table_close(rel, RowExclusiveLock);
}

/*
 * Note: nextval with a text argument is no longer exported as a pg_proc
 * entry, but we keep it around to ease porting of C code that may have
 * called the function directly.
 */
Datum
nextval(PG_FUNCTION_ARGS)
{
	text	   *seqin = PG_GETARG_TEXT_PP(0);
	RangeVar   *sequence;
	Oid			relid;

	sequence = makeRangeVarFromNameList(textToQualifiedNameList(seqin));

	/*
	 * XXX: This is not safe in the presence of concurrent DDL, but acquiring
	 * a lock here is more expensive than letting nextval_internal do it,
	 * since the latter maintains a cache that keeps us from hitting the lock
	 * manager more than once per transaction.  It's not clear whether the
	 * performance penalty is material in practice, but for now, we do it this
	 * way.
	 */
	relid = RangeVarGetRelid(sequence, NoLock, false);

	PG_RETURN_INT64(nextval_internal(relid, true));
}

/*
 * As above, but the sequence is identified directly by OID (regclass
 * argument) instead of by a textual name.
 */
Datum
nextval_oid(PG_FUNCTION_ARGS)
{
	Oid			relid = PG_GETARG_OID(0);

	PG_RETURN_INT64(nextval_internal(relid, true));
}

/*
 * Workhorse shared by nextval() and nextval_oid(): allocate and return the
 * next value of the sequence, fetching (and possibly WAL-logging) a batch
 * of values when the local cache is exhausted.
 *
 * If check_permissions is true, the caller must hold USAGE or UPDATE
 * privilege on the sequence.
 */
int64
nextval_internal(Oid relid, bool check_permissions)
{
	SeqTable	elm;
	Relation	seqrel;
	Buffer		buf;
	Page		page;
	HeapTuple	pgstuple;
	Form_pg_sequence pgsform;
	HeapTupleData seqdatatuple;
	Form_pg_sequence_data seq;
	int64		incby,
				maxv,
				minv,
				cache,
				log,
				fetch,
				last;
	int64		result,
				next,
				rescnt = 0;
	bool		cycle;
	bool		logit = false;

	/* open and lock sequence */
	init_sequence(relid, &elm, &seqrel);

	if (check_permissions &&
		pg_class_aclcheck(elm->relid, GetUserId(),
						  ACL_USAGE | ACL_UPDATE) != ACLCHECK_OK)
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied for sequence %s",
						RelationGetRelationName(seqrel))));

	/* read-only transactions may only modify temp sequences */
	if (!seqrel->rd_islocaltemp)
		PreventCommandIfReadOnly("nextval()");

	/*
	 * Forbid this during parallel operation because, to make it work, the
	 * cooperating backends would need to share the backend-local cached
	 * sequence information.  Currently, we don't support that.
	 */
	PreventCommandIfParallelMode("nextval()");

	if (elm->last != elm->cached)	/* some numbers were cached */
	{
		Assert(elm->last_valid);
		Assert(elm->increment != 0);
		elm->last += elm->increment;
		relation_close(seqrel, NoLock);
		last_used_seq = elm;
		return elm->last;
	}

	pgstuple = SearchSysCache1(SEQRELID, ObjectIdGetDatum(relid));
	if (!HeapTupleIsValid(pgstuple))
		elog(ERROR, "cache lookup failed for sequence %u", relid);
	pgsform = (Form_pg_sequence) GETSTRUCT(pgstuple);
	incby = pgsform->seqincrement;
	maxv = pgsform->seqmax;
	minv = pgsform->seqmin;
	cache = pgsform->seqcache;
	cycle = pgsform->seqcycle;
	ReleaseSysCache(pgstuple);

	/* lock page's buffer and read tuple */
	seq = read_seq_tuple(seqrel, &buf, &seqdatatuple);
	page = BufferGetPage(buf);

	elm->increment = incby;
	last = next = result = seq->last_value;
	fetch = cache;
	log = seq->log_cnt;

	if (!seq->is_called)
	{
		rescnt++;				/* return last_value if not is_called */
		fetch--;
	}

	/*
	 * Decide whether we should emit a WAL log record.  If so, force up the
	 * fetch count to grab SEQ_LOG_VALS more values than we actually need to
	 * cache.  (These will then be usable without logging.)
	 *
	 * If this is the first nextval after a checkpoint, we must force a new
	 * WAL record to be written anyway, else replay starting from the
	 * checkpoint would fail to advance the sequence past the logged values.
	 * In this case we may as well fetch extra values.
	 */
	if (log < fetch || !seq->is_called)
	{
		/* forced log to satisfy local demand for values */
		fetch = log = fetch + SEQ_LOG_VALS;
		logit = true;
	}
	else
	{
		XLogRecPtr	redoptr = GetRedoRecPtr();

		if (PageGetLSN(page) <= redoptr)
		{
			/* last update of seq was before checkpoint */
			fetch = log = fetch + SEQ_LOG_VALS;
			logit = true;
		}
	}

	while (fetch)				/* try to fetch cache [+ log ] numbers */
	{
		/*
		 * Check MAXVALUE for ascending sequences and MINVALUE for descending
		 * sequences
		 */
		if (incby > 0)
		{
			/* ascending sequence */
			if ((maxv >= 0 && next > maxv - incby) ||
				(maxv < 0 && next + incby > maxv))
			{
				if (rescnt > 0)
					break;		/* stop fetching */
				if (!cycle)
					ereport(ERROR,
							(errcode(ERRCODE_SEQUENCE_GENERATOR_LIMIT_EXCEEDED),
							 errmsg("nextval: reached maximum value of sequence \"%s\" (%lld)",
									RelationGetRelationName(seqrel),
									(long long) maxv)));
				next = minv;
			}
			else
				next += incby;
		}
		else
		{
			/* descending sequence */
			if ((minv < 0 && next < minv - incby) ||
				(minv >= 0 && next + incby < minv))
			{
				if (rescnt > 0)
					break;		/* stop fetching */
				if (!cycle)
					ereport(ERROR,
							(errcode(ERRCODE_SEQUENCE_GENERATOR_LIMIT_EXCEEDED),
							 errmsg("nextval: reached minimum value of sequence \"%s\" (%lld)",
									RelationGetRelationName(seqrel),
									(long long) minv)));
				next = maxv;
			}
			else
				next += incby;
		}
		fetch--;
		if (rescnt < cache)
		{
			log--;
			rescnt++;
			last = next;
			if (rescnt == 1)	/* if it's first result - */
				result = next;	/* it's what to return */
		}
	}

	log -= fetch;				/* adjust for any unfetched numbers */
	Assert(log >= 0);

	/* save info in local cache */
	elm->last = result;			/* last returned number */
	elm->cached = last;			/* last fetched number */
	elm->last_valid = true;

	last_used_seq = elm;

	/*
	 * If something needs to be WAL logged, acquire an xid, so this
	 * transaction's commit will trigger a WAL flush and wait for syncrep.
	 * It's sufficient to ensure the toplevel transaction has an xid, no need
	 * to assign xids subxacts, that'll already trigger an appropriate wait.
	 * (Have to do that here, so we're outside the critical section)
	 */
	if (logit && RelationNeedsWAL(seqrel))
		GetTopTransactionId();

	/* ready to change the on-disk (or really, in-buffer) tuple */
	START_CRIT_SECTION();

	/*
	 * We must mark the buffer dirty before doing XLogInsert(); see notes in
	 * SyncOneBuffer().  However, we don't apply the desired changes just yet.
	 * This looks like a violation of the buffer update protocol, but it is in
	 * fact safe because we hold exclusive lock on the buffer.  Any other
	 * process, including a checkpoint, that tries to examine the buffer
	 * contents will block until we release the lock, and then will see the
	 * final state that we install below.
	 */
	MarkBufferDirty(buf);

	/* XLOG stuff */
	if (logit && RelationNeedsWAL(seqrel))
	{
		xl_seq_rec	xlrec;
		XLogRecPtr	recptr;

		/*
		 * We don't log the current state of the tuple, but rather the state
		 * as it would appear after "log" more fetches.  This lets us skip
		 * that many future WAL records, at the cost that we lose those
		 * sequence values if we crash.
		 */
		XLogBeginInsert();
		XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);

		/* set values that will be saved in xlog */
		seq->last_value = next;
		seq->is_called = true;
		seq->log_cnt = 0;

		xlrec.node = seqrel->rd_node;

		XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec));
		XLogRegisterData((char *) seqdatatuple.t_data, seqdatatuple.t_len);

		recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);

		PageSetLSN(page, recptr);
	}

	/* Now update sequence tuple to the intended final state */
	seq->last_value = last;		/* last fetched number */
	seq->is_called = true;
	seq->log_cnt = log;			/* how much is logged */

	END_CRIT_SECTION();

	UnlockReleaseBuffer(buf);

	relation_close(seqrel, NoLock);

	return result;
}

/*
 * Implement currval(regclass): return the value most recently obtained by
 * nextval for this sequence in the current session.  Errors out if nextval
 * has not yet been called for it in this session.
 */
Datum
currval_oid(PG_FUNCTION_ARGS)
{
	Oid			relid = PG_GETARG_OID(0);
	int64		result;
	SeqTable	elm;
	Relation	seqrel;

	/* open and lock sequence */
	init_sequence(relid, &elm, &seqrel);

	if (pg_class_aclcheck(elm->relid, GetUserId(),
						  ACL_SELECT | ACL_USAGE) != ACLCHECK_OK)
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied for sequence %s",
						RelationGetRelationName(seqrel))));

	if (!elm->last_valid)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("currval of sequence \"%s\" is not yet defined in this session",
						RelationGetRelationName(seqrel))));

	result = elm->last;

	relation_close(seqrel, NoLock);

	PG_RETURN_INT64(result);
}

/*
 * Implement lastval(): return the most recent value obtained by nextval in
 * this session, for whichever sequence was touched last (tracked via the
 * backend-local last_used_seq pointer).
 */
Datum
lastval(PG_FUNCTION_ARGS)
{
	Relation	seqrel;
	int64		result;

	if (last_used_seq == NULL)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("lastval is not yet defined in this session")));

	/* Someone may have dropped the sequence since the last nextval() */
	if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(last_used_seq->relid)))
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("lastval is not yet defined in this session")));

	seqrel = lock_and_open_sequence(last_used_seq);

	/* nextval() must have already been called for this sequence */
	Assert(last_used_seq->last_valid);

	if (pg_class_aclcheck(last_used_seq->relid, GetUserId(),
						  ACL_SELECT | ACL_USAGE) != ACLCHECK_OK)
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied for sequence %s",
						RelationGetRelationName(seqrel))));

	result = last_used_seq->last;
	relation_close(seqrel, NoLock);

	PG_RETURN_INT64(result);
}

/*
 * Main internal procedure that handles 2 & 3 arg forms of SETVAL.
 *
 * Note that the 3 arg version (which sets the is_called flag) is
 * only for use in pg_dump, and setting the is_called flag may not
 * work if multiple users are attached to the database and referencing
 * the sequence (unlikely if pg_dump is restoring it).
 *
 * It is necessary to have the 3 arg version so that pg_dump can
 * restore the state of a sequence exactly during data-only restores -
 * it is the only way to clear the is_called flag in an existing
 * sequence.
 */
static void
do_setval(Oid relid, int64 next, bool iscalled)
{
	SeqTable	elm;
	Relation	seqrel;
	Buffer		buf;
	HeapTupleData seqdatatuple;
	Form_pg_sequence_data seq;
	HeapTuple	pgstuple;
	Form_pg_sequence pgsform;
	int64		maxv,
				minv;

	/* open and lock sequence */
	init_sequence(relid, &elm, &seqrel);

	if (pg_class_aclcheck(elm->relid, GetUserId(), ACL_UPDATE) != ACLCHECK_OK)
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied for sequence %s",
						RelationGetRelationName(seqrel))));

	pgstuple = SearchSysCache1(SEQRELID, ObjectIdGetDatum(relid));
	if (!HeapTupleIsValid(pgstuple))
		elog(ERROR, "cache lookup failed for sequence %u", relid);
	pgsform = (Form_pg_sequence) GETSTRUCT(pgstuple);
	maxv = pgsform->seqmax;
	minv = pgsform->seqmin;
	ReleaseSysCache(pgstuple);

	/* read-only transactions may only modify temp sequences */
	if (!seqrel->rd_islocaltemp)
		PreventCommandIfReadOnly("setval()");

	/*
	 * Forbid this during parallel operation because, to make it work, the
	 * cooperating backends would need to share the backend-local cached
	 * sequence information.  Currently, we don't support that.
	 */
	PreventCommandIfParallelMode("setval()");

	/* lock page's buffer and read tuple */
	seq = read_seq_tuple(seqrel, &buf, &seqdatatuple);

	if ((next < minv) || (next > maxv))
		ereport(ERROR,
				(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
				 errmsg("setval: value %lld is out of bounds for sequence \"%s\" (%lld..%lld)",
						(long long) next, RelationGetRelationName(seqrel),
						(long long) minv, (long long) maxv)));

	/* Set the currval() state only if iscalled = true */
	if (iscalled)
	{
		elm->last = next;		/* last returned number */
		elm->last_valid = true;
	}

	/* In any case, forget any future cached numbers */
	elm->cached = elm->last;

	/* check the comment above nextval_internal()'s equivalent call. */
	if (RelationNeedsWAL(seqrel))
		GetTopTransactionId();

	/* ready to change the on-disk (or really, in-buffer) tuple */
	START_CRIT_SECTION();

	seq->last_value = next;		/* last fetched number */
	seq->is_called = iscalled;
	/* log_cnt = 0 forces the next nextval() to emit a fresh WAL record */
	seq->log_cnt = 0;

	MarkBufferDirty(buf);

	/* XLOG stuff */
	if (RelationNeedsWAL(seqrel))
	{
		xl_seq_rec	xlrec;
		XLogRecPtr	recptr;
		Page		page = BufferGetPage(buf);

		XLogBeginInsert();
		XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);

		xlrec.node = seqrel->rd_node;
		XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec));
		XLogRegisterData((char *) seqdatatuple.t_data, seqdatatuple.t_len);

		recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);

		PageSetLSN(page, recptr);
	}

	END_CRIT_SECTION();

	UnlockReleaseBuffer(buf);

	relation_close(seqrel, NoLock);
}

/*
 * Implement the 2 arg setval procedure.
 * See do_setval for discussion.
 */
Datum
setval_oid(PG_FUNCTION_ARGS)
{
	Oid			relid = PG_GETARG_OID(0);
	int64		next = PG_GETARG_INT64(1);

	do_setval(relid, next, true);

	PG_RETURN_INT64(next);
}

/*
 * Implement the 3 arg setval procedure.
 * See do_setval for discussion.
 */
Datum
setval3_oid(PG_FUNCTION_ARGS)
{
	Oid			relid = PG_GETARG_OID(0);
	int64		next = PG_GETARG_INT64(1);
	bool		iscalled = PG_GETARG_BOOL(2);

	do_setval(relid, next, iscalled);

	PG_RETURN_INT64(next);
}


/*
 * Open the sequence and acquire lock if needed
 *
 * If we haven't touched the sequence already in this transaction,
 * we need to acquire a lock.  We arrange for the lock to
 * be owned by the top transaction, so that we don't need to do it
 * more than once per xact.
 */
static Relation
lock_and_open_sequence(SeqTable seq)
{
	LocalTransactionId thislxid = MyProc->lxid;

	/* Get the lock if not already held in this xact */
	if (seq->lxid != thislxid)
	{
		ResourceOwner currentOwner;

		/*
		 * Acquire the lock under the top transaction's resource owner, so
		 * that it is held until commit rather than being released when the
		 * current (possibly shorter-lived) owner is cleaned up.
		 */
		currentOwner = CurrentResourceOwner;
		CurrentResourceOwner = TopTransactionResourceOwner;

		LockRelationOid(seq->relid, RowExclusiveLock);

		CurrentResourceOwner = currentOwner;

		/* Flag that we have a lock in the current xact */
		seq->lxid = thislxid;
	}

	/* We now know we have the lock, and can safely open the rel */
	return relation_open(seq->relid, NoLock);
}

/*
 * Creates the hash table for storing sequence data
 */
static void
create_seq_hashtable(void)
{
	HASHCTL		ctl;

	ctl.keysize = sizeof(Oid);
	ctl.entrysize = sizeof(SeqTableData);

	seqhashtab = hash_create("Sequence values", 16, &ctl,
							 HASH_ELEM | HASH_BLOBS);
}

/*
 * Given a relation OID, open and lock the sequence.  p_elm and p_rel are
 * output parameters.
 */
static void
init_sequence(Oid relid, SeqTable *p_elm, Relation *p_rel)
{
	SeqTable	elm;
	Relation	seqrel;
	bool		found;

	/* Find or create a hash table entry for this sequence */
	if (seqhashtab == NULL)
		create_seq_hashtable();

	elm = (SeqTable) hash_search(seqhashtab, &relid, HASH_ENTER, &found);

	/*
	 * Initialize the new hash table entry if it did not exist already.
	 *
	 * NOTE: seqhashtab entries are stored for the life of a backend (unless
	 * explicitly discarded with DISCARD).  If the sequence itself is deleted
	 * then the entry becomes wasted memory, but it's small enough that this
	 * should not matter.
	 */
	if (!found)
	{
		/* relid already filled in */
		elm->filenode = InvalidOid;
		elm->lxid = InvalidLocalTransactionId;
		elm->last_valid = false;
		elm->last = elm->cached = 0;
	}

	/*
	 * Open the sequence relation.
	 */
	seqrel = lock_and_open_sequence(elm);

	if (seqrel->rd_rel->relkind != RELKIND_SEQUENCE)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("\"%s\" is not a sequence",
						RelationGetRelationName(seqrel))));

	/*
	 * If the sequence has been transactionally replaced since we last saw it,
	 * discard any cached-but-unissued values.  We do not touch the currval()
	 * state, however.
	 */
	if (seqrel->rd_rel->relfilenode != elm->filenode)
	{
		elm->filenode = seqrel->rd_rel->relfilenode;
		elm->cached = elm->last;
	}

	/* Return results */
	*p_elm = elm;
	*p_rel = seqrel;
}


/*
 * Given an opened sequence relation, lock the page buffer and find the tuple
 *
 * *buf receives the reference to the pinned-and-ex-locked buffer
 * *seqdatatuple receives the reference to the sequence tuple proper
 *		(this arg should point to a local variable of type HeapTupleData)
 *
 * Function's return value points to the data payload of the tuple
 */
static Form_pg_sequence_data
read_seq_tuple(Relation rel, Buffer *buf, HeapTuple seqdatatuple)
{
	Page		page;
	ItemId		lp;
	sequence_magic *sm;
	Form_pg_sequence_data seq;

	*buf = ReadBuffer(rel, 0);
	LockBuffer(*buf, BUFFER_LOCK_EXCLUSIVE);

	page = BufferGetPage(*buf);
	sm = (sequence_magic *) PageGetSpecialPointer(page);

	if (sm->magic != SEQ_MAGIC)
		elog(ERROR, "bad magic number in sequence \"%s\": %08X",
			 RelationGetRelationName(rel), sm->magic);

	lp = PageGetItemId(page, FirstOffsetNumber);
	Assert(ItemIdIsNormal(lp));

	/* Note we currently only bother to set these two fields of *seqdatatuple */
	seqdatatuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
	seqdatatuple->t_len = ItemIdGetLength(lp);

	/*
	 * Previous releases of Postgres neglected to prevent SELECT FOR UPDATE on
	 * a sequence, which would leave a non-frozen XID in the sequence tuple's
	 * xmax, which eventually leads to clog access failures or worse.  If we
	 * see this has happened, clean up after it.  We treat this like a hint
	 * bit update, ie, don't bother to WAL-log it, since we can certainly do
	 * this again if the update gets lost.
	 */
	Assert(!(seqdatatuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI));
	if (HeapTupleHeaderGetRawXmax(seqdatatuple->t_data) != InvalidTransactionId)
	{
		HeapTupleHeaderSetXmax(seqdatatuple->t_data, InvalidTransactionId);
		seqdatatuple->t_data->t_infomask &= ~HEAP_XMAX_COMMITTED;
		seqdatatuple->t_data->t_infomask |= HEAP_XMAX_INVALID;
		MarkBufferDirtyHint(*buf, true);
	}

	seq = (Form_pg_sequence_data) GETSTRUCT(seqdatatuple);

	return seq;
}

/*
 * init_params: process the options list of CREATE or ALTER SEQUENCE, and
 * store the values into appropriate fields of seqform, for changes that go
 * into the pg_sequence catalog, and fields of seqdataform for changes to the
 * sequence relation itself.  Set *need_seq_rewrite to true if we changed any
 * parameters that require rewriting the sequence's relation (interesting for
 * ALTER SEQUENCE).  Also set *owned_by to any OWNED BY option, or to NIL if
 * there is none.
 *
 * If isInit is true, fill any unspecified options with default values;
 * otherwise, do not change existing options that aren't explicitly overridden.
 *
 * Note: we force a sequence rewrite whenever we change parameters that affect
 * generation of future sequence values, even if the seqdataform per se is not
 * changed.  This allows ALTER SEQUENCE to behave transactionally.  Currently,
 * the only option that doesn't cause that is OWNED BY.  It's *necessary* for
 * ALTER SEQUENCE OWNED BY to not rewrite the sequence, because that would
 * break pg_upgrade by causing unwanted changes in the sequence's relfilenode.
 */
static void
init_params(ParseState *pstate, List *options, bool for_identity,
			bool isInit,
			Form_pg_sequence seqform,
			Form_pg_sequence_data seqdataform,
			bool *need_seq_rewrite,
			List **owned_by)
{
	DefElem    *as_type = NULL;
	DefElem    *start_value = NULL;
	DefElem    *restart_value = NULL;
	DefElem    *increment_by = NULL;
	DefElem    *max_value = NULL;
	DefElem    *min_value = NULL;
	DefElem    *cache_value = NULL;
	DefElem    *is_cycled = NULL;
	ListCell   *option;
	bool		reset_max_value = false;
	bool		reset_min_value = false;

	*need_seq_rewrite = false;
	*owned_by = NIL;

	/* Collect the options, rejecting duplicates of the same option */
	foreach(option, options)
	{
		DefElem    *defel = (DefElem *) lfirst(option);

		if (strcmp(defel->defname, "as") == 0)
		{
			if (as_type)
				errorConflictingDefElem(defel, pstate);
			as_type = defel;
			*need_seq_rewrite = true;
		}
		else if (strcmp(defel->defname, "increment") == 0)
		{
			if (increment_by)
				errorConflictingDefElem(defel, pstate);
			increment_by = defel;
			*need_seq_rewrite = true;
		}
		else if (strcmp(defel->defname, "start") == 0)
		{
			if (start_value)
				errorConflictingDefElem(defel, pstate);
			start_value = defel;
			*need_seq_rewrite = true;
		}
		else if (strcmp(defel->defname, "restart") == 0)
		{
			if (restart_value)
				errorConflictingDefElem(defel, pstate);
			restart_value = defel;
			*need_seq_rewrite = true;
		}
		else if (strcmp(defel->defname, "maxvalue") == 0)
		{
			if (max_value)
				errorConflictingDefElem(defel, pstate);
			max_value = defel;
			*need_seq_rewrite = true;
		}
		else if (strcmp(defel->defname, "minvalue") == 0)
		{
			if (min_value)
				errorConflictingDefElem(defel, pstate);
			min_value = defel;
			*need_seq_rewrite = true;
		}
		else if (strcmp(defel->defname, "cache") == 0)
		{
			if (cache_value)
				errorConflictingDefElem(defel, pstate);
			cache_value = defel;
			*need_seq_rewrite = true;
		}
		else if (strcmp(defel->defname, "cycle") == 0)
		{
			if (is_cycled)
				errorConflictingDefElem(defel, pstate);
			is_cycled = defel;
			*need_seq_rewrite = true;
		}
		else if (strcmp(defel->defname, "owned_by") == 0)
		{
			if (*owned_by)
				errorConflictingDefElem(defel, pstate);
			*owned_by = defGetQualifiedName(defel);
		}
		else if (strcmp(defel->defname, "sequence_name") == 0)
		{
			/*
			 * The parser allows this, but it is only for identity columns, in
			 * which case it is filtered out in parse_utilcmd.c.  We only get
			 * here if someone puts it into a CREATE SEQUENCE.
			 */
			ereport(ERROR,
					(errcode(ERRCODE_SYNTAX_ERROR),
					 errmsg("invalid sequence option SEQUENCE NAME"),
					 parser_errposition(pstate, defel->location)));
		}
		else
			elog(ERROR, "option \"%s\" not recognized",
				 defel->defname);
	}

	/*
	 * We must reset log_cnt when isInit or when changing any parameters that
	 * would affect future nextval allocations.
	 */
	if (isInit)
		seqdataform->log_cnt = 0;

	/* AS type */
	if (as_type != NULL)
	{
		Oid			newtypid = typenameTypeId(pstate, defGetTypeName(as_type));

		if (newtypid != INT2OID &&
			newtypid != INT4OID &&
			newtypid != INT8OID)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 for_identity
					 ? errmsg("identity column type must be smallint, integer, or bigint")
					 : errmsg("sequence type must be smallint, integer, or bigint")));

		if (!isInit)
		{
			/*
			 * When changing type and the old sequence min/max values were the
			 * min/max of the old type, adjust sequence min/max values to
			 * min/max of new type.  (Otherwise, the user chose explicit
			 * min/max values, which we'll leave alone.)
			 */
			if ((seqform->seqtypid == INT2OID && seqform->seqmax == PG_INT16_MAX) ||
				(seqform->seqtypid == INT4OID && seqform->seqmax == PG_INT32_MAX) ||
				(seqform->seqtypid == INT8OID && seqform->seqmax == PG_INT64_MAX))
				reset_max_value = true;
			if ((seqform->seqtypid == INT2OID && seqform->seqmin == PG_INT16_MIN) ||
				(seqform->seqtypid == INT4OID && seqform->seqmin == PG_INT32_MIN) ||
				(seqform->seqtypid == INT8OID && seqform->seqmin == PG_INT64_MIN))
				reset_min_value = true;
		}

		seqform->seqtypid = newtypid;
	}
	else if (isInit)
	{
		seqform->seqtypid = INT8OID;
	}

	/* INCREMENT BY */
	if (increment_by != NULL)
	{
		seqform->seqincrement = defGetInt64(increment_by);
		if (seqform->seqincrement == 0)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("INCREMENT must not be zero")));
		seqdataform->log_cnt = 0;
	}
	else if (isInit)
	{
		seqform->seqincrement = 1;
	}

	/* CYCLE */
	if (is_cycled != NULL)
	{
		seqform->seqcycle = boolVal(is_cycled->arg);
		Assert(BoolIsValid(seqform->seqcycle));
		seqdataform->log_cnt = 0;
	}
	else if (isInit)
	{
		seqform->seqcycle = false;
	}

	/* MAXVALUE (null arg means NO MAXVALUE) */
	if (max_value != NULL && max_value->arg)
	{
		seqform->seqmax = defGetInt64(max_value);
		seqdataform->log_cnt = 0;
	}
	else if (isInit || max_value != NULL || reset_max_value)
	{
		if (seqform->seqincrement > 0 || reset_max_value)
		{
			/* ascending seq */
			if (seqform->seqtypid == INT2OID)
				seqform->seqmax = PG_INT16_MAX;
			else if (seqform->seqtypid == INT4OID)
				seqform->seqmax = PG_INT32_MAX;
			else
				seqform->seqmax = PG_INT64_MAX;
		}
		else
			seqform->seqmax = -1;	/* descending seq */
		seqdataform->log_cnt = 0;
	}

	/* Validate maximum value.  No need to check INT8 as seqmax is an int64 */
	if ((seqform->seqtypid == INT2OID && (seqform->seqmax < PG_INT16_MIN || seqform->seqmax > PG_INT16_MAX))
		|| (seqform->seqtypid == INT4OID && (seqform->seqmax < PG_INT32_MIN || seqform->seqmax > PG_INT32_MAX)))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("MAXVALUE (%lld) is out of range for sequence data type %s",
						(long long) seqform->seqmax,
						format_type_be(seqform->seqtypid))));

	/* MINVALUE (null arg means NO MINVALUE) */
	if (min_value != NULL && min_value->arg)
	{
		seqform->seqmin = defGetInt64(min_value);
		seqdataform->log_cnt = 0;
	}
	else if (isInit || min_value != NULL || reset_min_value)
	{
		if (seqform->seqincrement < 0 || reset_min_value)
		{
			/* descending seq */
			if (seqform->seqtypid == INT2OID)
				seqform->seqmin = PG_INT16_MIN;
			else if (seqform->seqtypid == INT4OID)
				seqform->seqmin = PG_INT32_MIN;
			else
				seqform->seqmin = PG_INT64_MIN;
		}
		else
			seqform->seqmin = 1;	/* ascending seq */
		seqdataform->log_cnt = 0;
	}

	/* Validate minimum value.  No need to check INT8 as seqmin is an int64 */
	if ((seqform->seqtypid == INT2OID && (seqform->seqmin < PG_INT16_MIN || seqform->seqmin > PG_INT16_MAX))
		|| (seqform->seqtypid == INT4OID && (seqform->seqmin < PG_INT32_MIN || seqform->seqmin > PG_INT32_MAX)))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("MINVALUE (%lld) is out of range for sequence data type %s",
						(long long) seqform->seqmin,
						format_type_be(seqform->seqtypid))));

	/* crosscheck min/max */
	if (seqform->seqmin >= seqform->seqmax)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("MINVALUE (%lld) must be less than MAXVALUE (%lld)",
						(long long) seqform->seqmin,
						(long long) seqform->seqmax)));

	/* START WITH */
	if (start_value != NULL)
	{
		seqform->seqstart = defGetInt64(start_value);
	}
	else if (isInit)
	{
		if (seqform->seqincrement > 0)
			seqform->seqstart = seqform->seqmin;	/* ascending seq */
		else
			seqform->seqstart = seqform->seqmax;	/* descending seq */
	}

	/* crosscheck START */
	if (seqform->seqstart < seqform->seqmin)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("START value (%lld) cannot be less than MINVALUE (%lld)",
						(long long) seqform->seqstart,
						(long long) seqform->seqmin)));
	if (seqform->seqstart > seqform->seqmax)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("START value (%lld) cannot be greater than MAXVALUE (%lld)",
						(long long) seqform->seqstart,
						(long long) seqform->seqmax)));

	/* RESTART [WITH] */
	if (restart_value != NULL)
	{
		if (restart_value->arg != NULL)
			seqdataform->last_value = defGetInt64(restart_value);
		else
			seqdataform->last_value = seqform->seqstart;
		seqdataform->is_called = false;
		seqdataform->log_cnt = 0;
	}
	else if (isInit)
	{
		seqdataform->last_value = seqform->seqstart;
		seqdataform->is_called = false;
	}

	/* crosscheck RESTART (or current value, if changing MIN/MAX) */
	if (seqdataform->last_value < seqform->seqmin)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("RESTART value (%lld) cannot be less than MINVALUE (%lld)",
						(long long) seqdataform->last_value,
						(long long) seqform->seqmin)));
	if (seqdataform->last_value > seqform->seqmax)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("RESTART value (%lld) cannot be greater than MAXVALUE (%lld)",
						(long long) seqdataform->last_value,
						(long long) seqform->seqmax)));

	/* CACHE */
	if (cache_value != NULL)
	{
		seqform->seqcache = defGetInt64(cache_value);
		if (seqform->seqcache <= 0)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("CACHE (%lld) must be greater than zero",
							(long long) seqform->seqcache)));
		seqdataform->log_cnt = 0;
	}
	else if (isInit)
	{
		seqform->seqcache = 1;
	}
}

/*
 * Process an OWNED BY option for CREATE/ALTER SEQUENCE
 *
 * Ownership permissions on the sequence are already checked,
 * but if we are establishing a new owned-by dependency, we must
 * enforce that the referenced table has the same owner and namespace
 * as the sequence.
 */
static void
process_owned_by(Relation seqrel, List *owned_by, bool for_identity)
{
	DependencyType deptype;
	int			nnames;
	Relation	tablerel;
	AttrNumber	attnum;

	deptype = for_identity ? DEPENDENCY_INTERNAL : DEPENDENCY_AUTO;

	nnames = list_length(owned_by);
	Assert(nnames > 0);
	if (nnames == 1)
	{
		/* Must be OWNED BY NONE */
		if (strcmp(strVal(linitial(owned_by)), "none") != 0)
			ereport(ERROR,
					(errcode(ERRCODE_SYNTAX_ERROR),
					 errmsg("invalid OWNED BY option"),
					 errhint("Specify OWNED BY table.column or OWNED BY NONE.")));
		tablerel = NULL;
		attnum = 0;
	}
	else
	{
		List	   *relname;
		char	   *attrname;
		RangeVar   *rel;

		/* Separate relname and attr name */
		relname = list_truncate(list_copy(owned_by), nnames - 1);
		attrname = strVal(llast(owned_by));

		/* Open and lock rel to ensure it won't go away meanwhile */
		rel = makeRangeVarFromNameList(relname);
		tablerel = relation_openrv(rel, AccessShareLock);

		/* Must be a regular or foreign table */
		if (!(tablerel->rd_rel->relkind == RELKIND_RELATION ||
			  tablerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE ||
			  tablerel->rd_rel->relkind == RELKIND_VIEW ||
			  tablerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE))
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("sequence cannot be owned by relation \"%s\"",
							RelationGetRelationName(tablerel)),
					 errdetail_relkind_not_supported(tablerel->rd_rel->relkind)));

		/* We insist on same owner and schema */
		if (seqrel->rd_rel->relowner != tablerel->rd_rel->relowner)
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("sequence must have same owner as table it is linked to")));
		if (RelationGetNamespace(seqrel) != RelationGetNamespace(tablerel))
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("sequence must be in same schema as table it is linked to")));

		/* Now, fetch the attribute number from the system cache */
		attnum = get_attnum(RelationGetRelid(tablerel), attrname);
		if (attnum == InvalidAttrNumber)
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_COLUMN),
					 errmsg("column \"%s\" of relation \"%s\" does not exist",
							attrname, RelationGetRelationName(tablerel))));
	}

	/*
	 * Catch user explicitly running OWNED BY on identity sequence.
	 */
	if (deptype == DEPENDENCY_AUTO)
	{
		Oid			tableId;
		int32		colId;

		if (sequenceIsOwned(RelationGetRelid(seqrel), DEPENDENCY_INTERNAL, &tableId, &colId))
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("cannot change ownership of identity sequence"),
					 errdetail("Sequence \"%s\" is linked to table \"%s\".",
							   RelationGetRelationName(seqrel),
							   get_rel_name(tableId))));
	}

	/*
	 * OK, we are ready to update pg_depend.  First remove any existing
	 * dependencies for the sequence, then optionally add a new one.
	 */
	deleteDependencyRecordsForClass(RelationRelationId, RelationGetRelid(seqrel),
									RelationRelationId, deptype);

	if (tablerel)
	{
		ObjectAddress refobject,
					depobject;

		refobject.classId = RelationRelationId;
		refobject.objectId = RelationGetRelid(tablerel);
		refobject.objectSubId = attnum;
		depobject.classId = RelationRelationId;
		depobject.objectId = RelationGetRelid(seqrel);
		depobject.objectSubId = 0;
		recordDependencyOn(&depobject, &refobject, deptype);
	}

	/* Done, but hold lock until commit */
	if (tablerel)
		relation_close(tablerel, NoLock);
}


/*
 * Return sequence parameters in a list of the form created by the parser.
 */
List *
sequence_options(Oid relid)
{
	HeapTuple	pgstuple;
	Form_pg_sequence pgsform;
	List	   *options = NIL;

	/*
	 * NOTE(review): other SEQRELID lookups in this file wrap relid in
	 * ObjectIdGetDatum(); consider doing the same here for consistency.
	 */
	pgstuple = SearchSysCache1(SEQRELID, relid);
	if (!HeapTupleIsValid(pgstuple))
		elog(ERROR, "cache lookup failed for sequence %u", relid);
	pgsform = (Form_pg_sequence) GETSTRUCT(pgstuple);

	/* Use makeFloat() for 64-bit integers, like gram.y does.
 */
	options = lappend(options,
					  makeDefElem("cache", (Node *) makeFloat(psprintf(INT64_FORMAT, pgsform->seqcache)), -1));
	options = lappend(options,
					  makeDefElem("cycle", (Node *) makeBoolean(pgsform->seqcycle), -1));
	options = lappend(options,
					  makeDefElem("increment", (Node *) makeFloat(psprintf(INT64_FORMAT, pgsform->seqincrement)), -1));
	options = lappend(options,
					  makeDefElem("maxvalue", (Node *) makeFloat(psprintf(INT64_FORMAT, pgsform->seqmax)), -1));
	options = lappend(options,
					  makeDefElem("minvalue", (Node *) makeFloat(psprintf(INT64_FORMAT, pgsform->seqmin)), -1));
	options = lappend(options,
					  makeDefElem("start", (Node *) makeFloat(psprintf(INT64_FORMAT, pgsform->seqstart)), -1));

	ReleaseSysCache(pgstuple);

	return options;
}

/*
 * Return sequence parameters (formerly for use by information schema)
 */
Datum
pg_sequence_parameters(PG_FUNCTION_ARGS)
{
	Oid			relid = PG_GETARG_OID(0);
	TupleDesc	tupdesc;
	Datum		values[7];
	bool		isnull[7];
	HeapTuple	pgstuple;
	Form_pg_sequence pgsform;

	if (pg_class_aclcheck(relid, GetUserId(), ACL_SELECT | ACL_UPDATE | ACL_USAGE) != ACLCHECK_OK)
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied for sequence %s",
						get_rel_name(relid))));

	tupdesc = CreateTemplateTupleDesc(7);
	TupleDescInitEntry(tupdesc, (AttrNumber) 1, "start_value",
					   INT8OID, -1, 0);
	TupleDescInitEntry(tupdesc, (AttrNumber) 2, "minimum_value",
					   INT8OID, -1, 0);
	TupleDescInitEntry(tupdesc, (AttrNumber) 3, "maximum_value",
					   INT8OID, -1, 0);
	TupleDescInitEntry(tupdesc, (AttrNumber) 4, "increment",
					   INT8OID, -1, 0);
	TupleDescInitEntry(tupdesc, (AttrNumber) 5, "cycle_option",
					   BOOLOID, -1, 0);
	TupleDescInitEntry(tupdesc, (AttrNumber) 6, "cache_size",
					   INT8OID, -1, 0);
	TupleDescInitEntry(tupdesc, (AttrNumber) 7, "data_type",
					   OIDOID, -1, 0);

	BlessTupleDesc(tupdesc);

	memset(isnull, 0, sizeof(isnull));

	/*
	 * NOTE(review): other SEQRELID lookups in this file wrap relid in
	 * ObjectIdGetDatum(); consider doing the same here for consistency.
	 */
	pgstuple = SearchSysCache1(SEQRELID, relid);
	if (!HeapTupleIsValid(pgstuple))
		elog(ERROR, "cache lookup failed for sequence %u", relid);
	pgsform = (Form_pg_sequence) GETSTRUCT(pgstuple);

	values[0] = Int64GetDatum(pgsform->seqstart);
	values[1] = Int64GetDatum(pgsform->seqmin);
	values[2] = Int64GetDatum(pgsform->seqmax);
	values[3] = Int64GetDatum(pgsform->seqincrement);
	values[4] = BoolGetDatum(pgsform->seqcycle);
	values[5] = Int64GetDatum(pgsform->seqcache);
	values[6] = ObjectIdGetDatum(pgsform->seqtypid);

	ReleaseSysCache(pgstuple);

	return HeapTupleGetDatum(heap_form_tuple(tupdesc, values, isnull));
}

/*
 * Return the last value from the sequence
 *
 * Note: This has a completely different meaning than lastval().
 */
Datum
pg_sequence_last_value(PG_FUNCTION_ARGS)
{
	Oid			relid = PG_GETARG_OID(0);
	SeqTable	elm;
	Relation	seqrel;
	Buffer		buf;
	HeapTupleData seqtuple;
	Form_pg_sequence_data seq;
	bool		is_called;
	int64		result;

	/* open and lock sequence */
	init_sequence(relid, &elm, &seqrel);

	if (pg_class_aclcheck(relid, GetUserId(), ACL_SELECT | ACL_USAGE) != ACLCHECK_OK)
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied for sequence %s",
						RelationGetRelationName(seqrel))));

	seq = read_seq_tuple(seqrel, &buf, &seqtuple);

	is_called = seq->is_called;
	result = seq->last_value;

	UnlockReleaseBuffer(buf);
	relation_close(seqrel, NoLock);

	/* Return NULL until the sequence has actually issued a value */
	if (is_called)
		PG_RETURN_INT64(result);
	else
		PG_RETURN_NULL();
}


/*
 * Redo routine for sequence WAL records (XLOG_SEQ_LOG).
 */
void
seq_redo(XLogReaderState *record)
{
	XLogRecPtr	lsn = record->EndRecPtr;
	uint8		info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
	Buffer		buffer;
	Page		page;
	Page		localpage;
	char	   *item;
	Size		itemsz;
	xl_seq_rec *xlrec = (xl_seq_rec *) XLogRecGetData(record);
	sequence_magic *sm;

	if (info != XLOG_SEQ_LOG)
		elog(PANIC, "seq_redo: unknown op code %u", info);

	buffer = XLogInitBufferForRedo(record, 0);
	page = (Page) BufferGetPage(buffer);

	/*
	 * We always reinit the page.  However, since this WAL record type is also
	 * used for updating sequences, it's possible that a hot-standby backend
	 * is examining the page concurrently; so we mustn't transiently trash the
	 * buffer.  The solution is to build the correct new page contents in
	 * local workspace and then memcpy into the buffer.  Then only bytes that
	 * are supposed to change will change, even transiently. We must palloc
	 * the local page for alignment reasons.
	 */
	localpage = (Page) palloc(BufferGetPageSize(buffer));

	PageInit(localpage, BufferGetPageSize(buffer), sizeof(sequence_magic));
	sm = (sequence_magic *) PageGetSpecialPointer(localpage);
	sm->magic = SEQ_MAGIC;

	item = (char *) xlrec + sizeof(xl_seq_rec);
	itemsz = XLogRecGetDataLen(record) - sizeof(xl_seq_rec);

	if (PageAddItem(localpage, (Item) item, itemsz,
					FirstOffsetNumber, false, false) == InvalidOffsetNumber)
		elog(PANIC, "seq_redo: failed to add item to page");

	PageSetLSN(localpage, lsn);

	memcpy(page, localpage, BufferGetPageSize(buffer));
	MarkBufferDirty(buffer);
	UnlockReleaseBuffer(buffer);

	pfree(localpage);
}

/*
 * Flush cached sequence information.
 */
void
ResetSequenceCaches(void)
{
	if (seqhashtab)
	{
		hash_destroy(seqhashtab);
		seqhashtab = NULL;
	}

	last_used_seq = NULL;
}

/*
 * Mask a Sequence page before performing consistency checks on it.
+ */ +void +seq_mask(char *page, BlockNumber blkno) +{ + mask_page_lsn_and_checksum(page); + + mask_unused_space(page); +} diff --git a/src/backend/commands/statscmds.c b/src/backend/commands/statscmds.c new file mode 100644 index 0000000..f442d85 --- /dev/null +++ b/src/backend/commands/statscmds.c @@ -0,0 +1,898 @@ +/*------------------------------------------------------------------------- + * + * statscmds.c + * Commands for creating and altering extended statistics objects + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/statscmds.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/heapam.h" +#include "access/relation.h" +#include "access/relscan.h" +#include "access/table.h" +#include "catalog/catalog.h" +#include "catalog/dependency.h" +#include "catalog/indexing.h" +#include "catalog/namespace.h" +#include "catalog/objectaccess.h" +#include "catalog/pg_namespace.h" +#include "catalog/pg_statistic_ext.h" +#include "catalog/pg_statistic_ext_data.h" +#include "commands/comment.h" +#include "commands/defrem.h" +#include "miscadmin.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/optimizer.h" +#include "statistics/statistics.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/fmgroids.h" +#include "utils/inval.h" +#include "utils/memutils.h" +#include "utils/rel.h" +#include "utils/syscache.h" +#include "utils/typcache.h" + + +static char *ChooseExtendedStatisticName(const char *name1, const char *name2, + const char *label, Oid namespaceid); +static char *ChooseExtendedStatisticNameAddition(List *exprs); + + +/* qsort comparator for the attnums in CreateStatistics */ +static int +compare_int16(const void *a, const void *b) +{ + int av = *(const int16 *) a; + int bv = *(const int16 *) b; + + /* this 
can't overflow if int is wider than int16 */ + return (av - bv); +} + +/* + * CREATE STATISTICS + */ +ObjectAddress +CreateStatistics(CreateStatsStmt *stmt) +{ + int16 attnums[STATS_MAX_DIMENSIONS]; + int nattnums = 0; + int numcols; + char *namestr; + NameData stxname; + Oid statoid; + Oid namespaceId; + Oid stxowner = GetUserId(); + HeapTuple htup; + Datum values[Natts_pg_statistic_ext]; + bool nulls[Natts_pg_statistic_ext]; + int2vector *stxkeys; + List *stxexprs = NIL; + Datum exprsDatum; + Relation statrel; + Relation rel = NULL; + Oid relid; + ObjectAddress parentobject, + myself; + Datum types[4]; /* one for each possible type of statistic */ + int ntypes; + ArrayType *stxkind; + bool build_ndistinct; + bool build_dependencies; + bool build_mcv; + bool build_expressions; + bool requested_type = false; + int i; + ListCell *cell; + ListCell *cell2; + + Assert(IsA(stmt, CreateStatsStmt)); + + /* + * Examine the FROM clause. Currently, we only allow it to be a single + * simple table, but later we'll probably allow multiple tables and JOIN + * syntax. The grammar is already prepared for that, so we have to check + * here that what we got is what we can support. + */ + if (list_length(stmt->relations) != 1) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("only a single relation is allowed in CREATE STATISTICS"))); + + foreach(cell, stmt->relations) + { + Node *rln = (Node *) lfirst(cell); + + if (!IsA(rln, RangeVar)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("only a single relation is allowed in CREATE STATISTICS"))); + + /* + * CREATE STATISTICS will influence future execution plans but does + * not interfere with currently executing plans. So it should be + * enough to take only ShareUpdateExclusiveLock on relation, + * conflicting with ANALYZE and other DDL that sets statistical + * information, but not with normal queries. 
+ */ + rel = relation_openrv((RangeVar *) rln, ShareUpdateExclusiveLock); + + /* Restrict to allowed relation types */ + if (rel->rd_rel->relkind != RELKIND_RELATION && + rel->rd_rel->relkind != RELKIND_MATVIEW && + rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE && + rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot define statistics for relation \"%s\"", + RelationGetRelationName(rel)), + errdetail_relkind_not_supported(rel->rd_rel->relkind))); + + /* You must own the relation to create stats on it */ + if (!pg_class_ownercheck(RelationGetRelid(rel), stxowner)) + aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(rel->rd_rel->relkind), + RelationGetRelationName(rel)); + + /* Creating statistics on system catalogs is not allowed */ + if (!allowSystemTableMods && IsSystemRelation(rel)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied: \"%s\" is a system catalog", + RelationGetRelationName(rel)))); + } + + Assert(rel); + relid = RelationGetRelid(rel); + + /* + * If the node has a name, split it up and determine creation namespace. + * If not (a possibility not considered by the grammar, but one which can + * occur via the "CREATE TABLE ... (LIKE)" command), then we put the + * object in the same namespace as the relation, and cons up a name for + * it. + */ + if (stmt->defnames) + namespaceId = QualifiedNameGetCreationNamespace(stmt->defnames, + &namestr); + else + { + namespaceId = RelationGetNamespace(rel); + namestr = ChooseExtendedStatisticName(RelationGetRelationName(rel), + ChooseExtendedStatisticNameAddition(stmt->exprs), + "stat", + namespaceId); + } + namestrcpy(&stxname, namestr); + + /* + * Deal with the possibility that the statistics object already exists. 
+ */ + if (SearchSysCacheExists2(STATEXTNAMENSP, + CStringGetDatum(namestr), + ObjectIdGetDatum(namespaceId))) + { + if (stmt->if_not_exists) + { + /* + * Since stats objects aren't members of extensions (see comments + * below), no need for checkMembershipInCurrentExtension here. + */ + ereport(NOTICE, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("statistics object \"%s\" already exists, skipping", + namestr))); + relation_close(rel, NoLock); + return InvalidObjectAddress; + } + + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("statistics object \"%s\" already exists", namestr))); + } + + /* + * Make sure no more than STATS_MAX_DIMENSIONS columns are used. There + * might be duplicates and so on, but we'll deal with those later. + */ + numcols = list_length(stmt->exprs); + if (numcols > STATS_MAX_DIMENSIONS) + ereport(ERROR, + (errcode(ERRCODE_TOO_MANY_COLUMNS), + errmsg("cannot have more than %d columns in statistics", + STATS_MAX_DIMENSIONS))); + + /* + * Convert the expression list to a simple array of attnums, but also keep + * a list of more complex expressions. While at it, enforce some + * constraints - we don't allow extended statistics on system attributes, + * and we require the data type to have a less-than operator. + * + * There are many ways to "mask" a simple attribute reference as an + * expression, for example "(a+0)" etc. We can't possibly detect all of + * them, but we handle at least the simple case with the attribute in + * parens. There'll always be a way around this, if the user is determined + * (like the "(a+0)" example), but this makes it somewhat consistent with + * how indexes treat attributes/expressions. 
+ */ + foreach(cell, stmt->exprs) + { + StatsElem *selem = lfirst_node(StatsElem, cell); + + if (selem->name) /* column reference */ + { + char *attname; + HeapTuple atttuple; + Form_pg_attribute attForm; + TypeCacheEntry *type; + + attname = selem->name; + + atttuple = SearchSysCacheAttName(relid, attname); + if (!HeapTupleIsValid(atttuple)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" does not exist", + attname))); + attForm = (Form_pg_attribute) GETSTRUCT(atttuple); + + /* Disallow use of system attributes in extended stats */ + if (attForm->attnum <= 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("statistics creation on system columns is not supported"))); + + /* Disallow data types without a less-than operator */ + type = lookup_type_cache(attForm->atttypid, TYPECACHE_LT_OPR); + if (type->lt_opr == InvalidOid) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("column \"%s\" cannot be used in statistics because its type %s has no default btree operator class", + attname, format_type_be(attForm->atttypid)))); + + attnums[nattnums] = attForm->attnum; + nattnums++; + ReleaseSysCache(atttuple); + } + else if (IsA(selem->expr, Var)) /* column reference in parens */ + { + Var *var = (Var *) selem->expr; + TypeCacheEntry *type; + + /* Disallow use of system attributes in extended stats */ + if (var->varattno <= 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("statistics creation on system columns is not supported"))); + + /* Disallow data types without a less-than operator */ + type = lookup_type_cache(var->vartype, TYPECACHE_LT_OPR); + if (type->lt_opr == InvalidOid) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("column \"%s\" cannot be used in statistics because its type %s has no default btree operator class", + get_attname(relid, var->varattno, false), format_type_be(var->vartype)))); + + attnums[nattnums] = var->varattno; + nattnums++; + } 
+ else /* expression */ + { + Node *expr = selem->expr; + Oid atttype; + TypeCacheEntry *type; + Bitmapset *attnums = NULL; + int k; + + Assert(expr != NULL); + + /* Disallow expressions referencing system attributes. */ + pull_varattnos(expr, 1, &attnums); + + k = -1; + while ((k = bms_next_member(attnums, k)) >= 0) + { + AttrNumber attnum = k + FirstLowInvalidHeapAttributeNumber; + + if (attnum <= 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("statistics creation on system columns is not supported"))); + } + + /* + * Disallow data types without a less-than operator. + * + * We ignore this for statistics on a single expression, in which + * case we'll build the regular statistics only (and that code can + * deal with such data types). + */ + if (list_length(stmt->exprs) > 1) + { + atttype = exprType(expr); + type = lookup_type_cache(atttype, TYPECACHE_LT_OPR); + if (type->lt_opr == InvalidOid) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("expression cannot be used in multivariate statistics because its type %s has no default btree operator class", + format_type_be(atttype)))); + } + + stxexprs = lappend(stxexprs, expr); + } + } + + /* + * Parse the statistics kinds. + * + * First check that if this is the case with a single expression, there + * are no statistics kinds specified (we don't allow that for the simple + * CREATE STATISTICS form). + */ + if ((list_length(stmt->exprs) == 1) && (list_length(stxexprs) == 1)) + { + /* statistics kinds not specified */ + if (list_length(stmt->stat_types) > 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("when building statistics on a single expression, statistics kinds may not be specified"))); + } + + /* OK, let's check that we recognize the statistics kinds. 
*/ + build_ndistinct = false; + build_dependencies = false; + build_mcv = false; + foreach(cell, stmt->stat_types) + { + char *type = strVal(lfirst(cell)); + + if (strcmp(type, "ndistinct") == 0) + { + build_ndistinct = true; + requested_type = true; + } + else if (strcmp(type, "dependencies") == 0) + { + build_dependencies = true; + requested_type = true; + } + else if (strcmp(type, "mcv") == 0) + { + build_mcv = true; + requested_type = true; + } + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unrecognized statistics kind \"%s\"", + type))); + } + + /* + * If no statistic type was specified, build them all (but only when the + * statistics is defined on more than one column/expression). + */ + if ((!requested_type) && (numcols >= 2)) + { + build_ndistinct = true; + build_dependencies = true; + build_mcv = true; + } + + /* + * When there are non-trivial expressions, build the expression stats + * automatically. This allows calculating good estimates for stats that + * consider per-clause estimates (e.g. functional dependencies). + */ + build_expressions = (list_length(stxexprs) > 0); + + /* + * Check that at least two columns were specified in the statement, or + * that we're building statistics on a single expression. + */ + if ((numcols < 2) && (list_length(stxexprs) != 1)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("extended statistics require at least 2 columns"))); + + /* + * Sort the attnums, which makes detecting duplicates somewhat easier, and + * it does not hurt (it does not matter for the contents, unlike for + * indexes, for example). + */ + qsort(attnums, nattnums, sizeof(int16), compare_int16); + + /* + * Check for duplicates in the list of columns. The attnums are sorted so + * just check consecutive elements. 
+ */ + for (i = 1; i < nattnums; i++) + { + if (attnums[i] == attnums[i - 1]) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_COLUMN), + errmsg("duplicate column name in statistics definition"))); + } + + /* + * Check for duplicate expressions. We do two loops, counting the + * occurrences of each expression. This is O(N^2) but we only allow small + * number of expressions and it's not executed often. + * + * XXX We don't cross-check attributes and expressions, because it does + * not seem worth it. In principle we could check that expressions don't + * contain trivial attribute references like "(a)", but the reasoning is + * similar to why we don't bother with extracting columns from + * expressions. It's either expensive or very easy to defeat for + * determined user, and there's no risk if we allow such statistics (the + * statistics is useless, but harmless). + */ + foreach(cell, stxexprs) + { + Node *expr1 = (Node *) lfirst(cell); + int cnt = 0; + + foreach(cell2, stxexprs) + { + Node *expr2 = (Node *) lfirst(cell2); + + if (equal(expr1, expr2)) + cnt += 1; + } + + /* every expression should find at least itself */ + Assert(cnt >= 1); + + if (cnt > 1) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_COLUMN), + errmsg("duplicate expression in statistics definition"))); + } + + /* Form an int2vector representation of the sorted column list */ + stxkeys = buildint2vector(attnums, nattnums); + + /* construct the char array of enabled statistic types */ + ntypes = 0; + if (build_ndistinct) + types[ntypes++] = CharGetDatum(STATS_EXT_NDISTINCT); + if (build_dependencies) + types[ntypes++] = CharGetDatum(STATS_EXT_DEPENDENCIES); + if (build_mcv) + types[ntypes++] = CharGetDatum(STATS_EXT_MCV); + if (build_expressions) + types[ntypes++] = CharGetDatum(STATS_EXT_EXPRESSIONS); + Assert(ntypes > 0 && ntypes <= lengthof(types)); + stxkind = construct_array(types, ntypes, CHAROID, 1, true, TYPALIGN_CHAR); + + /* convert the expressions (if any) to a text datum */ + if (stxexprs 
!= NIL) + { + char *exprsString; + + exprsString = nodeToString(stxexprs); + exprsDatum = CStringGetTextDatum(exprsString); + pfree(exprsString); + } + else + exprsDatum = (Datum) 0; + + statrel = table_open(StatisticExtRelationId, RowExclusiveLock); + + /* + * Everything seems fine, so let's build the pg_statistic_ext tuple. + */ + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + + statoid = GetNewOidWithIndex(statrel, StatisticExtOidIndexId, + Anum_pg_statistic_ext_oid); + values[Anum_pg_statistic_ext_oid - 1] = ObjectIdGetDatum(statoid); + values[Anum_pg_statistic_ext_stxrelid - 1] = ObjectIdGetDatum(relid); + values[Anum_pg_statistic_ext_stxname - 1] = NameGetDatum(&stxname); + values[Anum_pg_statistic_ext_stxnamespace - 1] = ObjectIdGetDatum(namespaceId); + values[Anum_pg_statistic_ext_stxstattarget - 1] = Int32GetDatum(-1); + values[Anum_pg_statistic_ext_stxowner - 1] = ObjectIdGetDatum(stxowner); + values[Anum_pg_statistic_ext_stxkeys - 1] = PointerGetDatum(stxkeys); + values[Anum_pg_statistic_ext_stxkind - 1] = PointerGetDatum(stxkind); + + values[Anum_pg_statistic_ext_stxexprs - 1] = exprsDatum; + if (exprsDatum == (Datum) 0) + nulls[Anum_pg_statistic_ext_stxexprs - 1] = true; + + /* insert it into pg_statistic_ext */ + htup = heap_form_tuple(statrel->rd_att, values, nulls); + CatalogTupleInsert(statrel, htup); + heap_freetuple(htup); + + relation_close(statrel, RowExclusiveLock); + + /* + * We used to create the pg_statistic_ext_data tuple too, but it's not + * clear what value should the stxdinherit flag have (it depends on + * whether the rel is partitioned, contains data, etc.) + */ + + InvokeObjectPostCreateHook(StatisticExtRelationId, statoid, 0); + + /* + * Invalidate relcache so that others see the new statistics object. 
+ */ + CacheInvalidateRelcache(rel); + + relation_close(rel, NoLock); + + /* + * Add an AUTO dependency on each column used in the stats, so that the + * stats object goes away if any or all of them get dropped. + */ + ObjectAddressSet(myself, StatisticExtRelationId, statoid); + + /* add dependencies for plain column references */ + for (i = 0; i < nattnums; i++) + { + ObjectAddressSubSet(parentobject, RelationRelationId, relid, attnums[i]); + recordDependencyOn(&myself, &parentobject, DEPENDENCY_AUTO); + } + + /* + * If there are no dependencies on a column, give the statistics object an + * auto dependency on the whole table. In most cases, this will be + * redundant, but it might not be if the statistics expressions contain no + * Vars (which might seem strange but possible). This is consistent with + * what we do for indexes in index_create. + * + * XXX We intentionally don't consider the expressions before adding this + * dependency, because recordDependencyOnSingleRelExpr may not create any + * dependencies for whole-row Vars. + */ + if (!nattnums) + { + ObjectAddressSet(parentobject, RelationRelationId, relid); + recordDependencyOn(&myself, &parentobject, DEPENDENCY_AUTO); + } + + /* + * Store dependencies on anything mentioned in statistics expressions, + * just like we do for index expressions. + */ + if (stxexprs) + recordDependencyOnSingleRelExpr(&myself, + (Node *) stxexprs, + relid, + DEPENDENCY_NORMAL, + DEPENDENCY_AUTO, false); + + /* + * Also add dependencies on namespace and owner. These are required + * because the stats object might have a different namespace and/or owner + * than the underlying table(s). 
+ */ + ObjectAddressSet(parentobject, NamespaceRelationId, namespaceId); + recordDependencyOn(&myself, &parentobject, DEPENDENCY_NORMAL); + + recordDependencyOnOwner(StatisticExtRelationId, statoid, stxowner); + + /* + * XXX probably there should be a recordDependencyOnCurrentExtension call + * here too, but we'd have to add support for ALTER EXTENSION ADD/DROP + * STATISTICS, which is more work than it seems worth. + */ + + /* Add any requested comment */ + if (stmt->stxcomment != NULL) + CreateComments(statoid, StatisticExtRelationId, 0, + stmt->stxcomment); + + /* Return stats object's address */ + return myself; +} + +/* + * ALTER STATISTICS + */ +ObjectAddress +AlterStatistics(AlterStatsStmt *stmt) +{ + Relation rel; + Oid stxoid; + HeapTuple oldtup; + HeapTuple newtup; + Datum repl_val[Natts_pg_statistic_ext]; + bool repl_null[Natts_pg_statistic_ext]; + bool repl_repl[Natts_pg_statistic_ext]; + ObjectAddress address; + int newtarget = stmt->stxstattarget; + + /* Limit statistics target to a sane range */ + if (newtarget < -1) + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("statistics target %d is too low", + newtarget))); + } + else if (newtarget > 10000) + { + newtarget = 10000; + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("lowering statistics target to %d", + newtarget))); + } + + /* lookup OID of the statistics object */ + stxoid = get_statistics_object_oid(stmt->defnames, stmt->missing_ok); + + /* + * If we got here and the OID is not valid, it means the statistics object + * does not exist, but the command specified IF EXISTS. So report this as + * a simple NOTICE and we're done. 
+ */ + if (!OidIsValid(stxoid)) + { + char *schemaname; + char *statname; + + Assert(stmt->missing_ok); + + DeconstructQualifiedName(stmt->defnames, &schemaname, &statname); + + if (schemaname) + ereport(NOTICE, + (errmsg("statistics object \"%s.%s\" does not exist, skipping", + schemaname, statname))); + else + ereport(NOTICE, + (errmsg("statistics object \"%s\" does not exist, skipping", + statname))); + + return InvalidObjectAddress; + } + + /* Search pg_statistic_ext */ + rel = table_open(StatisticExtRelationId, RowExclusiveLock); + + oldtup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(stxoid)); + if (!HeapTupleIsValid(oldtup)) + elog(ERROR, "cache lookup failed for extended statistics object %u", stxoid); + + /* Must be owner of the existing statistics object */ + if (!pg_statistics_object_ownercheck(stxoid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_STATISTIC_EXT, + NameListToString(stmt->defnames)); + + /* Build new tuple. */ + memset(repl_val, 0, sizeof(repl_val)); + memset(repl_null, false, sizeof(repl_null)); + memset(repl_repl, false, sizeof(repl_repl)); + + /* replace the stxstattarget column */ + repl_repl[Anum_pg_statistic_ext_stxstattarget - 1] = true; + repl_val[Anum_pg_statistic_ext_stxstattarget - 1] = Int32GetDatum(newtarget); + + newtup = heap_modify_tuple(oldtup, RelationGetDescr(rel), + repl_val, repl_null, repl_repl); + + /* Update system catalog. */ + CatalogTupleUpdate(rel, &newtup->t_self, newtup); + + InvokeObjectPostAlterHook(StatisticExtRelationId, stxoid, 0); + + ObjectAddressSet(address, StatisticExtRelationId, stxoid); + + /* + * NOTE: because we only support altering the statistics target, not the + * other fields, there is no need to update dependencies. + */ + + heap_freetuple(newtup); + ReleaseSysCache(oldtup); + + table_close(rel, RowExclusiveLock); + + return address; +} + +/* + * Delete entry in pg_statistic_ext_data catalog. We don't know if the row + * exists, so don't error out. 
+ */ +void +RemoveStatisticsDataById(Oid statsOid, bool inh) +{ + Relation relation; + HeapTuple tup; + + relation = table_open(StatisticExtDataRelationId, RowExclusiveLock); + + tup = SearchSysCache2(STATEXTDATASTXOID, ObjectIdGetDatum(statsOid), + BoolGetDatum(inh)); + + /* We don't know if the data row for inh value exists. */ + if (HeapTupleIsValid(tup)) + { + CatalogTupleDelete(relation, &tup->t_self); + + ReleaseSysCache(tup); + } + + table_close(relation, RowExclusiveLock); +} + +/* + * Guts of statistics object deletion. + */ +void +RemoveStatisticsById(Oid statsOid) +{ + Relation relation; + HeapTuple tup; + Form_pg_statistic_ext statext; + Oid relid; + + /* + * First delete the pg_statistic_ext_data tuples holding the actual + * statistical data. There might be data with/without inheritance, so + * attempt deleting both. + */ + RemoveStatisticsDataById(statsOid, true); + RemoveStatisticsDataById(statsOid, false); + + /* + * Delete the pg_statistic_ext tuple. Also send out a cache inval on the + * associated table, so that dependent plans will be rebuilt. + */ + relation = table_open(StatisticExtRelationId, RowExclusiveLock); + + tup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statsOid)); + + if (!HeapTupleIsValid(tup)) /* should not happen */ + elog(ERROR, "cache lookup failed for statistics object %u", statsOid); + + statext = (Form_pg_statistic_ext) GETSTRUCT(tup); + relid = statext->stxrelid; + + CacheInvalidateRelcacheByRelid(relid); + + CatalogTupleDelete(relation, &tup->t_self); + + ReleaseSysCache(tup); + + table_close(relation, RowExclusiveLock); +} + +/* + * Select a nonconflicting name for a new statistics object. + * + * name1, name2, and label are used the same way as for makeObjectName(), + * except that the label can't be NULL; digits will be appended to the label + * if needed to create a name that is unique within the specified namespace. + * + * Returns a palloc'd string. 
+ * + * Note: it is theoretically possible to get a collision anyway, if someone + * else chooses the same name concurrently. This is fairly unlikely to be + * a problem in practice, especially if one is holding a share update + * exclusive lock on the relation identified by name1. However, if choosing + * multiple names within a single command, you'd better create the new object + * and do CommandCounterIncrement before choosing the next one! + */ +static char * +ChooseExtendedStatisticName(const char *name1, const char *name2, + const char *label, Oid namespaceid) +{ + int pass = 0; + char *stxname = NULL; + char modlabel[NAMEDATALEN]; + + /* try the unmodified label first */ + strlcpy(modlabel, label, sizeof(modlabel)); + + for (;;) + { + Oid existingstats; + + stxname = makeObjectName(name1, name2, modlabel); + + existingstats = GetSysCacheOid2(STATEXTNAMENSP, Anum_pg_statistic_ext_oid, + PointerGetDatum(stxname), + ObjectIdGetDatum(namespaceid)); + if (!OidIsValid(existingstats)) + break; + + /* found a conflict, so try a new name component */ + pfree(stxname); + snprintf(modlabel, sizeof(modlabel), "%s%d", label, ++pass); + } + + return stxname; +} + +/* + * Generate "name2" for a new statistics object given the list of column + * names for it. This will be passed to ChooseExtendedStatisticName along + * with the parent table name and a suitable label. + * + * We know that less than NAMEDATALEN characters will actually be used, + * so we can truncate the result once we've generated that many. + * + * XXX see also ChooseForeignKeyConstraintNameAddition and + * ChooseIndexNameAddition. 
+ */ +static char * +ChooseExtendedStatisticNameAddition(List *exprs) +{ + char buf[NAMEDATALEN * 2]; + int buflen = 0; + ListCell *lc; + + buf[0] = '\0'; + foreach(lc, exprs) + { + StatsElem *selem = (StatsElem *) lfirst(lc); + const char *name; + + /* It should be one of these, but just skip if it happens not to be */ + if (!IsA(selem, StatsElem)) + continue; + + name = selem->name; + + if (buflen > 0) + buf[buflen++] = '_'; /* insert _ between names */ + + /* + * We use fixed 'expr' for expressions, which have empty column names. + * For indexes this is handled in ChooseIndexColumnNames, but we have + * no such function for stats and it does not seem worth adding. If a + * better name is needed, the user can specify it explicitly. + */ + if (!name) + name = "expr"; + + /* + * At this point we have buflen <= NAMEDATALEN. name should be less + * than NAMEDATALEN already, but use strlcpy for paranoia. + */ + strlcpy(buf + buflen, name, NAMEDATALEN); + buflen += strlen(buf + buflen); + if (buflen >= NAMEDATALEN) + break; + } + return pstrdup(buf); +} + +/* + * StatisticsGetRelation: given a statistics object's OID, get the OID of + * the relation it is defined on. Uses the system cache. 
+ */ +Oid +StatisticsGetRelation(Oid statId, bool missing_ok) +{ + HeapTuple tuple; + Form_pg_statistic_ext stx; + Oid result; + + tuple = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statId)); + if (!HeapTupleIsValid(tuple)) + { + if (missing_ok) + return InvalidOid; + elog(ERROR, "cache lookup failed for statistics object %u", statId); + } + stx = (Form_pg_statistic_ext) GETSTRUCT(tuple); + Assert(stx->oid == statId); + + result = stx->stxrelid; + ReleaseSysCache(tuple); + return result; +} diff --git a/src/backend/commands/subscriptioncmds.c b/src/backend/commands/subscriptioncmds.c new file mode 100644 index 0000000..334717c --- /dev/null +++ b/src/backend/commands/subscriptioncmds.c @@ -0,0 +1,1966 @@ +/*------------------------------------------------------------------------- + * + * subscriptioncmds.c + * subscription catalog manipulation functions + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/commands/subscriptioncmds.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/htup_details.h" +#include "access/table.h" +#include "access/xact.h" +#include "catalog/catalog.h" +#include "catalog/dependency.h" +#include "catalog/indexing.h" +#include "catalog/namespace.h" +#include "catalog/objectaccess.h" +#include "catalog/objectaddress.h" +#include "catalog/pg_subscription.h" +#include "catalog/pg_subscription_rel.h" +#include "catalog/pg_type.h" +#include "commands/defrem.h" +#include "commands/event_trigger.h" +#include "commands/subscriptioncmds.h" +#include "executor/executor.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "pgstat.h" +#include "replication/logicallauncher.h" +#include "replication/origin.h" +#include "replication/slot.h" +#include "replication/walreceiver.h" +#include "replication/walsender.h" +#include 
"replication/worker_internal.h" +#include "storage/lmgr.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/guc.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/pg_lsn.h" +#include "utils/syscache.h" + +/* + * Options that can be specified by the user in CREATE/ALTER SUBSCRIPTION + * command. + */ +#define SUBOPT_CONNECT 0x00000001 +#define SUBOPT_ENABLED 0x00000002 +#define SUBOPT_CREATE_SLOT 0x00000004 +#define SUBOPT_SLOT_NAME 0x00000008 +#define SUBOPT_COPY_DATA 0x00000010 +#define SUBOPT_SYNCHRONOUS_COMMIT 0x00000020 +#define SUBOPT_REFRESH 0x00000040 +#define SUBOPT_BINARY 0x00000080 +#define SUBOPT_STREAMING 0x00000100 +#define SUBOPT_TWOPHASE_COMMIT 0x00000200 +#define SUBOPT_DISABLE_ON_ERR 0x00000400 +#define SUBOPT_LSN 0x00000800 + +/* check if the 'val' has 'bits' set */ +#define IsSet(val, bits) (((val) & (bits)) == (bits)) + +/* + * Structure to hold a bitmap representing the user-provided CREATE/ALTER + * SUBSCRIPTION command options and the parsed/default values of each of them. + */ +typedef struct SubOpts +{ + bits32 specified_opts; + char *slot_name; + char *synchronous_commit; + bool connect; + bool enabled; + bool create_slot; + bool copy_data; + bool refresh; + bool binary; + bool streaming; + bool twophase; + bool disableonerr; + XLogRecPtr lsn; +} SubOpts; + +static List *fetch_table_list(WalReceiverConn *wrconn, List *publications); +static void check_duplicates_in_publist(List *publist, Datum *datums); +static List *merge_publications(List *oldpublist, List *newpublist, bool addpub, const char *subname); +static void ReportSlotConnectionError(List *rstates, Oid subid, char *slotname, char *err); + + +/* + * Common option parsing function for CREATE and ALTER SUBSCRIPTION commands. + * + * Since not all options can be specified in both commands, this function + * will report an error if mutually exclusive options are specified. 
 */
static void
parse_subscription_options(ParseState *pstate, List *stmt_options,
						   bits32 supported_opts, SubOpts *opts)
{
	ListCell   *lc;

	/* Start out with cleared opts. */
	memset(opts, 0, sizeof(SubOpts));

	/* caller must expect some option */
	Assert(supported_opts != 0);

	/* If connect option is supported, these others also need to be. */
	Assert(!IsSet(supported_opts, SUBOPT_CONNECT) ||
		   IsSet(supported_opts, SUBOPT_ENABLED | SUBOPT_CREATE_SLOT |
				 SUBOPT_COPY_DATA));

	/*
	 * Set default values for the boolean supported options.  These are only
	 * the defaults; the loop below overwrites them with any value the user
	 * actually specified, and the post-processing at the bottom may change
	 * them again for the connect = false case.
	 */
	if (IsSet(supported_opts, SUBOPT_CONNECT))
		opts->connect = true;
	if (IsSet(supported_opts, SUBOPT_ENABLED))
		opts->enabled = true;
	if (IsSet(supported_opts, SUBOPT_CREATE_SLOT))
		opts->create_slot = true;
	if (IsSet(supported_opts, SUBOPT_COPY_DATA))
		opts->copy_data = true;
	if (IsSet(supported_opts, SUBOPT_REFRESH))
		opts->refresh = true;
	if (IsSet(supported_opts, SUBOPT_BINARY))
		opts->binary = false;
	if (IsSet(supported_opts, SUBOPT_STREAMING))
		opts->streaming = false;
	if (IsSet(supported_opts, SUBOPT_TWOPHASE_COMMIT))
		opts->twophase = false;
	if (IsSet(supported_opts, SUBOPT_DISABLE_ON_ERR))
		opts->disableonerr = false;

	/*
	 * Parse options.  Each recognized option records itself in
	 * opts->specified_opts so that duplicates can be detected and so that
	 * callers can distinguish "user said X" from "X defaulted".
	 */
	foreach(lc, stmt_options)
	{
		DefElem    *defel = (DefElem *) lfirst(lc);

		if (IsSet(supported_opts, SUBOPT_CONNECT) &&
			strcmp(defel->defname, "connect") == 0)
		{
			if (IsSet(opts->specified_opts, SUBOPT_CONNECT))
				errorConflictingDefElem(defel, pstate);

			opts->specified_opts |= SUBOPT_CONNECT;
			opts->connect = defGetBoolean(defel);
		}
		else if (IsSet(supported_opts, SUBOPT_ENABLED) &&
				 strcmp(defel->defname, "enabled") == 0)
		{
			if (IsSet(opts->specified_opts, SUBOPT_ENABLED))
				errorConflictingDefElem(defel, pstate);

			opts->specified_opts |= SUBOPT_ENABLED;
			opts->enabled = defGetBoolean(defel);
		}
		else if (IsSet(supported_opts, SUBOPT_CREATE_SLOT) &&
				 strcmp(defel->defname, "create_slot") == 0)
		{
			if (IsSet(opts->specified_opts, SUBOPT_CREATE_SLOT))
				errorConflictingDefElem(defel, pstate);

			opts->specified_opts |= SUBOPT_CREATE_SLOT;
			opts->create_slot = defGetBoolean(defel);
		}
		else if (IsSet(supported_opts, SUBOPT_SLOT_NAME) &&
				 strcmp(defel->defname, "slot_name") == 0)
		{
			if (IsSet(opts->specified_opts, SUBOPT_SLOT_NAME))
				errorConflictingDefElem(defel, pstate);

			opts->specified_opts |= SUBOPT_SLOT_NAME;
			opts->slot_name = defGetString(defel);

			/* Setting slot_name = NONE is treated as no slot name. */
			if (strcmp(opts->slot_name, "none") == 0)
				opts->slot_name = NULL;
			else
				ReplicationSlotValidateName(opts->slot_name, ERROR);
		}
		else if (IsSet(supported_opts, SUBOPT_COPY_DATA) &&
				 strcmp(defel->defname, "copy_data") == 0)
		{
			if (IsSet(opts->specified_opts, SUBOPT_COPY_DATA))
				errorConflictingDefElem(defel, pstate);

			opts->specified_opts |= SUBOPT_COPY_DATA;
			opts->copy_data = defGetBoolean(defel);
		}
		else if (IsSet(supported_opts, SUBOPT_SYNCHRONOUS_COMMIT) &&
				 strcmp(defel->defname, "synchronous_commit") == 0)
		{
			if (IsSet(opts->specified_opts, SUBOPT_SYNCHRONOUS_COMMIT))
				errorConflictingDefElem(defel, pstate);

			opts->specified_opts |= SUBOPT_SYNCHRONOUS_COMMIT;
			opts->synchronous_commit = defGetString(defel);

			/* Test if the given value is valid for synchronous_commit GUC. */
			(void) set_config_option("synchronous_commit", opts->synchronous_commit,
									 PGC_BACKEND, PGC_S_TEST, GUC_ACTION_SET,
									 false, 0, false);
		}
		else if (IsSet(supported_opts, SUBOPT_REFRESH) &&
				 strcmp(defel->defname, "refresh") == 0)
		{
			if (IsSet(opts->specified_opts, SUBOPT_REFRESH))
				errorConflictingDefElem(defel, pstate);

			opts->specified_opts |= SUBOPT_REFRESH;
			opts->refresh = defGetBoolean(defel);
		}
		else if (IsSet(supported_opts, SUBOPT_BINARY) &&
				 strcmp(defel->defname, "binary") == 0)
		{
			if (IsSet(opts->specified_opts, SUBOPT_BINARY))
				errorConflictingDefElem(defel, pstate);

			opts->specified_opts |= SUBOPT_BINARY;
			opts->binary = defGetBoolean(defel);
		}
		else if (IsSet(supported_opts, SUBOPT_STREAMING) &&
				 strcmp(defel->defname, "streaming") == 0)
		{
			if (IsSet(opts->specified_opts, SUBOPT_STREAMING))
				errorConflictingDefElem(defel, pstate);

			opts->specified_opts |= SUBOPT_STREAMING;
			opts->streaming = defGetBoolean(defel);
		}
		else if (strcmp(defel->defname, "two_phase") == 0)
		{
			/*
			 * Do not allow toggling of two_phase option. Doing so could cause
			 * missing of transactions and lead to an inconsistent replica.
			 * See comments atop worker.c
			 *
			 * Note: Unsupported twophase indicates that this call originated
			 * from AlterSubscription.
			 *
			 * Unlike the other branches, this one is entered even when the
			 * option is not in supported_opts, so that an unsupported
			 * two_phase reports "unrecognized parameter" here rather than
			 * falling through to the generic else below.
			 */
			if (!IsSet(supported_opts, SUBOPT_TWOPHASE_COMMIT))
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
						 errmsg("unrecognized subscription parameter: \"%s\"", defel->defname)));

			if (IsSet(opts->specified_opts, SUBOPT_TWOPHASE_COMMIT))
				errorConflictingDefElem(defel, pstate);

			opts->specified_opts |= SUBOPT_TWOPHASE_COMMIT;
			opts->twophase = defGetBoolean(defel);
		}
		else if (IsSet(supported_opts, SUBOPT_DISABLE_ON_ERR) &&
				 strcmp(defel->defname, "disable_on_error") == 0)
		{
			if (IsSet(opts->specified_opts, SUBOPT_DISABLE_ON_ERR))
				errorConflictingDefElem(defel, pstate);

			opts->specified_opts |= SUBOPT_DISABLE_ON_ERR;
			opts->disableonerr = defGetBoolean(defel);
		}
		else if (IsSet(supported_opts, SUBOPT_LSN) &&
				 strcmp(defel->defname, "lsn") == 0)
		{
			char	   *lsn_str = defGetString(defel);
			XLogRecPtr	lsn;

			if (IsSet(opts->specified_opts, SUBOPT_LSN))
				errorConflictingDefElem(defel, pstate);

			/* Setting lsn = NONE is treated as resetting LSN */
			if (strcmp(lsn_str, "none") == 0)
				lsn = InvalidXLogRecPtr;
			else
			{
				/* Parse the argument as LSN */
				lsn = DatumGetLSN(DirectFunctionCall1(pg_lsn_in,
													  CStringGetDatum(lsn_str)));

				if (XLogRecPtrIsInvalid(lsn))
					ereport(ERROR,
							(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
							 errmsg("invalid WAL location (LSN): %s", lsn_str)));
			}

			opts->specified_opts |= SUBOPT_LSN;
			opts->lsn = lsn;
		}
		else
			ereport(ERROR,
					(errcode(ERRCODE_SYNTAX_ERROR),
					 errmsg("unrecognized subscription parameter: \"%s\"", defel->defname)));
	}

	/*
	 * We've been explicitly asked to not connect, that requires some
	 * additional processing.
	 */
	if (!opts->connect && IsSet(supported_opts, SUBOPT_CONNECT))
	{
		/* Check for incompatible options from the user. */
		if (opts->enabled &&
			IsSet(opts->specified_opts, SUBOPT_ENABLED))
			ereport(ERROR,
					(errcode(ERRCODE_SYNTAX_ERROR),
			/*- translator: both %s are strings of the form "option = value" */
					 errmsg("%s and %s are mutually exclusive options",
							"connect = false", "enabled = true")));

		if (opts->create_slot &&
			IsSet(opts->specified_opts, SUBOPT_CREATE_SLOT))
			ereport(ERROR,
					(errcode(ERRCODE_SYNTAX_ERROR),
					 errmsg("%s and %s are mutually exclusive options",
							"connect = false", "create_slot = true")));

		if (opts->copy_data &&
			IsSet(opts->specified_opts, SUBOPT_COPY_DATA))
			ereport(ERROR,
					(errcode(ERRCODE_SYNTAX_ERROR),
					 errmsg("%s and %s are mutually exclusive options",
							"connect = false", "copy_data = true")));

		/* Change the defaults of other options. */
		opts->enabled = false;
		opts->create_slot = false;
		opts->copy_data = false;
	}

	/*
	 * Do additional checking for disallowed combination when slot_name = NONE
	 * was used.
	 *
	 * The error message differs depending on whether the conflicting option
	 * was given explicitly (mutually exclusive) or merely defaulted (the user
	 * must additionally set it to false).
	 */
	if (!opts->slot_name &&
		IsSet(opts->specified_opts, SUBOPT_SLOT_NAME))
	{
		if (opts->enabled)
		{
			if (IsSet(opts->specified_opts, SUBOPT_ENABLED))
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
				/*- translator: both %s are strings of the form "option = value" */
						 errmsg("%s and %s are mutually exclusive options",
								"slot_name = NONE", "enabled = true")));
			else
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
				/*- translator: both %s are strings of the form "option = value" */
						 errmsg("subscription with %s must also set %s",
								"slot_name = NONE", "enabled = false")));
		}

		if (opts->create_slot)
		{
			if (IsSet(opts->specified_opts, SUBOPT_CREATE_SLOT))
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
				/*- translator: both %s are strings of the form "option = value" */
						 errmsg("%s and %s are mutually exclusive options",
								"slot_name = NONE", "create_slot = true")));
			else
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
				/*- translator: both %s are strings of the form "option = value" */
						 errmsg("subscription with %s must also set %s",
								"slot_name = NONE", "create_slot = false")));
		}
	}
}
= value" */ + errmsg("subscription with %s must also set %s", + "slot_name = NONE", "create_slot = false"))); + } + } +} + +/* + * Add publication names from the list to a string. + */ +static void +get_publications_str(List *publications, StringInfo dest, bool quote_literal) +{ + ListCell *lc; + bool first = true; + + Assert(list_length(publications) > 0); + + foreach(lc, publications) + { + char *pubname = strVal(lfirst(lc)); + + if (first) + first = false; + else + appendStringInfoString(dest, ", "); + + if (quote_literal) + appendStringInfoString(dest, quote_literal_cstr(pubname)); + else + { + appendStringInfoChar(dest, '"'); + appendStringInfoString(dest, pubname); + appendStringInfoChar(dest, '"'); + } + } +} + +/* + * Check that the specified publications are present on the publisher. + */ +static void +check_publications(WalReceiverConn *wrconn, List *publications) +{ + WalRcvExecResult *res; + StringInfo cmd; + TupleTableSlot *slot; + List *publicationsCopy = NIL; + Oid tableRow[1] = {TEXTOID}; + + cmd = makeStringInfo(); + appendStringInfoString(cmd, "SELECT t.pubname FROM\n" + " pg_catalog.pg_publication t WHERE\n" + " t.pubname IN ("); + get_publications_str(publications, cmd, true); + appendStringInfoChar(cmd, ')'); + + res = walrcv_exec(wrconn, cmd->data, 1, tableRow); + pfree(cmd->data); + pfree(cmd); + + if (res->status != WALRCV_OK_TUPLES) + ereport(ERROR, + errmsg("could not receive list of publications from the publisher: %s", + res->err)); + + publicationsCopy = list_copy(publications); + + /* Process publication(s). */ + slot = MakeSingleTupleTableSlot(res->tupledesc, &TTSOpsMinimalTuple); + while (tuplestore_gettupleslot(res->tuplestore, true, false, slot)) + { + char *pubname; + bool isnull; + + pubname = TextDatumGetCString(slot_getattr(slot, 1, &isnull)); + Assert(!isnull); + + /* Delete the publication present in publisher from the list. 
*/ + publicationsCopy = list_delete(publicationsCopy, makeString(pubname)); + ExecClearTuple(slot); + } + + ExecDropSingleTupleTableSlot(slot); + + walrcv_clear_result(res); + + if (list_length(publicationsCopy)) + { + /* Prepare the list of non-existent publication(s) for error message. */ + StringInfo pubnames = makeStringInfo(); + + get_publications_str(publicationsCopy, pubnames, false); + ereport(WARNING, + errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg_plural("publication %s does not exist on the publisher", + "publications %s do not exist on the publisher", + list_length(publicationsCopy), + pubnames->data)); + } +} + +/* + * Auxiliary function to build a text array out of a list of String nodes. + */ +static Datum +publicationListToArray(List *publist) +{ + ArrayType *arr; + Datum *datums; + MemoryContext memcxt; + MemoryContext oldcxt; + + /* Create memory context for temporary allocations. */ + memcxt = AllocSetContextCreate(CurrentMemoryContext, + "publicationListToArray to array", + ALLOCSET_DEFAULT_SIZES); + oldcxt = MemoryContextSwitchTo(memcxt); + + datums = (Datum *) palloc(sizeof(Datum) * list_length(publist)); + + check_duplicates_in_publist(publist, datums); + + MemoryContextSwitchTo(oldcxt); + + arr = construct_array(datums, list_length(publist), + TEXTOID, -1, false, TYPALIGN_INT); + + MemoryContextDelete(memcxt); + + return PointerGetDatum(arr); +} + +/* + * Create new subscription. + */ +ObjectAddress +CreateSubscription(ParseState *pstate, CreateSubscriptionStmt *stmt, + bool isTopLevel) +{ + Relation rel; + ObjectAddress myself; + Oid subid; + bool nulls[Natts_pg_subscription]; + Datum values[Natts_pg_subscription]; + Oid owner = GetUserId(); + HeapTuple tup; + char *conninfo; + char originname[NAMEDATALEN]; + List *publications; + bits32 supported_opts; + SubOpts opts = {0}; + + /* + * Parse and check options. + * + * Connection and publication should not be specified here. 
/*
 * Create new subscription.
 *
 * Parses the WITH (...) options, inserts the pg_subscription row, creates
 * the subscription's replication origin, and (unless connect = false)
 * contacts the publisher to record per-table sync state and optionally
 * create the remote replication slot.  Returns the ObjectAddress of the
 * new subscription.  Requires superuser.
 */
ObjectAddress
CreateSubscription(ParseState *pstate, CreateSubscriptionStmt *stmt,
				   bool isTopLevel)
{
	Relation	rel;
	ObjectAddress myself;
	Oid			subid;
	bool		nulls[Natts_pg_subscription];
	Datum		values[Natts_pg_subscription];
	Oid			owner = GetUserId();
	HeapTuple	tup;
	char	   *conninfo;
	char		originname[NAMEDATALEN];
	List	   *publications;
	bits32		supported_opts;
	SubOpts		opts = {0};

	/*
	 * Parse and check options.
	 *
	 * Connection and publication should not be specified here.
	 */
	supported_opts = (SUBOPT_CONNECT | SUBOPT_ENABLED | SUBOPT_CREATE_SLOT |
					  SUBOPT_SLOT_NAME | SUBOPT_COPY_DATA |
					  SUBOPT_SYNCHRONOUS_COMMIT | SUBOPT_BINARY |
					  SUBOPT_STREAMING | SUBOPT_TWOPHASE_COMMIT |
					  SUBOPT_DISABLE_ON_ERR);
	parse_subscription_options(pstate, stmt->options, supported_opts, &opts);

	/*
	 * Since creating a replication slot is not transactional, rolling back
	 * the transaction leaves the created replication slot.  So we cannot run
	 * CREATE SUBSCRIPTION inside a transaction block if creating a
	 * replication slot.
	 */
	if (opts.create_slot)
		PreventInTransactionBlock(isTopLevel, "CREATE SUBSCRIPTION ... WITH (create_slot = true)");

	if (!superuser())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("must be superuser to create subscriptions")));

	/*
	 * If built with appropriate switch, whine when regression-testing
	 * conventions for subscription names are violated.
	 */
#ifdef ENFORCE_REGRESSION_TEST_NAME_RESTRICTIONS
	if (strncmp(stmt->subname, "regress_", 8) != 0)
		elog(WARNING, "subscriptions created by regression test cases should have names starting with \"regress_\"");
#endif

	rel = table_open(SubscriptionRelationId, RowExclusiveLock);

	/* Check if name is used */
	subid = GetSysCacheOid2(SUBSCRIPTIONNAME, Anum_pg_subscription_oid,
							MyDatabaseId, CStringGetDatum(stmt->subname));
	if (OidIsValid(subid))
	{
		ereport(ERROR,
				(errcode(ERRCODE_DUPLICATE_OBJECT),
				 errmsg("subscription \"%s\" already exists",
						stmt->subname)));
	}

	/* If no slot name was given (and not set to NONE), default to subname. */
	if (!IsSet(opts.specified_opts, SUBOPT_SLOT_NAME) &&
		opts.slot_name == NULL)
		opts.slot_name = stmt->subname;

	/* The default for synchronous_commit of subscriptions is off. */
	if (opts.synchronous_commit == NULL)
		opts.synchronous_commit = "off";

	conninfo = stmt->conninfo;
	publications = stmt->publication;

	/* Load the library providing us libpq calls. */
	load_file("libpqwalreceiver", false);

	/* Check the connection info string. */
	walrcv_check_conninfo(conninfo);

	/* Everything ok, form a new tuple. */
	memset(values, 0, sizeof(values));
	memset(nulls, false, sizeof(nulls));

	subid = GetNewOidWithIndex(rel, SubscriptionObjectIndexId,
							   Anum_pg_subscription_oid);
	values[Anum_pg_subscription_oid - 1] = ObjectIdGetDatum(subid);
	values[Anum_pg_subscription_subdbid - 1] = ObjectIdGetDatum(MyDatabaseId);
	values[Anum_pg_subscription_subskiplsn - 1] = LSNGetDatum(InvalidXLogRecPtr);
	values[Anum_pg_subscription_subname - 1] =
		DirectFunctionCall1(namein, CStringGetDatum(stmt->subname));
	values[Anum_pg_subscription_subowner - 1] = ObjectIdGetDatum(owner);
	values[Anum_pg_subscription_subenabled - 1] = BoolGetDatum(opts.enabled);
	values[Anum_pg_subscription_subbinary - 1] = BoolGetDatum(opts.binary);
	values[Anum_pg_subscription_substream - 1] = BoolGetDatum(opts.streaming);
	/* two_phase starts PENDING, not ENABLED; see slot creation below. */
	values[Anum_pg_subscription_subtwophasestate - 1] =
		CharGetDatum(opts.twophase ?
					 LOGICALREP_TWOPHASE_STATE_PENDING :
					 LOGICALREP_TWOPHASE_STATE_DISABLED);
	values[Anum_pg_subscription_subdisableonerr - 1] = BoolGetDatum(opts.disableonerr);
	values[Anum_pg_subscription_subconninfo - 1] =
		CStringGetTextDatum(conninfo);
	if (opts.slot_name)
		values[Anum_pg_subscription_subslotname - 1] =
			DirectFunctionCall1(namein, CStringGetDatum(opts.slot_name));
	else
		nulls[Anum_pg_subscription_subslotname - 1] = true;
	values[Anum_pg_subscription_subsynccommit - 1] =
		CStringGetTextDatum(opts.synchronous_commit);
	values[Anum_pg_subscription_subpublications - 1] =
		publicationListToArray(publications);

	tup = heap_form_tuple(RelationGetDescr(rel), values, nulls);

	/* Insert tuple into catalog. */
	CatalogTupleInsert(rel, tup);
	heap_freetuple(tup);

	recordDependencyOnOwner(SubscriptionRelationId, subid, owner);

	/* Create the replication origin "pg_<subid>" for this subscription. */
	snprintf(originname, sizeof(originname), "pg_%u", subid);
	replorigin_create(originname);

	/*
	 * Connect to remote side to execute requested commands and fetch table
	 * info.
	 */
	if (opts.connect)
	{
		char	   *err;
		WalReceiverConn *wrconn;
		List	   *tables;
		ListCell   *lc;
		char		table_state;

		/* Try to connect to the publisher. */
		wrconn = walrcv_connect(conninfo, true, stmt->subname, &err);
		if (!wrconn)
			ereport(ERROR,
					(errcode(ERRCODE_CONNECTION_FAILURE),
					 errmsg("could not connect to the publisher: %s", err)));

		/* Ensure the connection is closed even on error. */
		PG_TRY();
		{
			check_publications(wrconn, publications);

			/*
			 * Set sync state based on if we were asked to do data copy or
			 * not.
			 */
			table_state = opts.copy_data ? SUBREL_STATE_INIT : SUBREL_STATE_READY;

			/*
			 * Get the table list from publisher and build local table status
			 * info.
			 */
			tables = fetch_table_list(wrconn, publications);
			foreach(lc, tables)
			{
				RangeVar   *rv = (RangeVar *) lfirst(lc);
				Oid			relid;

				relid = RangeVarGetRelid(rv, AccessShareLock, false);

				/* Check for supported relkind. */
				CheckSubscriptionRelkind(get_rel_relkind(relid),
										 rv->schemaname, rv->relname);

				AddSubscriptionRelState(subid, relid, table_state,
										InvalidXLogRecPtr);
			}

			/*
			 * If requested, create permanent slot for the subscription. We
			 * won't use the initial snapshot for anything, so no need to
			 * export it.
			 */
			if (opts.create_slot)
			{
				bool		twophase_enabled = false;

				Assert(opts.slot_name);

				/*
				 * Even if two_phase is set, don't create the slot with
				 * two-phase enabled. Will enable it once all the tables are
				 * synced and ready. This avoids race-conditions like prepared
				 * transactions being skipped due to changes not being applied
				 * due to checks in should_apply_changes_for_rel() when
				 * tablesync for the corresponding tables are in progress. See
				 * comments atop worker.c.
				 *
				 * Note that if tables were specified but copy_data is false
				 * then it is safe to enable two_phase up-front because those
				 * tables are already initially in READY state. When the
				 * subscription has no tables, we leave the twophase state as
				 * PENDING, to allow ALTER SUBSCRIPTION ... REFRESH
				 * PUBLICATION to work.
				 */
				if (opts.twophase && !opts.copy_data && tables != NIL)
					twophase_enabled = true;

				walrcv_create_slot(wrconn, opts.slot_name, false, twophase_enabled,
								   CRS_NOEXPORT_SNAPSHOT, NULL);

				if (twophase_enabled)
					UpdateTwoPhaseState(subid, LOGICALREP_TWOPHASE_STATE_ENABLED);

				ereport(NOTICE,
						(errmsg("created replication slot \"%s\" on publisher",
								opts.slot_name)));
			}
		}
		PG_FINALLY();
		{
			walrcv_disconnect(wrconn);
		}
		PG_END_TRY();
	}
	else
		ereport(WARNING,
		/* translator: %s is an SQL ALTER statement */
				(errmsg("tables were not subscribed, you will have to run %s to subscribe the tables",
						"ALTER SUBSCRIPTION ... REFRESH PUBLICATION")));

	table_close(rel, RowExclusiveLock);

	pgstat_create_subscription(subid);

	if (opts.enabled)
		ApplyLauncherWakeupAtCommit();

	ObjectAddressSet(myself, SubscriptionRelationId, subid);

	InvokeObjectPostCreateHook(SubscriptionRelationId, subid, 0);

	return myself;
}
/*
 * Refresh the subscription's table membership against the publisher.
 *
 * Fetches the current table list for sub->publications from the publisher,
 * adds local pg_subscription_rel state for newly published tables (INIT or
 * READY depending on copy_data), and removes state, stops workers, and
 * drops tablesync origins/slots for tables no longer published.  If
 * validate_publications is non-NIL those publications are first checked for
 * existence on the publisher.
 */
static void
AlterSubscription_refresh(Subscription *sub, bool copy_data,
						  List *validate_publications)
{
	char	   *err;
	List	   *pubrel_names;
	List	   *subrel_states;
	Oid		   *subrel_local_oids;
	Oid		   *pubrel_local_oids;
	ListCell   *lc;
	int			off;
	int			remove_rel_len;
	Relation	rel = NULL;
	typedef struct SubRemoveRels
	{
		Oid			relid;
		char		state;
	} SubRemoveRels;
	SubRemoveRels *sub_remove_rels;
	WalReceiverConn *wrconn;

	/* Load the library providing us libpq calls. */
	load_file("libpqwalreceiver", false);

	/* Try to connect to the publisher. */
	wrconn = walrcv_connect(sub->conninfo, true, sub->name, &err);
	if (!wrconn)
		ereport(ERROR,
				(errcode(ERRCODE_CONNECTION_FAILURE),
				 errmsg("could not connect to the publisher: %s", err)));

	/* Ensure the connection is closed even on error. */
	PG_TRY();
	{
		if (validate_publications)
			check_publications(wrconn, validate_publications);

		/* Get the table list from publisher. */
		pubrel_names = fetch_table_list(wrconn, sub->publications);

		/* Get local table list. */
		subrel_states = GetSubscriptionRelations(sub->oid);

		/*
		 * Build qsorted array of local table oids for faster lookup. This can
		 * potentially contain all tables in the database so speed of lookup
		 * is important.
		 */
		subrel_local_oids = palloc(list_length(subrel_states) * sizeof(Oid));
		off = 0;
		foreach(lc, subrel_states)
		{
			SubscriptionRelState *relstate = (SubscriptionRelState *) lfirst(lc);

			subrel_local_oids[off++] = relstate->relid;
		}
		qsort(subrel_local_oids, list_length(subrel_states),
			  sizeof(Oid), oid_cmp);

		/*
		 * Rels that we want to remove from subscription and drop any slots
		 * and origins corresponding to them.
		 */
		sub_remove_rels = palloc(list_length(subrel_states) * sizeof(SubRemoveRels));

		/*
		 * Walk over the remote tables and try to match them to locally known
		 * tables. If the table is not known locally create a new state for
		 * it.
		 *
		 * Also builds array of local oids of remote tables for the next step.
		 */
		off = 0;
		pubrel_local_oids = palloc(list_length(pubrel_names) * sizeof(Oid));

		foreach(lc, pubrel_names)
		{
			RangeVar   *rv = (RangeVar *) lfirst(lc);
			Oid			relid;

			relid = RangeVarGetRelid(rv, AccessShareLock, false);

			/* Check for supported relkind. */
			CheckSubscriptionRelkind(get_rel_relkind(relid),
									 rv->schemaname, rv->relname);

			pubrel_local_oids[off++] = relid;

			if (!bsearch(&relid, subrel_local_oids,
						 list_length(subrel_states), sizeof(Oid), oid_cmp))
			{
				AddSubscriptionRelState(sub->oid, relid,
										copy_data ? SUBREL_STATE_INIT : SUBREL_STATE_READY,
										InvalidXLogRecPtr);
				ereport(DEBUG1,
						(errmsg_internal("table \"%s.%s\" added to subscription \"%s\"",
										 rv->schemaname, rv->relname, sub->name)));
			}
		}

		/*
		 * Next remove state for tables we should not care about anymore using
		 * the data we collected above
		 */
		qsort(pubrel_local_oids, list_length(pubrel_names),
			  sizeof(Oid), oid_cmp);

		remove_rel_len = 0;
		for (off = 0; off < list_length(subrel_states); off++)
		{
			Oid			relid = subrel_local_oids[off];

			if (!bsearch(&relid, pubrel_local_oids,
						 list_length(pubrel_names), sizeof(Oid), oid_cmp))
			{
				char		state;
				XLogRecPtr	statelsn;

				/*
				 * Lock pg_subscription_rel with AccessExclusiveLock to
				 * prevent any race conditions with the apply worker
				 * re-launching workers at the same time this code is trying
				 * to remove those tables.
				 *
				 * Even if new worker for this particular rel is restarted it
				 * won't be able to make any progress as we hold exclusive
				 * lock on subscription_rel till the transaction end. It will
				 * simply exit as there is no corresponding rel entry.
				 *
				 * This locking also ensures that the state of rels won't
				 * change till we are done with this refresh operation.
				 */
				if (!rel)
					rel = table_open(SubscriptionRelRelationId, AccessExclusiveLock);

				/* Last known rel state. */
				state = GetSubscriptionRelState(sub->oid, relid, &statelsn);

				sub_remove_rels[remove_rel_len].relid = relid;
				sub_remove_rels[remove_rel_len++].state = state;

				RemoveSubscriptionRel(sub->oid, relid);

				logicalrep_worker_stop(sub->oid, relid);

				/*
				 * For READY state, we would have already dropped the
				 * tablesync origin.
				 */
				if (state != SUBREL_STATE_READY)
				{
					char		originname[NAMEDATALEN];

					/*
					 * Drop the tablesync's origin tracking if exists.
					 *
					 * It is possible that the origin is not yet created for
					 * tablesync worker, this can happen for the states before
					 * SUBREL_STATE_FINISHEDCOPY. The apply worker can also
					 * concurrently try to drop the origin and by this time
					 * the origin might be already removed. For these reasons,
					 * passing missing_ok = true.
					 */
					ReplicationOriginNameForTablesync(sub->oid, relid, originname,
													  sizeof(originname));
					replorigin_drop_by_name(originname, true, false);
				}

				ereport(DEBUG1,
						(errmsg_internal("table \"%s.%s\" removed from subscription \"%s\"",
										 get_namespace_name(get_rel_namespace(relid)),
										 get_rel_name(relid),
										 sub->name)));
			}
		}

		/*
		 * Drop the tablesync slots associated with removed tables. This has
		 * to be at the end because otherwise if there is an error while doing
		 * the database operations we won't be able to rollback dropped slots.
		 */
		for (off = 0; off < remove_rel_len; off++)
		{
			if (sub_remove_rels[off].state != SUBREL_STATE_READY &&
				sub_remove_rels[off].state != SUBREL_STATE_SYNCDONE)
			{
				char		syncslotname[NAMEDATALEN] = {0};

				/*
				 * For READY/SYNCDONE states we know the tablesync slot has
				 * already been dropped by the tablesync worker.
				 *
				 * For other states, there is no certainty, maybe the slot
				 * does not exist yet. Also, if we fail after removing some of
				 * the slots, next time, it will again try to drop already
				 * dropped slots and fail. For these reasons, we allow
				 * missing_ok = true for the drop.
				 */
				ReplicationSlotNameForTablesync(sub->oid, sub_remove_rels[off].relid,
												syncslotname, sizeof(syncslotname));
				ReplicationSlotDropAtPubNode(wrconn, syncslotname, true);
			}
		}
	}
	PG_FINALLY();
	{
		walrcv_disconnect(wrconn);
	}
	PG_END_TRY();

	if (rel)
		table_close(rel, NoLock);
}

/*
 * Alter the existing subscription.
+ */ +ObjectAddress +AlterSubscription(ParseState *pstate, AlterSubscriptionStmt *stmt, + bool isTopLevel) +{ + Relation rel; + ObjectAddress myself; + bool nulls[Natts_pg_subscription]; + bool replaces[Natts_pg_subscription]; + Datum values[Natts_pg_subscription]; + HeapTuple tup; + Oid subid; + bool update_tuple = false; + Subscription *sub; + Form_pg_subscription form; + bits32 supported_opts; + SubOpts opts = {0}; + + rel = table_open(SubscriptionRelationId, RowExclusiveLock); + + /* Fetch the existing tuple. */ + tup = SearchSysCacheCopy2(SUBSCRIPTIONNAME, MyDatabaseId, + CStringGetDatum(stmt->subname)); + + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("subscription \"%s\" does not exist", + stmt->subname))); + + form = (Form_pg_subscription) GETSTRUCT(tup); + subid = form->oid; + + /* must be owner */ + if (!pg_subscription_ownercheck(subid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_SUBSCRIPTION, + stmt->subname); + + sub = GetSubscription(subid, false); + + /* Lock the subscription so nobody else can do anything with it. */ + LockSharedObject(SubscriptionRelationId, subid, 0, AccessExclusiveLock); + + /* Form a new tuple. */ + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + memset(replaces, false, sizeof(replaces)); + + switch (stmt->kind) + { + case ALTER_SUBSCRIPTION_OPTIONS: + { + supported_opts = (SUBOPT_SLOT_NAME | + SUBOPT_SYNCHRONOUS_COMMIT | SUBOPT_BINARY | + SUBOPT_STREAMING | SUBOPT_DISABLE_ON_ERR); + + parse_subscription_options(pstate, stmt->options, + supported_opts, &opts); + + if (IsSet(opts.specified_opts, SUBOPT_SLOT_NAME)) + { + /* + * The subscription must be disabled to allow slot_name as + * 'none', otherwise, the apply worker will repeatedly try + * to stream the data using that slot_name which neither + * exists on the publisher nor the user will be allowed to + * create it. 
+ */ + if (sub->enabled && !opts.slot_name) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("cannot set %s for enabled subscription", + "slot_name = NONE"))); + + if (opts.slot_name) + values[Anum_pg_subscription_subslotname - 1] = + DirectFunctionCall1(namein, CStringGetDatum(opts.slot_name)); + else + nulls[Anum_pg_subscription_subslotname - 1] = true; + replaces[Anum_pg_subscription_subslotname - 1] = true; + } + + if (opts.synchronous_commit) + { + values[Anum_pg_subscription_subsynccommit - 1] = + CStringGetTextDatum(opts.synchronous_commit); + replaces[Anum_pg_subscription_subsynccommit - 1] = true; + } + + if (IsSet(opts.specified_opts, SUBOPT_BINARY)) + { + values[Anum_pg_subscription_subbinary - 1] = + BoolGetDatum(opts.binary); + replaces[Anum_pg_subscription_subbinary - 1] = true; + } + + if (IsSet(opts.specified_opts, SUBOPT_STREAMING)) + { + values[Anum_pg_subscription_substream - 1] = + BoolGetDatum(opts.streaming); + replaces[Anum_pg_subscription_substream - 1] = true; + } + + if (IsSet(opts.specified_opts, SUBOPT_DISABLE_ON_ERR)) + { + values[Anum_pg_subscription_subdisableonerr - 1] + = BoolGetDatum(opts.disableonerr); + replaces[Anum_pg_subscription_subdisableonerr - 1] + = true; + } + + update_tuple = true; + break; + } + + case ALTER_SUBSCRIPTION_ENABLED: + { + parse_subscription_options(pstate, stmt->options, + SUBOPT_ENABLED, &opts); + Assert(IsSet(opts.specified_opts, SUBOPT_ENABLED)); + + if (!sub->slotname && opts.enabled) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("cannot enable subscription that does not have a slot name"))); + + values[Anum_pg_subscription_subenabled - 1] = + BoolGetDatum(opts.enabled); + replaces[Anum_pg_subscription_subenabled - 1] = true; + + if (opts.enabled) + ApplyLauncherWakeupAtCommit(); + + update_tuple = true; + break; + } + + case ALTER_SUBSCRIPTION_CONNECTION: + /* Load the library providing us libpq calls. 
*/ + load_file("libpqwalreceiver", false); + /* Check the connection info string. */ + walrcv_check_conninfo(stmt->conninfo); + + values[Anum_pg_subscription_subconninfo - 1] = + CStringGetTextDatum(stmt->conninfo); + replaces[Anum_pg_subscription_subconninfo - 1] = true; + update_tuple = true; + break; + + case ALTER_SUBSCRIPTION_SET_PUBLICATION: + { + supported_opts = SUBOPT_COPY_DATA | SUBOPT_REFRESH; + parse_subscription_options(pstate, stmt->options, + supported_opts, &opts); + + values[Anum_pg_subscription_subpublications - 1] = + publicationListToArray(stmt->publication); + replaces[Anum_pg_subscription_subpublications - 1] = true; + + update_tuple = true; + + /* Refresh if user asked us to. */ + if (opts.refresh) + { + if (!sub->enabled) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("ALTER SUBSCRIPTION with refresh is not allowed for disabled subscriptions"), + errhint("Use ALTER SUBSCRIPTION ... SET PUBLICATION ... WITH (refresh = false)."))); + + /* + * See ALTER_SUBSCRIPTION_REFRESH for details why this is + * not allowed. + */ + if (sub->twophasestate == LOGICALREP_TWOPHASE_STATE_ENABLED && opts.copy_data) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("ALTER SUBSCRIPTION with refresh and copy_data is not allowed when two_phase is enabled"), + errhint("Use ALTER SUBSCRIPTION ... SET PUBLICATION with refresh = false, or with copy_data = false, or use DROP/CREATE SUBSCRIPTION."))); + + PreventInTransactionBlock(isTopLevel, "ALTER SUBSCRIPTION with refresh"); + + /* Make sure refresh sees the new list of publications. 
*/ + sub->publications = stmt->publication; + + AlterSubscription_refresh(sub, opts.copy_data, + stmt->publication); + } + + break; + } + + case ALTER_SUBSCRIPTION_ADD_PUBLICATION: + case ALTER_SUBSCRIPTION_DROP_PUBLICATION: + { + List *publist; + bool isadd = stmt->kind == ALTER_SUBSCRIPTION_ADD_PUBLICATION; + + supported_opts = SUBOPT_REFRESH | SUBOPT_COPY_DATA; + parse_subscription_options(pstate, stmt->options, + supported_opts, &opts); + + publist = merge_publications(sub->publications, stmt->publication, isadd, stmt->subname); + values[Anum_pg_subscription_subpublications - 1] = + publicationListToArray(publist); + replaces[Anum_pg_subscription_subpublications - 1] = true; + + update_tuple = true; + + /* Refresh if user asked us to. */ + if (opts.refresh) + { + /* We only need to validate user specified publications. */ + List *validate_publications = (isadd) ? stmt->publication : NULL; + + if (!sub->enabled) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("ALTER SUBSCRIPTION with refresh is not allowed for disabled subscriptions"), + /* translator: %s is an SQL ALTER command */ + errhint("Use %s instead.", + isadd ? + "ALTER SUBSCRIPTION ... ADD PUBLICATION ... WITH (refresh = false)" : + "ALTER SUBSCRIPTION ... DROP PUBLICATION ... WITH (refresh = false)"))); + + /* + * See ALTER_SUBSCRIPTION_REFRESH for details why this is + * not allowed. + */ + if (sub->twophasestate == LOGICALREP_TWOPHASE_STATE_ENABLED && opts.copy_data) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("ALTER SUBSCRIPTION with refresh and copy_data is not allowed when two_phase is enabled"), + /* translator: %s is an SQL ALTER command */ + errhint("Use %s with refresh = false, or with copy_data = false, or use DROP/CREATE SUBSCRIPTION.", + isadd ? + "ALTER SUBSCRIPTION ... ADD PUBLICATION" : + "ALTER SUBSCRIPTION ... 
DROP PUBLICATION"))); + + PreventInTransactionBlock(isTopLevel, "ALTER SUBSCRIPTION with refresh"); + + /* Refresh the new list of publications. */ + sub->publications = publist; + + AlterSubscription_refresh(sub, opts.copy_data, + validate_publications); + } + + break; + } + + case ALTER_SUBSCRIPTION_REFRESH: + { + if (!sub->enabled) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("ALTER SUBSCRIPTION ... REFRESH is not allowed for disabled subscriptions"))); + + parse_subscription_options(pstate, stmt->options, + SUBOPT_COPY_DATA, &opts); + + /* + * The subscription option "two_phase" requires that + * replication has passed the initial table synchronization + * phase before the two_phase becomes properly enabled. + * + * But, having reached this two-phase commit "enabled" state + * we must not allow any subsequent table initialization to + * occur. So the ALTER SUBSCRIPTION ... REFRESH is disallowed + * when the user had requested two_phase = on mode. + * + * The exception to this restriction is when copy_data = + * false, because when copy_data is false the tablesync will + * start already in READY state and will exit directly without + * doing anything. + * + * For more details see comments atop worker.c. + */ + if (sub->twophasestate == LOGICALREP_TWOPHASE_STATE_ENABLED && opts.copy_data) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("ALTER SUBSCRIPTION ... REFRESH with copy_data is not allowed when two_phase is enabled"), + errhint("Use ALTER SUBSCRIPTION ... REFRESH with copy_data = false, or use DROP/CREATE SUBSCRIPTION."))); + + PreventInTransactionBlock(isTopLevel, "ALTER SUBSCRIPTION ... REFRESH"); + + AlterSubscription_refresh(sub, opts.copy_data, NULL); + + break; + } + + case ALTER_SUBSCRIPTION_SKIP: + { + parse_subscription_options(pstate, stmt->options, SUBOPT_LSN, &opts); + + /* ALTER SUBSCRIPTION ... 
SKIP supports only LSN option */ + Assert(IsSet(opts.specified_opts, SUBOPT_LSN)); + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to skip transaction"))); + + /* + * If the user sets subskiplsn, we do a sanity check to make + * sure that the specified LSN is a probable value. + */ + if (!XLogRecPtrIsInvalid(opts.lsn)) + { + RepOriginId originid; + char originname[NAMEDATALEN]; + XLogRecPtr remote_lsn; + + snprintf(originname, sizeof(originname), "pg_%u", subid); + originid = replorigin_by_name(originname, false); + remote_lsn = replorigin_get_progress(originid, false); + + /* Check the given LSN is at least a future LSN */ + if (!XLogRecPtrIsInvalid(remote_lsn) && opts.lsn < remote_lsn) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("skip WAL location (LSN %X/%X) must be greater than origin LSN %X/%X", + LSN_FORMAT_ARGS(opts.lsn), + LSN_FORMAT_ARGS(remote_lsn)))); + } + + values[Anum_pg_subscription_subskiplsn - 1] = LSNGetDatum(opts.lsn); + replaces[Anum_pg_subscription_subskiplsn - 1] = true; + + update_tuple = true; + break; + } + + default: + elog(ERROR, "unrecognized ALTER SUBSCRIPTION kind %d", + stmt->kind); + } + + /* Update the catalog if needed. 
*/ + if (update_tuple) + { + tup = heap_modify_tuple(tup, RelationGetDescr(rel), values, nulls, + replaces); + + CatalogTupleUpdate(rel, &tup->t_self, tup); + + heap_freetuple(tup); + } + + table_close(rel, RowExclusiveLock); + + ObjectAddressSet(myself, SubscriptionRelationId, subid); + + InvokeObjectPostAlterHook(SubscriptionRelationId, subid, 0); + + return myself; +} + +/* + * Drop a subscription + */ +void +DropSubscription(DropSubscriptionStmt *stmt, bool isTopLevel) +{ + Relation rel; + ObjectAddress myself; + HeapTuple tup; + Oid subid; + Datum datum; + bool isnull; + char *subname; + char *conninfo; + char *slotname; + List *subworkers; + ListCell *lc; + char originname[NAMEDATALEN]; + char *err = NULL; + WalReceiverConn *wrconn; + Form_pg_subscription form; + List *rstates; + + /* + * Lock pg_subscription with AccessExclusiveLock to ensure that the + * launcher doesn't restart new worker during dropping the subscription + */ + rel = table_open(SubscriptionRelationId, AccessExclusiveLock); + + tup = SearchSysCache2(SUBSCRIPTIONNAME, MyDatabaseId, + CStringGetDatum(stmt->subname)); + + if (!HeapTupleIsValid(tup)) + { + table_close(rel, NoLock); + + if (!stmt->missing_ok) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("subscription \"%s\" does not exist", + stmt->subname))); + else + ereport(NOTICE, + (errmsg("subscription \"%s\" does not exist, skipping", + stmt->subname))); + + return; + } + + form = (Form_pg_subscription) GETSTRUCT(tup); + subid = form->oid; + + /* must be owner */ + if (!pg_subscription_ownercheck(subid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_SUBSCRIPTION, + stmt->subname); + + /* DROP hook for the subscription being removed */ + InvokeObjectDropHook(SubscriptionRelationId, subid, 0); + + /* + * Lock the subscription so nobody else can do anything with it (including + * the replication workers). 
+ */ + LockSharedObject(SubscriptionRelationId, subid, 0, AccessExclusiveLock); + + /* Get subname */ + datum = SysCacheGetAttr(SUBSCRIPTIONOID, tup, + Anum_pg_subscription_subname, &isnull); + Assert(!isnull); + subname = pstrdup(NameStr(*DatumGetName(datum))); + + /* Get conninfo */ + datum = SysCacheGetAttr(SUBSCRIPTIONOID, tup, + Anum_pg_subscription_subconninfo, &isnull); + Assert(!isnull); + conninfo = TextDatumGetCString(datum); + + /* Get slotname */ + datum = SysCacheGetAttr(SUBSCRIPTIONOID, tup, + Anum_pg_subscription_subslotname, &isnull); + if (!isnull) + slotname = pstrdup(NameStr(*DatumGetName(datum))); + else + slotname = NULL; + + /* + * Since dropping a replication slot is not transactional, the replication + * slot stays dropped even if the transaction rolls back. So we cannot + * run DROP SUBSCRIPTION inside a transaction block if dropping the + * replication slot. Also, in this case, we report a message for dropping + * the subscription to the cumulative stats system. + * + * XXX The command name should really be something like "DROP SUBSCRIPTION + * of a subscription that is associated with a replication slot", but we + * don't have the proper facilities for that. + */ + if (slotname) + PreventInTransactionBlock(isTopLevel, "DROP SUBSCRIPTION"); + + ObjectAddressSet(myself, SubscriptionRelationId, subid); + EventTriggerSQLDropAddObject(&myself, true, true); + + /* Remove the tuple from catalog. */ + CatalogTupleDelete(rel, &tup->t_self); + + ReleaseSysCache(tup); + + /* + * Stop all the subscription workers immediately. + * + * This is necessary if we are dropping the replication slot, so that the + * slot becomes accessible. + * + * It is also necessary if the subscription is disabled and was disabled + * in the same transaction. Then the workers haven't seen the disabling + * yet and will still be running, leading to hangs later when we want to + * drop the replication origin. 
If the subscription was disabled before + * this transaction, then there shouldn't be any workers left, so this + * won't make a difference. + * + * New workers won't be started because we hold an exclusive lock on the + * subscription till the end of the transaction. + */ + LWLockAcquire(LogicalRepWorkerLock, LW_SHARED); + subworkers = logicalrep_workers_find(subid, false); + LWLockRelease(LogicalRepWorkerLock); + foreach(lc, subworkers) + { + LogicalRepWorker *w = (LogicalRepWorker *) lfirst(lc); + + logicalrep_worker_stop(w->subid, w->relid); + } + list_free(subworkers); + + /* + * Cleanup of tablesync replication origins. + * + * Any READY-state relations would already have dealt with clean-ups. + * + * Note that the state can't change because we have already stopped both + * the apply and tablesync workers and they can't restart because of + * exclusive lock on the subscription. + */ + rstates = GetSubscriptionNotReadyRelations(subid); + foreach(lc, rstates) + { + SubscriptionRelState *rstate = (SubscriptionRelState *) lfirst(lc); + Oid relid = rstate->relid; + + /* Only cleanup resources of tablesync workers */ + if (!OidIsValid(relid)) + continue; + + /* + * Drop the tablesync's origin tracking if exists. + * + * It is possible that the origin is not yet created for tablesync + * worker so passing missing_ok = true. This can happen for the states + * before SUBREL_STATE_FINISHEDCOPY. + */ + ReplicationOriginNameForTablesync(subid, relid, originname, + sizeof(originname)); + replorigin_drop_by_name(originname, true, false); + } + + /* Clean up dependencies */ + deleteSharedDependencyRecordsFor(SubscriptionRelationId, subid, 0); + + /* Remove any associated relation synchronization states. */ + RemoveSubscriptionRel(subid, InvalidOid); + + /* Remove the origin tracking if exists. 
*/ + snprintf(originname, sizeof(originname), "pg_%u", subid); + replorigin_drop_by_name(originname, true, false); + + /* + * Tell the cumulative stats system that the subscription is getting + * dropped. + */ + pgstat_drop_subscription(subid); + + /* + * If there is no slot associated with the subscription, we can finish + * here. + */ + if (!slotname && rstates == NIL) + { + table_close(rel, NoLock); + return; + } + + /* + * Try to acquire the connection necessary for dropping slots. + * + * Note: If the slotname is NONE/NULL then we allow the command to finish + * and users need to manually cleanup the apply and tablesync worker slots + * later. + * + * This has to be at the end because otherwise if there is an error while + * doing the database operations we won't be able to rollback dropped + * slot. + */ + load_file("libpqwalreceiver", false); + + wrconn = walrcv_connect(conninfo, true, subname, &err); + if (wrconn == NULL) + { + if (!slotname) + { + /* be tidy */ + list_free(rstates); + table_close(rel, NoLock); + return; + } + else + { + ReportSlotConnectionError(rstates, subid, slotname, err); + } + } + + PG_TRY(); + { + foreach(lc, rstates) + { + SubscriptionRelState *rstate = (SubscriptionRelState *) lfirst(lc); + Oid relid = rstate->relid; + + /* Only cleanup resources of tablesync workers */ + if (!OidIsValid(relid)) + continue; + + /* + * Drop the tablesync slots associated with removed tables. + * + * For SYNCDONE/READY states, the tablesync slot is known to have + * already been dropped by the tablesync worker. + * + * For other states, there is no certainty, maybe the slot does + * not exist yet. Also, if we fail after removing some of the + * slots, next time, it will again try to drop already dropped + * slots and fail. For these reasons, we allow missing_ok = true + * for the drop. 
+ */ + if (rstate->state != SUBREL_STATE_SYNCDONE) + { + char syncslotname[NAMEDATALEN] = {0}; + + ReplicationSlotNameForTablesync(subid, relid, syncslotname, + sizeof(syncslotname)); + ReplicationSlotDropAtPubNode(wrconn, syncslotname, true); + } + } + + list_free(rstates); + + /* + * If there is a slot associated with the subscription, then drop the + * replication slot at the publisher. + */ + if (slotname) + ReplicationSlotDropAtPubNode(wrconn, slotname, false); + } + PG_FINALLY(); + { + walrcv_disconnect(wrconn); + } + PG_END_TRY(); + + table_close(rel, NoLock); +} + +/* + * Drop the replication slot at the publisher node using the replication + * connection. + * + * missing_ok - if true then only issue a LOG message if the slot doesn't + * exist. + */ +void +ReplicationSlotDropAtPubNode(WalReceiverConn *wrconn, char *slotname, bool missing_ok) +{ + StringInfoData cmd; + + Assert(wrconn); + + load_file("libpqwalreceiver", false); + + initStringInfo(&cmd); + appendStringInfo(&cmd, "DROP_REPLICATION_SLOT %s WAIT", quote_identifier(slotname)); + + PG_TRY(); + { + WalRcvExecResult *res; + + res = walrcv_exec(wrconn, cmd.data, 0, NULL); + + if (res->status == WALRCV_OK_COMMAND) + { + /* NOTICE. Success. */ + ereport(NOTICE, + (errmsg("dropped replication slot \"%s\" on publisher", + slotname))); + } + else if (res->status == WALRCV_ERROR && + missing_ok && + res->sqlstate == ERRCODE_UNDEFINED_OBJECT) + { + /* LOG. Error, but missing_ok = true. */ + ereport(LOG, + (errmsg("could not drop replication slot \"%s\" on publisher: %s", + slotname, res->err))); + } + else + { + /* ERROR. 
*/ + ereport(ERROR, + (errcode(ERRCODE_CONNECTION_FAILURE), + errmsg("could not drop replication slot \"%s\" on publisher: %s", + slotname, res->err))); + } + + walrcv_clear_result(res); + } + PG_FINALLY(); + { + pfree(cmd.data); + } + PG_END_TRY(); +} + +/* + * Internal workhorse for changing a subscription owner + */ +static void +AlterSubscriptionOwner_internal(Relation rel, HeapTuple tup, Oid newOwnerId) +{ + Form_pg_subscription form; + + form = (Form_pg_subscription) GETSTRUCT(tup); + + if (form->subowner == newOwnerId) + return; + + if (!pg_subscription_ownercheck(form->oid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_SUBSCRIPTION, + NameStr(form->subname)); + + /* New owner must be a superuser */ + if (!superuser_arg(newOwnerId)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to change owner of subscription \"%s\"", + NameStr(form->subname)), + errhint("The owner of a subscription must be a superuser."))); + + form->subowner = newOwnerId; + CatalogTupleUpdate(rel, &tup->t_self, tup); + + /* Update owner dependency reference */ + changeDependencyOnOwner(SubscriptionRelationId, + form->oid, + newOwnerId); + + InvokeObjectPostAlterHook(SubscriptionRelationId, + form->oid, 0); + + ApplyLauncherWakeupAtCommit(); +} + +/* + * Change subscription owner -- by name + */ +ObjectAddress +AlterSubscriptionOwner(const char *name, Oid newOwnerId) +{ + Oid subid; + HeapTuple tup; + Relation rel; + ObjectAddress address; + Form_pg_subscription form; + + rel = table_open(SubscriptionRelationId, RowExclusiveLock); + + tup = SearchSysCacheCopy2(SUBSCRIPTIONNAME, MyDatabaseId, + CStringGetDatum(name)); + + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("subscription \"%s\" does not exist", name))); + + form = (Form_pg_subscription) GETSTRUCT(tup); + subid = form->oid; + + AlterSubscriptionOwner_internal(rel, tup, newOwnerId); + + ObjectAddressSet(address, 
SubscriptionRelationId, subid); + + heap_freetuple(tup); + + table_close(rel, RowExclusiveLock); + + return address; +} + +/* + * Change subscription owner -- by OID + */ +void +AlterSubscriptionOwner_oid(Oid subid, Oid newOwnerId) +{ + HeapTuple tup; + Relation rel; + + rel = table_open(SubscriptionRelationId, RowExclusiveLock); + + tup = SearchSysCacheCopy1(SUBSCRIPTIONOID, ObjectIdGetDatum(subid)); + + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("subscription with OID %u does not exist", subid))); + + AlterSubscriptionOwner_internal(rel, tup, newOwnerId); + + heap_freetuple(tup); + + table_close(rel, RowExclusiveLock); +} + +/* + * Get the list of tables which belong to specified publications on the + * publisher connection. + * + * Note that we don't support the case where the column list is different for + * the same table in different publications to avoid sending unwanted column + * information for some of the rows. This can happen when both the column + * list and row filter are specified for different publications. + */ +static List * +fetch_table_list(WalReceiverConn *wrconn, List *publications) +{ + WalRcvExecResult *res; + StringInfoData cmd; + TupleTableSlot *slot; + Oid tableRow[3] = {TEXTOID, TEXTOID, NAMEARRAYOID}; + List *tablelist = NIL; + bool check_columnlist = (walrcv_server_version(wrconn) >= 150000); + + initStringInfo(&cmd); + appendStringInfoString(&cmd, "SELECT DISTINCT t.schemaname, t.tablename \n"); + + /* Get column lists for each relation if the publisher supports it */ + if (check_columnlist) + appendStringInfoString(&cmd, ", t.attnames\n"); + + appendStringInfoString(&cmd, "FROM pg_catalog.pg_publication_tables t\n" + " WHERE t.pubname IN ("); + get_publications_str(publications, &cmd, true); + appendStringInfoChar(&cmd, ')'); + + res = walrcv_exec(wrconn, cmd.data, check_columnlist ? 
3 : 2, tableRow); + pfree(cmd.data); + + if (res->status != WALRCV_OK_TUPLES) + ereport(ERROR, + (errcode(ERRCODE_CONNECTION_FAILURE), + errmsg("could not receive list of replicated tables from the publisher: %s", + res->err))); + + /* Process tables. */ + slot = MakeSingleTupleTableSlot(res->tupledesc, &TTSOpsMinimalTuple); + while (tuplestore_gettupleslot(res->tuplestore, true, false, slot)) + { + char *nspname; + char *relname; + bool isnull; + RangeVar *rv; + + nspname = TextDatumGetCString(slot_getattr(slot, 1, &isnull)); + Assert(!isnull); + relname = TextDatumGetCString(slot_getattr(slot, 2, &isnull)); + Assert(!isnull); + + rv = makeRangeVar(nspname, relname, -1); + + if (check_columnlist && list_member(tablelist, rv)) + ereport(ERROR, + errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot use different column lists for table \"%s.%s\" in different publications", + nspname, relname)); + else + tablelist = lappend(tablelist, rv); + + ExecClearTuple(slot); + } + ExecDropSingleTupleTableSlot(slot); + + walrcv_clear_result(res); + + return tablelist; +} + +/* + * This is to report the connection failure while dropping replication slots. + * Here, we report the WARNING for all tablesync slots so that user can drop + * them manually, if required. + */ +static void +ReportSlotConnectionError(List *rstates, Oid subid, char *slotname, char *err) +{ + ListCell *lc; + + foreach(lc, rstates) + { + SubscriptionRelState *rstate = (SubscriptionRelState *) lfirst(lc); + Oid relid = rstate->relid; + + /* Only cleanup resources of tablesync workers */ + if (!OidIsValid(relid)) + continue; + + /* + * Caller needs to ensure that relstate doesn't change underneath us. + * See DropSubscription where we get the relstates. 
+ */ + if (rstate->state != SUBREL_STATE_SYNCDONE) + { + char syncslotname[NAMEDATALEN] = {0}; + + ReplicationSlotNameForTablesync(subid, relid, syncslotname, + sizeof(syncslotname)); + elog(WARNING, "could not drop tablesync replication slot \"%s\"", + syncslotname); + } + } + + ereport(ERROR, + (errcode(ERRCODE_CONNECTION_FAILURE), + errmsg("could not connect to publisher when attempting to drop replication slot \"%s\": %s", + slotname, err), + /* translator: %s is an SQL ALTER command */ + errhint("Use %s to disable the subscription, and then use %s to disassociate it from the slot.", + "ALTER SUBSCRIPTION ... DISABLE", + "ALTER SUBSCRIPTION ... SET (slot_name = NONE)"))); +} + +/* + * Check for duplicates in the given list of publications and error out if + * found one. Add publications to datums as text datums, if datums is not + * NULL. + */ +static void +check_duplicates_in_publist(List *publist, Datum *datums) +{ + ListCell *cell; + int j = 0; + + foreach(cell, publist) + { + char *name = strVal(lfirst(cell)); + ListCell *pcell; + + foreach(pcell, publist) + { + char *pname = strVal(lfirst(pcell)); + + if (pcell == cell) + break; + + if (strcmp(name, pname) == 0) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("publication name \"%s\" used more than once", + pname))); + } + + if (datums) + datums[j++] = CStringGetTextDatum(name); + } +} + +/* + * Merge current subscription's publications and user-specified publications + * from ADD/DROP PUBLICATIONS. + * + * If addpub is true, we will add the list of publications into oldpublist. + * Otherwise, we will delete the list of publications from oldpublist. The + * returned list is a copy, oldpublist itself is not changed. + * + * subname is the subscription name, for error messages. 
+ */ +static List * +merge_publications(List *oldpublist, List *newpublist, bool addpub, const char *subname) +{ + ListCell *lc; + + oldpublist = list_copy(oldpublist); + + check_duplicates_in_publist(newpublist, NULL); + + foreach(lc, newpublist) + { + char *name = strVal(lfirst(lc)); + ListCell *lc2; + bool found = false; + + foreach(lc2, oldpublist) + { + char *pubname = strVal(lfirst(lc2)); + + if (strcmp(name, pubname) == 0) + { + found = true; + if (addpub) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("publication \"%s\" is already in subscription \"%s\"", + name, subname))); + else + oldpublist = foreach_delete_current(oldpublist, lc2); + + break; + } + } + + if (addpub && !found) + oldpublist = lappend(oldpublist, makeString(name)); + else if (!addpub && !found) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("publication \"%s\" is not in subscription \"%s\"", + name, subname))); + } + + /* + * XXX Probably no strong reason for this, but for now it's to make ALTER + * SUBSCRIPTION ... DROP PUBLICATION consistent with SET PUBLICATION. 
+ */ + if (!oldpublist) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("cannot drop all the publications from a subscription"))); + + return oldpublist; +} diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c new file mode 100644 index 0000000..97f9a22 --- /dev/null +++ b/src/backend/commands/tablecmds.c @@ -0,0 +1,19402 @@ +/*------------------------------------------------------------------------- + * + * tablecmds.c + * Commands for creating and altering table structures and settings + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/tablecmds.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/attmap.h" +#include "access/genam.h" +#include "access/heapam.h" +#include "access/heapam_xlog.h" +#include "access/multixact.h" +#include "access/reloptions.h" +#include "access/relscan.h" +#include "access/sysattr.h" +#include "access/tableam.h" +#include "access/toast_compression.h" +#include "access/xact.h" +#include "access/xlog.h" +#include "access/xloginsert.h" +#include "catalog/catalog.h" +#include "catalog/heap.h" +#include "catalog/index.h" +#include "catalog/namespace.h" +#include "catalog/objectaccess.h" +#include "catalog/partition.h" +#include "catalog/pg_am.h" +#include "catalog/pg_attrdef.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_constraint.h" +#include "catalog/pg_depend.h" +#include "catalog/pg_foreign_table.h" +#include "catalog/pg_inherits.h" +#include "catalog/pg_largeobject.h" +#include "catalog/pg_namespace.h" +#include "catalog/pg_opclass.h" +#include "catalog/pg_statistic_ext.h" +#include "catalog/pg_tablespace.h" +#include "catalog/pg_trigger.h" +#include "catalog/pg_type.h" +#include "catalog/storage.h" +#include "catalog/storage_xlog.h" +#include 
"catalog/toasting.h" +#include "commands/cluster.h" +#include "commands/comment.h" +#include "commands/defrem.h" +#include "commands/event_trigger.h" +#include "commands/policy.h" +#include "commands/sequence.h" +#include "commands/tablecmds.h" +#include "commands/tablespace.h" +#include "commands/trigger.h" +#include "commands/typecmds.h" +#include "commands/user.h" +#include "executor/executor.h" +#include "foreign/fdwapi.h" +#include "foreign/foreign.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "nodes/parsenodes.h" +#include "optimizer/optimizer.h" +#include "parser/parse_clause.h" +#include "parser/parse_coerce.h" +#include "parser/parse_collate.h" +#include "parser/parse_expr.h" +#include "parser/parse_oper.h" +#include "parser/parse_relation.h" +#include "parser/parse_type.h" +#include "parser/parse_utilcmd.h" +#include "parser/parser.h" +#include "partitioning/partbounds.h" +#include "partitioning/partdesc.h" +#include "pgstat.h" +#include "rewrite/rewriteDefine.h" +#include "rewrite/rewriteHandler.h" +#include "rewrite/rewriteManip.h" +#include "storage/bufmgr.h" +#include "storage/lmgr.h" +#include "storage/lock.h" +#include "storage/predicate.h" +#include "storage/smgr.h" +#include "tcop/utility.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" +#include "utils/inval.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/partcache.h" +#include "utils/relcache.h" +#include "utils/ruleutils.h" +#include "utils/snapmgr.h" +#include "utils/syscache.h" +#include "utils/timestamp.h" +#include "utils/typcache.h" + +/* + * ON COMMIT action list + */ +typedef struct OnCommitItem +{ + Oid relid; /* relid of relation */ + OnCommitAction oncommit; /* what to do at end of xact */ + + /* + * If this entry was created during the current transaction, + * creating_subid is the ID of the creating subxact; if created in a prior + * transaction, creating_subid is 
zero. If deleted during the current + * transaction, deleting_subid is the ID of the deleting subxact; if no + * deletion request is pending, deleting_subid is zero. + */ + SubTransactionId creating_subid; + SubTransactionId deleting_subid; +} OnCommitItem; + +static List *on_commits = NIL; + + +/* + * State information for ALTER TABLE + * + * The pending-work queue for an ALTER TABLE is a List of AlteredTableInfo + * structs, one for each table modified by the operation (the named table + * plus any child tables that are affected). We save lists of subcommands + * to apply to this table (possibly modified by parse transformation steps); + * these lists will be executed in Phase 2. If a Phase 3 step is needed, + * necessary information is stored in the constraints and newvals lists. + * + * Phase 2 is divided into multiple passes; subcommands are executed in + * a pass determined by subcommand type. + */ + +#define AT_PASS_UNSET -1 /* UNSET will cause ERROR */ +#define AT_PASS_DROP 0 /* DROP (all flavors) */ +#define AT_PASS_ALTER_TYPE 1 /* ALTER COLUMN TYPE */ +#define AT_PASS_OLD_INDEX 2 /* re-add existing indexes */ +#define AT_PASS_OLD_CONSTR 3 /* re-add existing constraints */ +/* We could support a RENAME COLUMN pass here, but not currently used */ +#define AT_PASS_ADD_COL 4 /* ADD COLUMN */ +#define AT_PASS_ADD_CONSTR 5 /* ADD constraints (initial examination) */ +#define AT_PASS_COL_ATTRS 6 /* set column attributes, eg NOT NULL */ +#define AT_PASS_ADD_INDEXCONSTR 7 /* ADD index-based constraints */ +#define AT_PASS_ADD_INDEX 8 /* ADD indexes */ +#define AT_PASS_ADD_OTHERCONSTR 9 /* ADD other constraints, defaults */ +#define AT_PASS_MISC 10 /* other stuff */ +#define AT_NUM_PASSES 11 + +typedef struct AlteredTableInfo +{ + /* Information saved before any work commences: */ + Oid relid; /* Relation to work on */ + char relkind; /* Its relkind */ + TupleDesc oldDesc; /* Pre-modification tuple descriptor */ + + /* + * Transiently set during Phase 2, normally 
set to NULL. + * + * ATRewriteCatalogs sets this when it starts, and closes when ATExecCmd + * returns control. This can be exploited by ATExecCmd subroutines to + * close/reopen across transaction boundaries. + */ + Relation rel; + + /* Information saved by Phase 1 for Phase 2: */ + List *subcmds[AT_NUM_PASSES]; /* Lists of AlterTableCmd */ + /* Information saved by Phases 1/2 for Phase 3: */ + List *constraints; /* List of NewConstraint */ + List *newvals; /* List of NewColumnValue */ + List *afterStmts; /* List of utility command parsetrees */ + bool verify_new_notnull; /* T if we should recheck NOT NULL */ + int rewrite; /* Reason for forced rewrite, if any */ + Oid newAccessMethod; /* new access method; 0 means no change */ + Oid newTableSpace; /* new tablespace; 0 means no change */ + bool chgPersistence; /* T if SET LOGGED/UNLOGGED is used */ + char newrelpersistence; /* if above is true */ + Expr *partition_constraint; /* for attach partition validation */ + /* true, if validating default due to some other attach/detach */ + bool validate_default; + /* Objects to rebuild after completing ALTER TYPE operations */ + List *changedConstraintOids; /* OIDs of constraints to rebuild */ + List *changedConstraintDefs; /* string definitions of same */ + List *changedIndexOids; /* OIDs of indexes to rebuild */ + List *changedIndexDefs; /* string definitions of same */ + char *replicaIdentityIndex; /* index to reset as REPLICA IDENTITY */ + char *clusterOnIndex; /* index to use for CLUSTER */ + List *changedStatisticsOids; /* OIDs of statistics to rebuild */ + List *changedStatisticsDefs; /* string definitions of same */ +} AlteredTableInfo; + +/* Struct describing one new constraint to check in Phase 3 scan */ +/* Note: new NOT NULL constraints are handled elsewhere */ +typedef struct NewConstraint +{ + char *name; /* Constraint name, or NULL if none */ + ConstrType contype; /* CHECK or FOREIGN */ + Oid refrelid; /* PK rel, if FOREIGN */ + Oid refindid; /* OID of PK's 
index, if FOREIGN */ + Oid conid; /* OID of pg_constraint entry, if FOREIGN */ + Node *qual; /* Check expr or CONSTR_FOREIGN Constraint */ + ExprState *qualstate; /* Execution state for CHECK expr */ +} NewConstraint; + +/* + * Struct describing one new column value that needs to be computed during + * Phase 3 copy (this could be either a new column with a non-null default, or + * a column that we're changing the type of). Columns without such an entry + * are just copied from the old table during ATRewriteTable. Note that the + * expr is an expression over *old* table values, except when is_generated + * is true; then it is an expression over columns of the *new* tuple. + */ +typedef struct NewColumnValue +{ + AttrNumber attnum; /* which column */ + Expr *expr; /* expression to compute */ + ExprState *exprstate; /* execution state */ + bool is_generated; /* is it a GENERATED expression? */ +} NewColumnValue; + +/* + * Error-reporting support for RemoveRelations + */ +struct dropmsgstrings +{ + char kind; + int nonexistent_code; + const char *nonexistent_msg; + const char *skipping_msg; + const char *nota_msg; + const char *drophint_msg; +}; + +static const struct dropmsgstrings dropmsgstringarray[] = { + {RELKIND_RELATION, + ERRCODE_UNDEFINED_TABLE, + gettext_noop("table \"%s\" does not exist"), + gettext_noop("table \"%s\" does not exist, skipping"), + gettext_noop("\"%s\" is not a table"), + gettext_noop("Use DROP TABLE to remove a table.")}, + {RELKIND_SEQUENCE, + ERRCODE_UNDEFINED_TABLE, + gettext_noop("sequence \"%s\" does not exist"), + gettext_noop("sequence \"%s\" does not exist, skipping"), + gettext_noop("\"%s\" is not a sequence"), + gettext_noop("Use DROP SEQUENCE to remove a sequence.")}, + {RELKIND_VIEW, + ERRCODE_UNDEFINED_TABLE, + gettext_noop("view \"%s\" does not exist"), + gettext_noop("view \"%s\" does not exist, skipping"), + gettext_noop("\"%s\" is not a view"), + gettext_noop("Use DROP VIEW to remove a view.")}, + {RELKIND_MATVIEW, + 
ERRCODE_UNDEFINED_TABLE, + gettext_noop("materialized view \"%s\" does not exist"), + gettext_noop("materialized view \"%s\" does not exist, skipping"), + gettext_noop("\"%s\" is not a materialized view"), + gettext_noop("Use DROP MATERIALIZED VIEW to remove a materialized view.")}, + {RELKIND_INDEX, + ERRCODE_UNDEFINED_OBJECT, + gettext_noop("index \"%s\" does not exist"), + gettext_noop("index \"%s\" does not exist, skipping"), + gettext_noop("\"%s\" is not an index"), + gettext_noop("Use DROP INDEX to remove an index.")}, + {RELKIND_COMPOSITE_TYPE, + ERRCODE_UNDEFINED_OBJECT, + gettext_noop("type \"%s\" does not exist"), + gettext_noop("type \"%s\" does not exist, skipping"), + gettext_noop("\"%s\" is not a type"), + gettext_noop("Use DROP TYPE to remove a type.")}, + {RELKIND_FOREIGN_TABLE, + ERRCODE_UNDEFINED_OBJECT, + gettext_noop("foreign table \"%s\" does not exist"), + gettext_noop("foreign table \"%s\" does not exist, skipping"), + gettext_noop("\"%s\" is not a foreign table"), + gettext_noop("Use DROP FOREIGN TABLE to remove a foreign table.")}, + {RELKIND_PARTITIONED_TABLE, + ERRCODE_UNDEFINED_TABLE, + gettext_noop("table \"%s\" does not exist"), + gettext_noop("table \"%s\" does not exist, skipping"), + gettext_noop("\"%s\" is not a table"), + gettext_noop("Use DROP TABLE to remove a table.")}, + {RELKIND_PARTITIONED_INDEX, + ERRCODE_UNDEFINED_OBJECT, + gettext_noop("index \"%s\" does not exist"), + gettext_noop("index \"%s\" does not exist, skipping"), + gettext_noop("\"%s\" is not an index"), + gettext_noop("Use DROP INDEX to remove an index.")}, + {'\0', 0, NULL, NULL, NULL, NULL} +}; + +/* communication between RemoveRelations and RangeVarCallbackForDropRelation */ +struct DropRelationCallbackState +{ + /* These fields are set by RemoveRelations: */ + char expected_relkind; + LOCKMODE heap_lockmode; + /* These fields are state to track which subsidiary locks are held: */ + Oid heapOid; + Oid partParentOid; + /* These fields are passed back by 
RangeVarCallbackForDropRelation: */ + char actual_relkind; + char actual_relpersistence; +}; + +/* Alter table target-type flags for ATSimplePermissions */ +#define ATT_TABLE 0x0001 +#define ATT_VIEW 0x0002 +#define ATT_MATVIEW 0x0004 +#define ATT_INDEX 0x0008 +#define ATT_COMPOSITE_TYPE 0x0010 +#define ATT_FOREIGN_TABLE 0x0020 +#define ATT_PARTITIONED_INDEX 0x0040 +#define ATT_SEQUENCE 0x0080 + +/* + * ForeignTruncateInfo + * + * Information related to truncation of foreign tables. This is used for + * the elements in a hash table. It uses the server OID as lookup key, + * and includes a per-server list of all foreign tables involved in the + * truncation. + */ +typedef struct ForeignTruncateInfo +{ + Oid serverid; + List *rels; +} ForeignTruncateInfo; + +/* + * Partition tables are expected to be dropped when the parent partitioned + * table gets dropped. Hence for partitioning we use AUTO dependency. + * Otherwise, for regular inheritance use NORMAL dependency. + */ +#define child_dependency_type(child_is_partition) \ + ((child_is_partition) ? 
DEPENDENCY_AUTO : DEPENDENCY_NORMAL) + +static void truncate_check_rel(Oid relid, Form_pg_class reltuple); +static void truncate_check_perms(Oid relid, Form_pg_class reltuple); +static void truncate_check_activity(Relation rel); +static void RangeVarCallbackForTruncate(const RangeVar *relation, + Oid relId, Oid oldRelId, void *arg); +static List *MergeAttributes(List *schema, List *supers, char relpersistence, + bool is_partition, List **supconstr); +static bool MergeCheckConstraint(List *constraints, char *name, Node *expr); +static void MergeAttributesIntoExisting(Relation child_rel, Relation parent_rel); +static void MergeConstraintsIntoExisting(Relation child_rel, Relation parent_rel); +static void StoreCatalogInheritance(Oid relationId, List *supers, + bool child_is_partition); +static void StoreCatalogInheritance1(Oid relationId, Oid parentOid, + int32 seqNumber, Relation inhRelation, + bool child_is_partition); +static int findAttrByName(const char *attributeName, List *schema); +static void AlterIndexNamespaces(Relation classRel, Relation rel, + Oid oldNspOid, Oid newNspOid, ObjectAddresses *objsMoved); +static void AlterSeqNamespaces(Relation classRel, Relation rel, + Oid oldNspOid, Oid newNspOid, ObjectAddresses *objsMoved, + LOCKMODE lockmode); +static ObjectAddress ATExecAlterConstraint(Relation rel, AlterTableCmd *cmd, + bool recurse, bool recursing, LOCKMODE lockmode); +static bool ATExecAlterConstrRecurse(Constraint *cmdcon, Relation conrel, Relation tgrel, + Relation rel, HeapTuple contuple, List **otherrelids, + LOCKMODE lockmode); +static ObjectAddress ATExecValidateConstraint(List **wqueue, + Relation rel, char *constrName, + bool recurse, bool recursing, LOCKMODE lockmode); +static int transformColumnNameList(Oid relId, List *colList, + int16 *attnums, Oid *atttypids); +static int transformFkeyGetPrimaryKey(Relation pkrel, Oid *indexOid, + List **attnamelist, + int16 *attnums, Oid *atttypids, + Oid *opclasses); +static Oid 
transformFkeyCheckAttrs(Relation pkrel, + int numattrs, int16 *attnums, + Oid *opclasses); +static void checkFkeyPermissions(Relation rel, int16 *attnums, int natts); +static CoercionPathType findFkeyCast(Oid targetTypeId, Oid sourceTypeId, + Oid *funcid); +static void validateForeignKeyConstraint(char *conname, + Relation rel, Relation pkrel, + Oid pkindOid, Oid constraintOid); +static void ATController(AlterTableStmt *parsetree, + Relation rel, List *cmds, bool recurse, LOCKMODE lockmode, + AlterTableUtilityContext *context); +static void ATPrepCmd(List **wqueue, Relation rel, AlterTableCmd *cmd, + bool recurse, bool recursing, LOCKMODE lockmode, + AlterTableUtilityContext *context); +static void ATRewriteCatalogs(List **wqueue, LOCKMODE lockmode, + AlterTableUtilityContext *context); +static void ATExecCmd(List **wqueue, AlteredTableInfo *tab, + AlterTableCmd *cmd, LOCKMODE lockmode, int cur_pass, + AlterTableUtilityContext *context); +static AlterTableCmd *ATParseTransformCmd(List **wqueue, AlteredTableInfo *tab, + Relation rel, AlterTableCmd *cmd, + bool recurse, LOCKMODE lockmode, + int cur_pass, + AlterTableUtilityContext *context); +static void ATRewriteTables(AlterTableStmt *parsetree, + List **wqueue, LOCKMODE lockmode, + AlterTableUtilityContext *context); +static void ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode); +static AlteredTableInfo *ATGetQueueEntry(List **wqueue, Relation rel); +static void ATSimplePermissions(AlterTableType cmdtype, Relation rel, int allowed_targets); +static void ATSimpleRecursion(List **wqueue, Relation rel, + AlterTableCmd *cmd, bool recurse, LOCKMODE lockmode, + AlterTableUtilityContext *context); +static void ATCheckPartitionsNotInUse(Relation rel, LOCKMODE lockmode); +static void ATTypedTableRecursion(List **wqueue, Relation rel, AlterTableCmd *cmd, + LOCKMODE lockmode, + AlterTableUtilityContext *context); +static List *find_typed_table_dependencies(Oid typeOid, const char *typeName, + 
DropBehavior behavior); +static void ATPrepAddColumn(List **wqueue, Relation rel, bool recurse, bool recursing, + bool is_view, AlterTableCmd *cmd, LOCKMODE lockmode, + AlterTableUtilityContext *context); +static ObjectAddress ATExecAddColumn(List **wqueue, AlteredTableInfo *tab, + Relation rel, AlterTableCmd **cmd, + bool recurse, bool recursing, + LOCKMODE lockmode, int cur_pass, + AlterTableUtilityContext *context); +static bool check_for_column_name_collision(Relation rel, const char *colname, + bool if_not_exists); +static void add_column_datatype_dependency(Oid relid, int32 attnum, Oid typid); +static void add_column_collation_dependency(Oid relid, int32 attnum, Oid collid); +static void ATPrepDropNotNull(Relation rel, bool recurse, bool recursing); +static ObjectAddress ATExecDropNotNull(Relation rel, const char *colName, LOCKMODE lockmode); +static void ATPrepSetNotNull(List **wqueue, Relation rel, + AlterTableCmd *cmd, bool recurse, bool recursing, + LOCKMODE lockmode, + AlterTableUtilityContext *context); +static ObjectAddress ATExecSetNotNull(AlteredTableInfo *tab, Relation rel, + const char *colName, LOCKMODE lockmode); +static void ATExecCheckNotNull(AlteredTableInfo *tab, Relation rel, + const char *colName, LOCKMODE lockmode); +static bool NotNullImpliedByRelConstraints(Relation rel, Form_pg_attribute attr); +static bool ConstraintImpliedByRelConstraint(Relation scanrel, + List *testConstraint, List *provenConstraint); +static ObjectAddress ATExecColumnDefault(Relation rel, const char *colName, + Node *newDefault, LOCKMODE lockmode); +static ObjectAddress ATExecCookedColumnDefault(Relation rel, AttrNumber attnum, + Node *newDefault); +static ObjectAddress ATExecAddIdentity(Relation rel, const char *colName, + Node *def, LOCKMODE lockmode); +static ObjectAddress ATExecSetIdentity(Relation rel, const char *colName, + Node *def, LOCKMODE lockmode); +static ObjectAddress ATExecDropIdentity(Relation rel, const char *colName, bool missing_ok, LOCKMODE 
lockmode); +static void ATPrepDropExpression(Relation rel, AlterTableCmd *cmd, bool recurse, bool recursing, LOCKMODE lockmode); +static ObjectAddress ATExecDropExpression(Relation rel, const char *colName, bool missing_ok, LOCKMODE lockmode); +static ObjectAddress ATExecSetStatistics(Relation rel, const char *colName, int16 colNum, + Node *newValue, LOCKMODE lockmode); +static ObjectAddress ATExecSetOptions(Relation rel, const char *colName, + Node *options, bool isReset, LOCKMODE lockmode); +static ObjectAddress ATExecSetStorage(Relation rel, const char *colName, + Node *newValue, LOCKMODE lockmode); +static void ATPrepDropColumn(List **wqueue, Relation rel, bool recurse, bool recursing, + AlterTableCmd *cmd, LOCKMODE lockmode, + AlterTableUtilityContext *context); +static ObjectAddress ATExecDropColumn(List **wqueue, Relation rel, const char *colName, + DropBehavior behavior, + bool recurse, bool recursing, + bool missing_ok, LOCKMODE lockmode, + ObjectAddresses *addrs); +static ObjectAddress ATExecAddIndex(AlteredTableInfo *tab, Relation rel, + IndexStmt *stmt, bool is_rebuild, LOCKMODE lockmode); +static ObjectAddress ATExecAddStatistics(AlteredTableInfo *tab, Relation rel, + CreateStatsStmt *stmt, bool is_rebuild, LOCKMODE lockmode); +static ObjectAddress ATExecAddConstraint(List **wqueue, + AlteredTableInfo *tab, Relation rel, + Constraint *newConstraint, bool recurse, bool is_readd, + LOCKMODE lockmode); +static char *ChooseForeignKeyConstraintNameAddition(List *colnames); +static ObjectAddress ATExecAddIndexConstraint(AlteredTableInfo *tab, Relation rel, + IndexStmt *stmt, LOCKMODE lockmode); +static ObjectAddress ATAddCheckConstraint(List **wqueue, + AlteredTableInfo *tab, Relation rel, + Constraint *constr, + bool recurse, bool recursing, bool is_readd, + LOCKMODE lockmode); +static ObjectAddress ATAddForeignKeyConstraint(List **wqueue, AlteredTableInfo *tab, + Relation rel, Constraint *fkconstraint, + bool recurse, bool recursing, + LOCKMODE lockmode); 
+static ObjectAddress addFkRecurseReferenced(List **wqueue, Constraint *fkconstraint, + Relation rel, Relation pkrel, Oid indexOid, Oid parentConstr, + int numfks, int16 *pkattnum, int16 *fkattnum, + Oid *pfeqoperators, Oid *ppeqoperators, Oid *ffeqoperators, + int numfkdelsetcols, int16 *fkdelsetcols, + bool old_check_ok, + Oid parentDelTrigger, Oid parentUpdTrigger); +static void validateFkOnDeleteSetColumns(int numfks, const int16 *fkattnums, + int numfksetcols, const int16 *fksetcolsattnums, + List *fksetcols); +static void addFkRecurseReferencing(List **wqueue, Constraint *fkconstraint, + Relation rel, Relation pkrel, Oid indexOid, Oid parentConstr, + int numfks, int16 *pkattnum, int16 *fkattnum, + Oid *pfeqoperators, Oid *ppeqoperators, Oid *ffeqoperators, + int numfkdelsetcols, int16 *fkdelsetcols, + bool old_check_ok, LOCKMODE lockmode, + Oid parentInsTrigger, Oid parentUpdTrigger); +static void CloneForeignKeyConstraints(List **wqueue, Relation parentRel, + Relation partitionRel); +static void CloneFkReferenced(Relation parentRel, Relation partitionRel); +static void CloneFkReferencing(List **wqueue, Relation parentRel, + Relation partRel); +static void createForeignKeyCheckTriggers(Oid myRelOid, Oid refRelOid, + Constraint *fkconstraint, Oid constraintOid, + Oid indexOid, + Oid parentInsTrigger, Oid parentUpdTrigger, + Oid *insertTrigOid, Oid *updateTrigOid); +static void createForeignKeyActionTriggers(Relation rel, Oid refRelOid, + Constraint *fkconstraint, Oid constraintOid, + Oid indexOid, + Oid parentDelTrigger, Oid parentUpdTrigger, + Oid *deleteTrigOid, Oid *updateTrigOid); +static bool tryAttachPartitionForeignKey(ForeignKeyCacheInfo *fk, + Oid partRelid, + Oid parentConstrOid, int numfks, + AttrNumber *mapped_conkey, AttrNumber *confkey, + Oid *conpfeqop, + Oid parentInsTrigger, + Oid parentUpdTrigger, + Relation trigrel); +static void GetForeignKeyActionTriggers(Relation trigrel, + Oid conoid, Oid confrelid, Oid conrelid, + Oid *deleteTriggerOid, 
+ Oid *updateTriggerOid); +static void GetForeignKeyCheckTriggers(Relation trigrel, + Oid conoid, Oid confrelid, Oid conrelid, + Oid *insertTriggerOid, + Oid *updateTriggerOid); +static void ATExecDropConstraint(Relation rel, const char *constrName, + DropBehavior behavior, + bool recurse, bool recursing, + bool missing_ok, LOCKMODE lockmode); +static void ATPrepAlterColumnType(List **wqueue, + AlteredTableInfo *tab, Relation rel, + bool recurse, bool recursing, + AlterTableCmd *cmd, LOCKMODE lockmode, + AlterTableUtilityContext *context); +static bool ATColumnChangeRequiresRewrite(Node *expr, AttrNumber varattno); +static ObjectAddress ATExecAlterColumnType(AlteredTableInfo *tab, Relation rel, + AlterTableCmd *cmd, LOCKMODE lockmode); +static void RememberConstraintForRebuilding(Oid conoid, AlteredTableInfo *tab); +static void RememberIndexForRebuilding(Oid indoid, AlteredTableInfo *tab); +static void RememberStatisticsForRebuilding(Oid indoid, AlteredTableInfo *tab); +static void ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, + LOCKMODE lockmode); +static void ATPostAlterTypeParse(Oid oldId, Oid oldRelId, Oid refRelId, + char *cmd, List **wqueue, LOCKMODE lockmode, + bool rewrite); +static void RebuildConstraintComment(AlteredTableInfo *tab, int pass, + Oid objid, Relation rel, List *domname, + const char *conname); +static void TryReuseIndex(Oid oldId, IndexStmt *stmt); +static void TryReuseForeignKey(Oid oldId, Constraint *con); +static ObjectAddress ATExecAlterColumnGenericOptions(Relation rel, const char *colName, + List *options, LOCKMODE lockmode); +static void change_owner_fix_column_acls(Oid relationOid, + Oid oldOwnerId, Oid newOwnerId); +static void change_owner_recurse_to_sequences(Oid relationOid, + Oid newOwnerId, LOCKMODE lockmode); +static ObjectAddress ATExecClusterOn(Relation rel, const char *indexName, + LOCKMODE lockmode); +static void ATExecDropCluster(Relation rel, LOCKMODE lockmode); +static void 
ATPrepSetAccessMethod(AlteredTableInfo *tab, Relation rel, const char *amname); +static bool ATPrepChangePersistence(Relation rel, bool toLogged); +static void ATPrepSetTableSpace(AlteredTableInfo *tab, Relation rel, + const char *tablespacename, LOCKMODE lockmode); +static void ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode); +static void ATExecSetTableSpaceNoStorage(Relation rel, Oid newTableSpace); +static void ATExecSetRelOptions(Relation rel, List *defList, + AlterTableType operation, + LOCKMODE lockmode); +static void ATExecEnableDisableTrigger(Relation rel, const char *trigname, + char fires_when, bool skip_system, bool recurse, + LOCKMODE lockmode); +static void ATExecEnableDisableRule(Relation rel, const char *rulename, + char fires_when, LOCKMODE lockmode); +static void ATPrepAddInherit(Relation child_rel); +static ObjectAddress ATExecAddInherit(Relation child_rel, RangeVar *parent, LOCKMODE lockmode); +static ObjectAddress ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode); +static void drop_parent_dependency(Oid relid, Oid refclassid, Oid refobjid, + DependencyType deptype); +static ObjectAddress ATExecAddOf(Relation rel, const TypeName *ofTypename, LOCKMODE lockmode); +static void ATExecDropOf(Relation rel, LOCKMODE lockmode); +static void ATExecReplicaIdentity(Relation rel, ReplicaIdentityStmt *stmt, LOCKMODE lockmode); +static void ATExecGenericOptions(Relation rel, List *options); +static void ATExecSetRowSecurity(Relation rel, bool rls); +static void ATExecForceNoForceRowSecurity(Relation rel, bool force_rls); +static ObjectAddress ATExecSetCompression(AlteredTableInfo *tab, Relation rel, + const char *column, Node *newValue, LOCKMODE lockmode); + +static void index_copy_data(Relation rel, RelFileNode newrnode); +static const char *storage_name(char c); + +static void RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, + Oid oldRelOid, void *arg); +static void 
RangeVarCallbackForAlterRelation(const RangeVar *rv, Oid relid, + Oid oldrelid, void *arg); +static PartitionSpec *transformPartitionSpec(Relation rel, PartitionSpec *partspec, char *strategy); +static void ComputePartitionAttrs(ParseState *pstate, Relation rel, List *partParams, AttrNumber *partattrs, + List **partexprs, Oid *partopclass, Oid *partcollation, char strategy); +static void CreateInheritance(Relation child_rel, Relation parent_rel); +static void RemoveInheritance(Relation child_rel, Relation parent_rel, + bool allow_detached); +static ObjectAddress ATExecAttachPartition(List **wqueue, Relation rel, + PartitionCmd *cmd, + AlterTableUtilityContext *context); +static void AttachPartitionEnsureIndexes(Relation rel, Relation attachrel); +static void QueuePartitionConstraintValidation(List **wqueue, Relation scanrel, + List *partConstraint, + bool validate_default); +static void CloneRowTriggersToPartition(Relation parent, Relation partition); +static void DetachAddConstraintIfNeeded(List **wqueue, Relation partRel); +static void DropClonedTriggersFromPartition(Oid partitionId); +static ObjectAddress ATExecDetachPartition(List **wqueue, AlteredTableInfo *tab, + Relation rel, RangeVar *name, + bool concurrent); +static void DetachPartitionFinalize(Relation rel, Relation partRel, + bool concurrent, Oid defaultPartOid); +static ObjectAddress ATExecDetachPartitionFinalize(Relation rel, RangeVar *name); +static ObjectAddress ATExecAttachPartitionIdx(List **wqueue, Relation rel, + RangeVar *name); +static void validatePartitionedIndex(Relation partedIdx, Relation partedTbl); +static void refuseDupeIndexAttach(Relation parentIdx, Relation partIdx, + Relation partitionTbl); +static List *GetParentedForeignKeyRefs(Relation partition); +static void ATDetachCheckNoForeignKeyRefs(Relation partition); +static char GetAttributeCompression(Oid atttypid, char *compression); + + +/* ---------------------------------------------------------------- + * DefineRelation + * 
Creates a new relation. + * + * stmt carries parsetree information from an ordinary CREATE TABLE statement. + * The other arguments are used to extend the behavior for other cases: + * relkind: relkind to assign to the new relation + * ownerId: if not InvalidOid, use this as the new relation's owner. + * typaddress: if not null, it's set to the pg_type entry's address. + * queryString: for error reporting + * + * Note that permissions checks are done against current user regardless of + * ownerId. A nonzero ownerId is used when someone is creating a relation + * "on behalf of" someone else, so we still want to see that the current user + * has permissions to do it. + * + * If successful, returns the address of the new relation. + * ---------------------------------------------------------------- + */ +ObjectAddress +DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, + ObjectAddress *typaddress, const char *queryString) +{ + char relname[NAMEDATALEN]; + Oid namespaceId; + Oid relationId; + Oid tablespaceId; + Relation rel; + TupleDesc descriptor; + List *inheritOids; + List *old_constraints; + List *rawDefaults; + List *cookedDefaults; + Datum reloptions; + ListCell *listptr; + AttrNumber attnum; + bool partitioned; + static char *validnsps[] = HEAP_RELOPT_NAMESPACES; + Oid ofTypeId; + ObjectAddress address; + LOCKMODE parentLockmode; + const char *accessMethod = NULL; + Oid accessMethodId = InvalidOid; + + /* + * Truncate relname to appropriate length (probably a waste of time, as + * parser should have done this already). 
+ */ + strlcpy(relname, stmt->relation->relname, NAMEDATALEN); + + /* + * Check consistency of arguments + */ + if (stmt->oncommit != ONCOMMIT_NOOP + && stmt->relation->relpersistence != RELPERSISTENCE_TEMP) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("ON COMMIT can only be used on temporary tables"))); + + if (stmt->partspec != NULL) + { + if (relkind != RELKIND_RELATION) + elog(ERROR, "unexpected relkind: %d", (int) relkind); + + relkind = RELKIND_PARTITIONED_TABLE; + partitioned = true; + } + else + partitioned = false; + + /* + * Look up the namespace in which we are supposed to create the relation, + * check we have permission to create there, lock it against concurrent + * drop, and mark stmt->relation as RELPERSISTENCE_TEMP if a temporary + * namespace is selected. + */ + namespaceId = + RangeVarGetAndCheckCreationNamespace(stmt->relation, NoLock, NULL); + + /* + * Security check: disallow creating temp tables from security-restricted + * code. This is needed because calling code might not expect untrusted + * tables to appear in pg_temp at the front of its search path. + */ + if (stmt->relation->relpersistence == RELPERSISTENCE_TEMP + && InSecurityRestrictedOperation()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("cannot create temporary table within security-restricted operation"))); + + /* + * Determine the lockmode to use when scanning parents. A self-exclusive + * lock is needed here. + * + * For regular inheritance, if two backends attempt to add children to the + * same parent simultaneously, and that parent has no pre-existing + * children, then both will attempt to update the parent's relhassubclass + * field, leading to a "tuple concurrently updated" error. Also, this + * interlocks against a concurrent ANALYZE on the parent table, which + * might otherwise be attempting to clear the parent's relhassubclass + * field, if its previous children were recently dropped. 
+ * + * If the child table is a partition, then we instead grab an exclusive + * lock on the parent because its partition descriptor will be changed by + * addition of the new partition. + */ + parentLockmode = (stmt->partbound != NULL ? AccessExclusiveLock : + ShareUpdateExclusiveLock); + + /* Determine the list of OIDs of the parents. */ + inheritOids = NIL; + foreach(listptr, stmt->inhRelations) + { + RangeVar *rv = (RangeVar *) lfirst(listptr); + Oid parentOid; + + parentOid = RangeVarGetRelid(rv, parentLockmode, false); + + /* + * Reject duplications in the list of parents. + */ + if (list_member_oid(inheritOids, parentOid)) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_TABLE), + errmsg("relation \"%s\" would be inherited from more than once", + get_rel_name(parentOid)))); + + inheritOids = lappend_oid(inheritOids, parentOid); + } + + /* + * Select tablespace to use: an explicitly indicated one, or (in the case + * of a partitioned table) the parent's, if it has one. + */ + if (stmt->tablespacename) + { + tablespaceId = get_tablespace_oid(stmt->tablespacename, false); + + if (partitioned && tablespaceId == MyDatabaseTableSpace) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot specify default tablespace for partitioned relations"))); + } + else if (stmt->partbound) + { + /* + * For partitions, when no other tablespace is specified, we default + * the tablespace to the parent partitioned table's. + */ + Assert(list_length(inheritOids) == 1); + tablespaceId = get_rel_tablespace(linitial_oid(inheritOids)); + } + else + tablespaceId = InvalidOid; + + /* still nothing? 
use the default */ + if (!OidIsValid(tablespaceId)) + tablespaceId = GetDefaultTablespace(stmt->relation->relpersistence, + partitioned); + + /* Check permissions except when using database's default */ + if (OidIsValid(tablespaceId) && tablespaceId != MyDatabaseTableSpace) + { + AclResult aclresult; + + aclresult = pg_tablespace_aclcheck(tablespaceId, GetUserId(), + ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_TABLESPACE, + get_tablespace_name(tablespaceId)); + } + + /* In all cases disallow placing user relations in pg_global */ + if (tablespaceId == GLOBALTABLESPACE_OID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("only shared relations can be placed in pg_global tablespace"))); + + /* Identify user ID that will own the table */ + if (!OidIsValid(ownerId)) + ownerId = GetUserId(); + + /* + * Parse and validate reloptions, if any. + */ + reloptions = transformRelOptions((Datum) 0, stmt->options, NULL, validnsps, + true, false); + + switch (relkind) + { + case RELKIND_VIEW: + (void) view_reloptions(reloptions, true); + break; + case RELKIND_PARTITIONED_TABLE: + (void) partitioned_table_reloptions(reloptions, true); + break; + default: + (void) heap_reloptions(relkind, reloptions, true); + } + + if (stmt->ofTypename) + { + AclResult aclresult; + + ofTypeId = typenameTypeId(NULL, stmt->ofTypename); + + aclresult = pg_type_aclcheck(ofTypeId, GetUserId(), ACL_USAGE); + if (aclresult != ACLCHECK_OK) + aclcheck_error_type(aclresult, ofTypeId); + } + else + ofTypeId = InvalidOid; + + /* + * Look up inheritance ancestors and generate relation schema, including + * inherited attributes. (Note that stmt->tableElts is destructively + * modified by MergeAttributes.) + */ + stmt->tableElts = + MergeAttributes(stmt->tableElts, inheritOids, + stmt->relation->relpersistence, + stmt->partbound != NULL, + &old_constraints); + + /* + * Create a tuple descriptor from the relation schema. 
Note that this + * deals with column names, types, and NOT NULL constraints, but not + * default values or CHECK constraints; we handle those below. + */ + descriptor = BuildDescForRelation(stmt->tableElts); + + /* + * Find columns with default values and prepare for insertion of the + * defaults. Pre-cooked (that is, inherited) defaults go into a list of + * CookedConstraint structs that we'll pass to heap_create_with_catalog, + * while raw defaults go into a list of RawColumnDefault structs that will + * be processed by AddRelationNewConstraints. (We can't deal with raw + * expressions until we can do transformExpr.) + * + * We can set the atthasdef flags now in the tuple descriptor; this just + * saves StoreAttrDefault from having to do an immediate update of the + * pg_attribute rows. + */ + rawDefaults = NIL; + cookedDefaults = NIL; + attnum = 0; + + foreach(listptr, stmt->tableElts) + { + ColumnDef *colDef = lfirst(listptr); + Form_pg_attribute attr; + + attnum++; + attr = TupleDescAttr(descriptor, attnum - 1); + + if (colDef->raw_default != NULL) + { + RawColumnDefault *rawEnt; + + Assert(colDef->cooked_default == NULL); + + rawEnt = (RawColumnDefault *) palloc(sizeof(RawColumnDefault)); + rawEnt->attnum = attnum; + rawEnt->raw_default = colDef->raw_default; + rawEnt->missingMode = false; + rawEnt->generated = colDef->generated; + rawDefaults = lappend(rawDefaults, rawEnt); + attr->atthasdef = true; + } + else if (colDef->cooked_default != NULL) + { + CookedConstraint *cooked; + + cooked = (CookedConstraint *) palloc(sizeof(CookedConstraint)); + cooked->contype = CONSTR_DEFAULT; + cooked->conoid = InvalidOid; /* until created */ + cooked->name = NULL; + cooked->attnum = attnum; + cooked->expr = colDef->cooked_default; + cooked->skip_validation = false; + cooked->is_local = true; /* not used for defaults */ + cooked->inhcount = 0; /* ditto */ + cooked->is_no_inherit = false; + cookedDefaults = lappend(cookedDefaults, cooked); + attr->atthasdef = true; + } + + 
if (colDef->identity) + attr->attidentity = colDef->identity; + + if (colDef->generated) + attr->attgenerated = colDef->generated; + + if (colDef->compression) + attr->attcompression = GetAttributeCompression(attr->atttypid, + colDef->compression); + } + + /* + * If the statement hasn't specified an access method, but we're defining + * a type of relation that needs one, use the default. + */ + if (stmt->accessMethod != NULL) + { + accessMethod = stmt->accessMethod; + + if (partitioned) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("specifying a table access method is not supported on a partitioned table"))); + } + else if (RELKIND_HAS_TABLE_AM(relkind)) + accessMethod = default_table_access_method; + + /* look up the access method, verify it is for a table */ + if (accessMethod != NULL) + accessMethodId = get_table_am_oid(accessMethod, false); + + /* + * Create the relation. Inherited defaults and constraints are passed in + * for immediate handling --- since they don't need parsing, they can be + * stored immediately. + */ + relationId = heap_create_with_catalog(relname, + namespaceId, + tablespaceId, + InvalidOid, + InvalidOid, + ofTypeId, + ownerId, + accessMethodId, + descriptor, + list_concat(cookedDefaults, + old_constraints), + relkind, + stmt->relation->relpersistence, + false, + false, + stmt->oncommit, + reloptions, + true, + allowSystemTableMods, + false, + InvalidOid, + typaddress); + + /* + * We must bump the command counter to make the newly-created relation + * tuple visible for opening. + */ + CommandCounterIncrement(); + + /* + * Open the new relation and acquire exclusive lock on it. This isn't + * really necessary for locking out other backends (since they can't see + * the new rel anyway until we commit), but it keeps the lock manager from + * complaining about deadlock risks. 
+ */ + rel = relation_open(relationId, AccessExclusiveLock); + + /* + * Now add any newly specified column default and generation expressions + * to the new relation. These are passed to us in the form of raw + * parsetrees; we need to transform them to executable expression trees + * before they can be added. The most convenient way to do that is to + * apply the parser's transformExpr routine, but transformExpr doesn't + * work unless we have a pre-existing relation. So, the transformation has + * to be postponed to this final step of CREATE TABLE. + * + * This needs to be before processing the partitioning clauses because + * those could refer to generated columns. + */ + if (rawDefaults) + AddRelationNewConstraints(rel, rawDefaults, NIL, + true, true, false, queryString); + + /* + * Make column generation expressions visible for use by partitioning. + */ + CommandCounterIncrement(); + + /* Process and store partition bound, if any. */ + if (stmt->partbound) + { + PartitionBoundSpec *bound; + ParseState *pstate; + Oid parentId = linitial_oid(inheritOids), + defaultPartOid; + Relation parent, + defaultRel = NULL; + ParseNamespaceItem *nsitem; + + /* Already have strong enough lock on the parent */ + parent = table_open(parentId, NoLock); + + /* + * We are going to try to validate the partition bound specification + * against the partition key of parentRel, so it better have one. + */ + if (parent->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("\"%s\" is not partitioned", + RelationGetRelationName(parent)))); + + /* + * The partition constraint of the default partition depends on the + * partition bounds of every other partition. It is possible that + * another backend might be about to execute a query on the default + * partition table, and that the query relies on previously cached + * default partition constraints. 
We must therefore take a table lock + * strong enough to prevent all queries on the default partition from + * proceeding until we commit and send out a shared-cache-inval notice + * that will make them update their index lists. + * + * Order of locking: The relation being added won't be visible to + * other backends until it is committed, hence here in + * DefineRelation() the order of locking the default partition and the + * relation being added does not matter. But at all other places we + * need to lock the default relation before we lock the relation being + * added or removed i.e. we should take the lock in same order at all + * the places such that lock parent, lock default partition and then + * lock the partition so as to avoid a deadlock. + */ + defaultPartOid = + get_default_oid_from_partdesc(RelationGetPartitionDesc(parent, + true)); + if (OidIsValid(defaultPartOid)) + defaultRel = table_open(defaultPartOid, AccessExclusiveLock); + + /* Transform the bound values */ + pstate = make_parsestate(NULL); + pstate->p_sourcetext = queryString; + + /* + * Add an nsitem containing this relation, so that transformExpr + * called on partition bound expressions is able to report errors + * using a proper context. + */ + nsitem = addRangeTableEntryForRelation(pstate, rel, AccessShareLock, + NULL, false, false); + addNSItemToQuery(pstate, nsitem, false, true, true); + + bound = transformPartitionBound(pstate, parent, stmt->partbound); + + /* + * Check first that the new partition's bound is valid and does not + * overlap with any of existing partitions of the parent. + */ + check_new_partition_bound(relname, parent, bound, pstate); + + /* + * If the default partition exists, its partition constraints will + * change after the addition of this new partition such that it won't + * allow any row that qualifies for this new partition. So, check that + * the existing data in the default partition satisfies the constraint + * as it will exist after adding this partition. 
+ */ + if (OidIsValid(defaultPartOid)) + { + check_default_partition_contents(parent, defaultRel, bound); + /* Keep the lock until commit. */ + table_close(defaultRel, NoLock); + } + + /* Update the pg_class entry. */ + StorePartitionBound(rel, parent, bound); + + table_close(parent, NoLock); + } + + /* Store inheritance information for new rel. */ + StoreCatalogInheritance(relationId, inheritOids, stmt->partbound != NULL); + + /* + * Process the partitioning specification (if any) and store the partition + * key information into the catalog. + */ + if (partitioned) + { + ParseState *pstate; + char strategy; + int partnatts; + AttrNumber partattrs[PARTITION_MAX_KEYS]; + Oid partopclass[PARTITION_MAX_KEYS]; + Oid partcollation[PARTITION_MAX_KEYS]; + List *partexprs = NIL; + + pstate = make_parsestate(NULL); + pstate->p_sourcetext = queryString; + + partnatts = list_length(stmt->partspec->partParams); + + /* Protect fixed-size arrays here and in executor */ + if (partnatts > PARTITION_MAX_KEYS) + ereport(ERROR, + (errcode(ERRCODE_TOO_MANY_COLUMNS), + errmsg("cannot partition using more than %d columns", + PARTITION_MAX_KEYS))); + + /* + * We need to transform the raw parsetrees corresponding to partition + * expressions into executable expression trees. Like column defaults + * and CHECK constraints, we could not have done the transformation + * earlier. + */ + stmt->partspec = transformPartitionSpec(rel, stmt->partspec, + &strategy); + + ComputePartitionAttrs(pstate, rel, stmt->partspec->partParams, + partattrs, &partexprs, partopclass, + partcollation, strategy); + + StorePartitionKey(rel, strategy, partnatts, partattrs, partexprs, + partopclass, partcollation); + + /* make it all visible */ + CommandCounterIncrement(); + } + + /* + * If we're creating a partition, create now all the indexes, triggers, + * FKs defined in the parent. + * + * We can't do it earlier, because DefineIndex wants to know the partition + * key which we just stored. 
+ */ + if (stmt->partbound) + { + Oid parentId = linitial_oid(inheritOids); + Relation parent; + List *idxlist; + ListCell *cell; + + /* Already have strong enough lock on the parent */ + parent = table_open(parentId, NoLock); + idxlist = RelationGetIndexList(parent); + + /* + * For each index in the parent table, create one in the partition + */ + foreach(cell, idxlist) + { + Relation idxRel = index_open(lfirst_oid(cell), AccessShareLock); + AttrMap *attmap; + IndexStmt *idxstmt; + Oid constraintOid; + + if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE) + { + if (idxRel->rd_index->indisunique) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot create foreign partition of partitioned table \"%s\"", + RelationGetRelationName(parent)), + errdetail("Table \"%s\" contains indexes that are unique.", + RelationGetRelationName(parent)))); + else + { + index_close(idxRel, AccessShareLock); + continue; + } + } + + attmap = build_attrmap_by_name(RelationGetDescr(rel), + RelationGetDescr(parent)); + idxstmt = + generateClonedIndexStmt(NULL, idxRel, + attmap, &constraintOid); + DefineIndex(RelationGetRelid(rel), + idxstmt, + InvalidOid, + RelationGetRelid(idxRel), + constraintOid, + false, false, false, false, false); + + index_close(idxRel, AccessShareLock); + } + + list_free(idxlist); + + /* + * If there are any row-level triggers, clone them to the new + * partition. + */ + if (parent->trigdesc != NULL) + CloneRowTriggersToPartition(parent, rel); + + /* + * And foreign keys too. Note that because we're freshly creating the + * table, there is no need to verify these new constraints. + */ + CloneForeignKeyConstraints(NULL, parent, rel); + + table_close(parent, NoLock); + } + + /* + * Now add any newly specified CHECK constraints to the new relation. Same + * as for defaults above, but these need to come after partitioning is set + * up. 
+ */ + if (stmt->constraints) + AddRelationNewConstraints(rel, NIL, stmt->constraints, + true, true, false, queryString); + + ObjectAddressSet(address, RelationRelationId, relationId); + + /* + * Clean up. We keep lock on new relation (although it shouldn't be + * visible to anyone else anyway, until commit). + */ + relation_close(rel, NoLock); + + return address; +} + +/* + * Emit the right error or warning message for a "DROP" command issued on a + * non-existent relation + */ +static void +DropErrorMsgNonExistent(RangeVar *rel, char rightkind, bool missing_ok) +{ + const struct dropmsgstrings *rentry; + + if (rel->schemaname != NULL && + !OidIsValid(LookupNamespaceNoError(rel->schemaname))) + { + if (!missing_ok) + { + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_SCHEMA), + errmsg("schema \"%s\" does not exist", rel->schemaname))); + } + else + { + ereport(NOTICE, + (errmsg("schema \"%s\" does not exist, skipping", + rel->schemaname))); + } + return; + } + + for (rentry = dropmsgstringarray; rentry->kind != '\0'; rentry++) + { + if (rentry->kind == rightkind) + { + if (!missing_ok) + { + ereport(ERROR, + (errcode(rentry->nonexistent_code), + errmsg(rentry->nonexistent_msg, rel->relname))); + } + else + { + ereport(NOTICE, (errmsg(rentry->skipping_msg, rel->relname))); + break; + } + } + } + + Assert(rentry->kind != '\0'); /* Should be impossible */ +} + +/* + * Emit the right error message for a "DROP" command issued on a + * relation of the wrong type + */ +static void +DropErrorMsgWrongType(const char *relname, char wrongkind, char rightkind) +{ + const struct dropmsgstrings *rentry; + const struct dropmsgstrings *wentry; + + for (rentry = dropmsgstringarray; rentry->kind != '\0'; rentry++) + if (rentry->kind == rightkind) + break; + Assert(rentry->kind != '\0'); + + for (wentry = dropmsgstringarray; wentry->kind != '\0'; wentry++) + if (wentry->kind == wrongkind) + break; + /* wrongkind could be something we don't have in our table... 
*/ + + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg(rentry->nota_msg, relname), + (wentry->kind != '\0') ? errhint("%s", _(wentry->drophint_msg)) : 0)); +} + +/* + * RemoveRelations + * Implements DROP TABLE, DROP INDEX, DROP SEQUENCE, DROP VIEW, + * DROP MATERIALIZED VIEW, DROP FOREIGN TABLE + */ +void +RemoveRelations(DropStmt *drop) +{ + ObjectAddresses *objects; + char relkind; + ListCell *cell; + int flags = 0; + LOCKMODE lockmode = AccessExclusiveLock; + + /* DROP CONCURRENTLY uses a weaker lock, and has some restrictions */ + if (drop->concurrent) + { + /* + * Note that for temporary relations this lock may get upgraded later + * on, but as no other session can access a temporary relation, this + * is actually fine. + */ + lockmode = ShareUpdateExclusiveLock; + Assert(drop->removeType == OBJECT_INDEX); + if (list_length(drop->objects) != 1) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("DROP INDEX CONCURRENTLY does not support dropping multiple objects"))); + if (drop->behavior == DROP_CASCADE) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("DROP INDEX CONCURRENTLY does not support CASCADE"))); + } + + /* + * First we identify all the relations, then we delete them in a single + * performMultipleDeletions() call. This is to avoid unwanted DROP + * RESTRICT errors if one of the relations depends on another. 
+ */ + + /* Determine required relkind */ + switch (drop->removeType) + { + case OBJECT_TABLE: + relkind = RELKIND_RELATION; + break; + + case OBJECT_INDEX: + relkind = RELKIND_INDEX; + break; + + case OBJECT_SEQUENCE: + relkind = RELKIND_SEQUENCE; + break; + + case OBJECT_VIEW: + relkind = RELKIND_VIEW; + break; + + case OBJECT_MATVIEW: + relkind = RELKIND_MATVIEW; + break; + + case OBJECT_FOREIGN_TABLE: + relkind = RELKIND_FOREIGN_TABLE; + break; + + default: + elog(ERROR, "unrecognized drop object type: %d", + (int) drop->removeType); + relkind = 0; /* keep compiler quiet */ + break; + } + + /* Lock and validate each relation; build a list of object addresses */ + objects = new_object_addresses(); + + foreach(cell, drop->objects) + { + RangeVar *rel = makeRangeVarFromNameList((List *) lfirst(cell)); + Oid relOid; + ObjectAddress obj; + struct DropRelationCallbackState state; + + /* + * These next few steps are a great deal like relation_openrv, but we + * don't bother building a relcache entry since we don't need it. + * + * Check for shared-cache-inval messages before trying to access the + * relation. This is needed to cover the case where the name + * identifies a rel that has been dropped and recreated since the + * start of our transaction: if we don't flush the old syscache entry, + * then we'll latch onto that entry and suffer an error later. + */ + AcceptInvalidationMessages(); + + /* Look up the appropriate relation using namespace search. */ + state.expected_relkind = relkind; + state.heap_lockmode = drop->concurrent ? + ShareUpdateExclusiveLock : AccessExclusiveLock; + /* We must initialize these fields to show that no locks are held: */ + state.heapOid = InvalidOid; + state.partParentOid = InvalidOid; + + relOid = RangeVarGetRelidExtended(rel, lockmode, RVR_MISSING_OK, + RangeVarCallbackForDropRelation, + (void *) &state); + + /* Not there? 
*/ + if (!OidIsValid(relOid)) + { + DropErrorMsgNonExistent(rel, relkind, drop->missing_ok); + continue; + } + + /* + * Decide if concurrent mode needs to be used here or not. The + * callback retrieved the rel's persistence for us. + */ + if (drop->concurrent && + state.actual_relpersistence != RELPERSISTENCE_TEMP) + { + Assert(list_length(drop->objects) == 1 && + drop->removeType == OBJECT_INDEX); + flags |= PERFORM_DELETION_CONCURRENTLY; + } + + /* + * Concurrent index drop cannot be used with partitioned indexes, + * either. + */ + if ((flags & PERFORM_DELETION_CONCURRENTLY) != 0 && + state.actual_relkind == RELKIND_PARTITIONED_INDEX) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot drop partitioned index \"%s\" concurrently", + rel->relname))); + + /* + * If we're told to drop a partitioned index, we must acquire lock on + * all the children of its parent partitioned table before proceeding. + * Otherwise we'd try to lock the child index partitions before their + * tables, leading to potential deadlock against other sessions that + * will lock those objects in the other order. + */ + if (state.actual_relkind == RELKIND_PARTITIONED_INDEX) + (void) find_all_inheritors(state.heapOid, + state.heap_lockmode, + NULL); + + /* OK, we're ready to delete this one */ + obj.classId = RelationRelationId; + obj.objectId = relOid; + obj.objectSubId = 0; + + add_exact_object_address(&obj, objects); + } + + performMultipleDeletions(objects, drop->behavior, flags); + + free_object_addresses(objects); +} + +/* + * Before acquiring a table lock, check whether we have sufficient rights. + * In the case of DROP INDEX, also try to lock the table before the index. + * Also, if the table to be dropped is a partition, we try to lock the parent + * first. 
+ */ +static void +RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid, + void *arg) +{ + HeapTuple tuple; + struct DropRelationCallbackState *state; + char expected_relkind; + bool is_partition; + Form_pg_class classform; + LOCKMODE heap_lockmode; + bool invalid_system_index = false; + + state = (struct DropRelationCallbackState *) arg; + heap_lockmode = state->heap_lockmode; + + /* + * If we previously locked some other index's heap, and the name we're + * looking up no longer refers to that relation, release the now-useless + * lock. + */ + if (relOid != oldRelOid && OidIsValid(state->heapOid)) + { + UnlockRelationOid(state->heapOid, heap_lockmode); + state->heapOid = InvalidOid; + } + + /* + * Similarly, if we previously locked some other partition's heap, and the + * name we're looking up no longer refers to that relation, release the + * now-useless lock. + */ + if (relOid != oldRelOid && OidIsValid(state->partParentOid)) + { + UnlockRelationOid(state->partParentOid, AccessExclusiveLock); + state->partParentOid = InvalidOid; + } + + /* Didn't find a relation, so no need for locking or permission checks. */ + if (!OidIsValid(relOid)) + return; + + tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relOid)); + if (!HeapTupleIsValid(tuple)) + return; /* concurrently dropped, so nothing to do */ + classform = (Form_pg_class) GETSTRUCT(tuple); + is_partition = classform->relispartition; + + /* Pass back some data to save lookups in RemoveRelations */ + state->actual_relkind = classform->relkind; + state->actual_relpersistence = classform->relpersistence; + + /* + * Both RELKIND_RELATION and RELKIND_PARTITIONED_TABLE are OBJECT_TABLE, + * but RemoveRelations() can only pass one relkind for a given relation. + * It chooses RELKIND_RELATION for both regular and partitioned tables. + * That means we must be careful before giving the wrong type error when + * the relation is RELKIND_PARTITIONED_TABLE. 
An equivalent problem + * exists with indexes. + */ + if (classform->relkind == RELKIND_PARTITIONED_TABLE) + expected_relkind = RELKIND_RELATION; + else if (classform->relkind == RELKIND_PARTITIONED_INDEX) + expected_relkind = RELKIND_INDEX; + else + expected_relkind = classform->relkind; + + if (state->expected_relkind != expected_relkind) + DropErrorMsgWrongType(rel->relname, classform->relkind, + state->expected_relkind); + + /* Allow DROP to either table owner or schema owner */ + if (!pg_class_ownercheck(relOid, GetUserId()) && + !pg_namespace_ownercheck(classform->relnamespace, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, + get_relkind_objtype(classform->relkind), + rel->relname); + + /* + * Check the case of a system index that might have been invalidated by a + * failed concurrent process and allow its drop. For the time being, this + * only concerns indexes of toast relations that became invalid during a + * REINDEX CONCURRENTLY process. + */ + if (IsSystemClass(relOid, classform) && classform->relkind == RELKIND_INDEX) + { + HeapTuple locTuple; + Form_pg_index indexform; + bool indisvalid; + + locTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(relOid)); + if (!HeapTupleIsValid(locTuple)) + { + ReleaseSysCache(tuple); + return; + } + + indexform = (Form_pg_index) GETSTRUCT(locTuple); + indisvalid = indexform->indisvalid; + ReleaseSysCache(locTuple); + + /* Mark object as being an invalid index of system catalogs */ + if (!indisvalid) + invalid_system_index = true; + } + + /* In the case of an invalid index, it is fine to bypass this check */ + if (!invalid_system_index && !allowSystemTableMods && IsSystemClass(relOid, classform)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied: \"%s\" is a system catalog", + rel->relname))); + + ReleaseSysCache(tuple); + + /* + * In DROP INDEX, attempt to acquire lock on the parent table before + * locking the index. 
index_drop() will need this anyway, and since + * regular queries lock tables before their indexes, we risk deadlock if + * we do it the other way around. No error if we don't find a pg_index + * entry, though --- the relation may have been dropped. Note that this + * code will execute for either plain or partitioned indexes. + */ + if (expected_relkind == RELKIND_INDEX && + relOid != oldRelOid) + { + state->heapOid = IndexGetRelation(relOid, true); + if (OidIsValid(state->heapOid)) + LockRelationOid(state->heapOid, heap_lockmode); + } + + /* + * Similarly, if the relation is a partition, we must acquire lock on its + * parent before locking the partition. That's because queries lock the + * parent before its partitions, so we risk deadlock if we do it the other + * way around. + */ + if (is_partition && relOid != oldRelOid) + { + state->partParentOid = get_partition_parent(relOid, true); + if (OidIsValid(state->partParentOid)) + LockRelationOid(state->partParentOid, AccessExclusiveLock); + } +} + +/* + * ExecuteTruncate + * Executes a TRUNCATE command. + * + * This is a multi-relation truncate. We first open and grab exclusive + * lock on all relations involved, checking permissions and otherwise + * verifying that the relation is OK for truncation. Note that if relations + * are foreign tables, at this stage, we have not yet checked that their + * foreign data in external data sources are OK for truncation. These are + * checked when foreign data are actually truncated later. In CASCADE mode, + * relations having FK references to the targeted relations are automatically + * added to the group; in RESTRICT mode, we check that all FK references are + * internal to the group that's being truncated. Finally all the relations + * are truncated and reindexed. 
+ */ +void +ExecuteTruncate(TruncateStmt *stmt) +{ + List *rels = NIL; + List *relids = NIL; + List *relids_logged = NIL; + ListCell *cell; + + /* + * Open, exclusive-lock, and check all the explicitly-specified relations + */ + foreach(cell, stmt->relations) + { + RangeVar *rv = lfirst(cell); + Relation rel; + bool recurse = rv->inh; + Oid myrelid; + LOCKMODE lockmode = AccessExclusiveLock; + + myrelid = RangeVarGetRelidExtended(rv, lockmode, + 0, RangeVarCallbackForTruncate, + NULL); + + /* don't throw error for "TRUNCATE foo, foo" */ + if (list_member_oid(relids, myrelid)) + continue; + + /* open the relation, we already hold a lock on it */ + rel = table_open(myrelid, NoLock); + + /* + * RangeVarGetRelidExtended() has done most checks with its callback, + * but other checks with the now-opened Relation remain. + */ + truncate_check_activity(rel); + + rels = lappend(rels, rel); + relids = lappend_oid(relids, myrelid); + + /* Log this relation only if needed for logical decoding */ + if (RelationIsLogicallyLogged(rel)) + relids_logged = lappend_oid(relids_logged, myrelid); + + if (recurse) + { + ListCell *child; + List *children; + + children = find_all_inheritors(myrelid, lockmode, NULL); + + foreach(child, children) + { + Oid childrelid = lfirst_oid(child); + + if (list_member_oid(relids, childrelid)) + continue; + + /* find_all_inheritors already got lock */ + rel = table_open(childrelid, NoLock); + + /* + * It is possible that the parent table has children that are + * temp tables of other backends. We cannot safely access + * such tables (because of buffering issues), and the best + * thing to do is to silently ignore them. Note that this + * check is the same as one of the checks done in + * truncate_check_activity() called below, still it is kept + * here for simplicity. 
+ */ + if (RELATION_IS_OTHER_TEMP(rel)) + { + table_close(rel, lockmode); + continue; + } + + /* + * Inherited TRUNCATE commands perform access permission + * checks on the parent table only. So we skip checking the + * children's permissions and don't call + * truncate_check_perms() here. + */ + truncate_check_rel(RelationGetRelid(rel), rel->rd_rel); + truncate_check_activity(rel); + + rels = lappend(rels, rel); + relids = lappend_oid(relids, childrelid); + + /* Log this relation only if needed for logical decoding */ + if (RelationIsLogicallyLogged(rel)) + relids_logged = lappend_oid(relids_logged, childrelid); + } + } + else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot truncate only a partitioned table"), + errhint("Do not specify the ONLY keyword, or use TRUNCATE ONLY on the partitions directly."))); + } + + ExecuteTruncateGuts(rels, relids, relids_logged, + stmt->behavior, stmt->restart_seqs); + + /* And close the rels */ + foreach(cell, rels) + { + Relation rel = (Relation) lfirst(cell); + + table_close(rel, NoLock); + } +} + +/* + * ExecuteTruncateGuts + * + * Internal implementation of TRUNCATE. This is called by the actual TRUNCATE + * command (see above) as well as replication subscribers that execute a + * replicated TRUNCATE action. + * + * explicit_rels is the list of Relations to truncate that the command + * specified. relids is the list of Oids corresponding to explicit_rels. + * relids_logged is the list of Oids (a subset of relids) that require + * WAL-logging. This is all a bit redundant, but the existing callers have + * this information handy in this form. 
+ */ +void +ExecuteTruncateGuts(List *explicit_rels, + List *relids, + List *relids_logged, + DropBehavior behavior, bool restart_seqs) +{ + List *rels; + List *seq_relids = NIL; + HTAB *ft_htab = NULL; + EState *estate; + ResultRelInfo *resultRelInfos; + ResultRelInfo *resultRelInfo; + SubTransactionId mySubid; + ListCell *cell; + Oid *logrelids; + + /* + * Check the explicitly-specified relations. + * + * In CASCADE mode, suck in all referencing relations as well. This + * requires multiple iterations to find indirectly-dependent relations. At + * each phase, we need to exclusive-lock new rels before looking for their + * dependencies, else we might miss something. Also, we check each rel as + * soon as we open it, to avoid a faux pas such as holding lock for a long + * time on a rel we have no permissions for. + */ + rels = list_copy(explicit_rels); + if (behavior == DROP_CASCADE) + { + for (;;) + { + List *newrelids; + + newrelids = heap_truncate_find_FKs(relids); + if (newrelids == NIL) + break; /* nothing else to add */ + + foreach(cell, newrelids) + { + Oid relid = lfirst_oid(cell); + Relation rel; + + rel = table_open(relid, AccessExclusiveLock); + ereport(NOTICE, + (errmsg("truncate cascades to table \"%s\"", + RelationGetRelationName(rel)))); + truncate_check_rel(relid, rel->rd_rel); + truncate_check_perms(relid, rel->rd_rel); + truncate_check_activity(rel); + rels = lappend(rels, rel); + relids = lappend_oid(relids, relid); + + /* Log this relation only if needed for logical decoding */ + if (RelationIsLogicallyLogged(rel)) + relids_logged = lappend_oid(relids_logged, relid); + } + } + } + + /* + * Check foreign key references. In CASCADE mode, this should be + * unnecessary since we just pulled in all the references; but as a + * cross-check, do it anyway if in an Assert-enabled build. 
+ */ +#ifdef USE_ASSERT_CHECKING + heap_truncate_check_FKs(rels, false); +#else + if (behavior == DROP_RESTRICT) + heap_truncate_check_FKs(rels, false); +#endif + + /* + * If we are asked to restart sequences, find all the sequences, lock them + * (we need AccessExclusiveLock for ResetSequence), and check permissions. + * We want to do this early since it's pointless to do all the truncation + * work only to fail on sequence permissions. + */ + if (restart_seqs) + { + foreach(cell, rels) + { + Relation rel = (Relation) lfirst(cell); + List *seqlist = getOwnedSequences(RelationGetRelid(rel)); + ListCell *seqcell; + + foreach(seqcell, seqlist) + { + Oid seq_relid = lfirst_oid(seqcell); + Relation seq_rel; + + seq_rel = relation_open(seq_relid, AccessExclusiveLock); + + /* This check must match AlterSequence! */ + if (!pg_class_ownercheck(seq_relid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_SEQUENCE, + RelationGetRelationName(seq_rel)); + + seq_relids = lappend_oid(seq_relids, seq_relid); + + relation_close(seq_rel, NoLock); + } + } + } + + /* Prepare to catch AFTER triggers. */ + AfterTriggerBeginQuery(); + + /* + * To fire triggers, we'll need an EState as well as a ResultRelInfo for + * each relation. We don't need to call ExecOpenIndices, though. + * + * We put the ResultRelInfos in the es_opened_result_relations list, even + * though we don't have a range table and don't populate the + * es_result_relations array. That's a bit bogus, but it's enough to make + * ExecGetTriggerResultRel() find them. 
+ */ + estate = CreateExecutorState(); + resultRelInfos = (ResultRelInfo *) + palloc(list_length(rels) * sizeof(ResultRelInfo)); + resultRelInfo = resultRelInfos; + foreach(cell, rels) + { + Relation rel = (Relation) lfirst(cell); + + InitResultRelInfo(resultRelInfo, + rel, + 0, /* dummy rangetable index */ + NULL, + 0); + estate->es_opened_result_relations = + lappend(estate->es_opened_result_relations, resultRelInfo); + resultRelInfo++; + } + + /* + * Process all BEFORE STATEMENT TRUNCATE triggers before we begin + * truncating (this is because one of them might throw an error). Also, if + * we were to allow them to prevent statement execution, that would need + * to be handled here. + */ + resultRelInfo = resultRelInfos; + foreach(cell, rels) + { + ExecBSTruncateTriggers(estate, resultRelInfo); + resultRelInfo++; + } + + /* + * OK, truncate each table. + */ + mySubid = GetCurrentSubTransactionId(); + + foreach(cell, rels) + { + Relation rel = (Relation) lfirst(cell); + + /* Skip partitioned tables as there is nothing to do */ + if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + continue; + + /* + * Build the lists of foreign tables belonging to each foreign server + * and pass each list to the foreign data wrapper's callback function, + * so that each server can truncate its all foreign tables in bulk. + * Each list is saved as a single entry in a hash table that uses the + * server OID as lookup key. 
+ */ + if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE) + { + Oid serverid = GetForeignServerIdByRelId(RelationGetRelid(rel)); + bool found; + ForeignTruncateInfo *ft_info; + + /* First time through, initialize hashtable for foreign tables */ + if (!ft_htab) + { + HASHCTL hctl; + + memset(&hctl, 0, sizeof(HASHCTL)); + hctl.keysize = sizeof(Oid); + hctl.entrysize = sizeof(ForeignTruncateInfo); + hctl.hcxt = CurrentMemoryContext; + + ft_htab = hash_create("TRUNCATE for Foreign Tables", + 32, /* start small and extend */ + &hctl, + HASH_ELEM | HASH_BLOBS | HASH_CONTEXT); + } + + /* Find or create cached entry for the foreign table */ + ft_info = hash_search(ft_htab, &serverid, HASH_ENTER, &found); + if (!found) + { + ft_info->serverid = serverid; + ft_info->rels = NIL; + } + + /* + * Save the foreign table in the entry of the server that the + * foreign table belongs to. + */ + ft_info->rels = lappend(ft_info->rels, rel); + continue; + } + + /* + * Normally, we need a transaction-safe truncation here. However, if + * the table was either created in the current (sub)transaction or has + * a new relfilenode in the current (sub)transaction, then we can just + * truncate it in-place, because a rollback would cause the whole + * table or the current physical file to be thrown away anyway. + */ + if (rel->rd_createSubid == mySubid || + rel->rd_newRelfilenodeSubid == mySubid) + { + /* Immediate, non-rollbackable truncation is OK */ + heap_truncate_one_rel(rel); + } + else + { + Oid heap_relid; + Oid toast_relid; + ReindexParams reindex_params = {0}; + + /* + * This effectively deletes all rows in the table, and may be done + * in a serializable transaction. In that case we must record a + * rw-conflict in to this transaction from each transaction + * holding a predicate lock on the table. + */ + CheckTableForSerializableConflictIn(rel); + + /* + * Need the full transaction-safe pushups. 
+ * + * Create a new empty storage file for the relation, and assign it + * as the relfilenode value. The old storage file is scheduled for + * deletion at commit. + */ + RelationSetNewRelfilenode(rel, rel->rd_rel->relpersistence); + + heap_relid = RelationGetRelid(rel); + + /* + * The same for the toast table, if any. + */ + toast_relid = rel->rd_rel->reltoastrelid; + if (OidIsValid(toast_relid)) + { + Relation toastrel = relation_open(toast_relid, + AccessExclusiveLock); + + RelationSetNewRelfilenode(toastrel, + toastrel->rd_rel->relpersistence); + table_close(toastrel, NoLock); + } + + /* + * Reconstruct the indexes to match, and we're done. + */ + reindex_relation(heap_relid, REINDEX_REL_PROCESS_TOAST, + &reindex_params); + } + + pgstat_count_truncate(rel); + } + + /* Now go through the hash table, and truncate foreign tables */ + if (ft_htab) + { + ForeignTruncateInfo *ft_info; + HASH_SEQ_STATUS seq; + + hash_seq_init(&seq, ft_htab); + + PG_TRY(); + { + while ((ft_info = hash_seq_search(&seq)) != NULL) + { + FdwRoutine *routine = GetFdwRoutineByServerId(ft_info->serverid); + + /* truncate_check_rel() has checked that already */ + Assert(routine->ExecForeignTruncate != NULL); + + routine->ExecForeignTruncate(ft_info->rels, + behavior, + restart_seqs); + } + } + PG_FINALLY(); + { + hash_destroy(ft_htab); + } + PG_END_TRY(); + } + + /* + * Restart owned sequences if we were asked to. + */ + foreach(cell, seq_relids) + { + Oid seq_relid = lfirst_oid(cell); + + ResetSequence(seq_relid); + } + + /* + * Write a WAL record to allow this set of actions to be logically + * decoded. + * + * Assemble an array of relids so we can write a single WAL record for the + * whole action. 
+ */ + if (list_length(relids_logged) > 0) + { + xl_heap_truncate xlrec; + int i = 0; + + /* should only get here if wal_level >= logical */ + Assert(XLogLogicalInfoActive()); + + logrelids = palloc(list_length(relids_logged) * sizeof(Oid)); + foreach(cell, relids_logged) + logrelids[i++] = lfirst_oid(cell); + + xlrec.dbId = MyDatabaseId; + xlrec.nrelids = list_length(relids_logged); + xlrec.flags = 0; + if (behavior == DROP_CASCADE) + xlrec.flags |= XLH_TRUNCATE_CASCADE; + if (restart_seqs) + xlrec.flags |= XLH_TRUNCATE_RESTART_SEQS; + + XLogBeginInsert(); + XLogRegisterData((char *) &xlrec, SizeOfHeapTruncate); + XLogRegisterData((char *) logrelids, list_length(relids_logged) * sizeof(Oid)); + + XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN); + + (void) XLogInsert(RM_HEAP_ID, XLOG_HEAP_TRUNCATE); + } + + /* + * Process all AFTER STATEMENT TRUNCATE triggers. + */ + resultRelInfo = resultRelInfos; + foreach(cell, rels) + { + ExecASTruncateTriggers(estate, resultRelInfo); + resultRelInfo++; + } + + /* Handle queued AFTER triggers */ + AfterTriggerEndQuery(estate); + + /* We can clean up the EState now */ + FreeExecutorState(estate); + + /* + * Close any rels opened by CASCADE (can't do this while EState still + * holds refs) + */ + rels = list_difference_ptr(rels, explicit_rels); + foreach(cell, rels) + { + Relation rel = (Relation) lfirst(cell); + + table_close(rel, NoLock); + } +} + +/* + * Check that a given relation is safe to truncate. Subroutine for + * ExecuteTruncate() and RangeVarCallbackForTruncate(). + */ +static void +truncate_check_rel(Oid relid, Form_pg_class reltuple) +{ + char *relname = NameStr(reltuple->relname); + + /* + * Only allow truncate on regular tables, foreign tables using foreign + * data wrappers supporting TRUNCATE and partitioned tables (although, the + * latter are only being included here for the following checks; no + * physical truncation will occur in their case.). 
+ */ + if (reltuple->relkind == RELKIND_FOREIGN_TABLE) + { + Oid serverid = GetForeignServerIdByRelId(relid); + FdwRoutine *fdwroutine = GetFdwRoutineByServerId(serverid); + + if (!fdwroutine->ExecForeignTruncate) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot truncate foreign table \"%s\"", + relname))); + } + else if (reltuple->relkind != RELKIND_RELATION && + reltuple->relkind != RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is not a table", relname))); + + /* + * Most system catalogs can't be truncated at all, or at least not unless + * allow_system_table_mods=on. As an exception, however, we allow + * pg_largeobject to be truncated as part of pg_upgrade, because we need + * to change its relfilenode to match the old cluster, and allowing a + * TRUNCATE command to be executed is the easiest way of doing that. + */ + if (!allowSystemTableMods && IsSystemClass(relid, reltuple) + && (!IsBinaryUpgrade || relid != LargeObjectRelationId)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied: \"%s\" is a system catalog", + relname))); + + InvokeObjectTruncateHook(relid); +} + +/* + * Check that current user has the permission to truncate given relation. + */ +static void +truncate_check_perms(Oid relid, Form_pg_class reltuple) +{ + char *relname = NameStr(reltuple->relname); + AclResult aclresult; + + /* Permissions checks */ + aclresult = pg_class_aclcheck(relid, GetUserId(), ACL_TRUNCATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, get_relkind_objtype(reltuple->relkind), + relname); +} + +/* + * Set of extra sanity checks to check if a given relation is safe to + * truncate. This is split with truncate_check_rel() as + * RangeVarCallbackForTruncate() cannot open a Relation yet. + */ +static void +truncate_check_activity(Relation rel) +{ + /* + * Don't allow truncate on temp tables of other backends ... 
their local
	 * buffer manager is not going to cope.
	 */
	if (RELATION_IS_OTHER_TEMP(rel))
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot truncate temporary tables of other sessions")));

	/*
	 * Also check for active uses of the relation in the current transaction,
	 * including open scans and pending AFTER trigger events.
	 */
	CheckTableNotInUse(rel, "TRUNCATE");
}

/*
 * storage_name
 *	  returns the name corresponding to a typstorage/attstorage enum value
 */
static const char *
storage_name(char c)
{
	switch (c)
	{
		case TYPSTORAGE_PLAIN:
			return "PLAIN";
		case TYPSTORAGE_EXTERNAL:
			return "EXTERNAL";
		case TYPSTORAGE_EXTENDED:
			return "EXTENDED";
		case TYPSTORAGE_MAIN:
			return "MAIN";
		default:
			/* unrecognized storage value */
			return "???";
	}
}

/*----------
 * MergeAttributes
 *		Returns new schema given initial schema and superclasses.
 *
 * Input arguments:
 * 'schema' is the column/attribute definition for the table. (It's a list
 *		of ColumnDef's.) It is destructively changed.
 * 'supers' is a list of OIDs of parent relations, already locked by caller.
 * 'relpersistence' is the persistence type of the table.
 * 'is_partition' tells if the table is a partition.
 *
 * Output arguments:
 * 'supconstr' receives a list of constraints belonging to the parents,
 *		updated as necessary to be valid for the child.
 *
 * Return value:
 * Completed schema list.
 *
 * Notes:
 *	  The order in which the attributes are inherited is very important.
 *	  Intuitively, the inherited attributes should come first. If a table
 *	  inherits from multiple parents, the order of those attributes are
 *	  according to the order of the parents specified in CREATE TABLE.
+ * + * Here's an example: + * + * create table person (name text, age int4, location point); + * create table emp (salary int4, manager text) inherits(person); + * create table student (gpa float8) inherits (person); + * create table stud_emp (percent int4) inherits (emp, student); + * + * The order of the attributes of stud_emp is: + * + * person {1:name, 2:age, 3:location} + * / \ + * {6:gpa} student emp {4:salary, 5:manager} + * \ / + * stud_emp {7:percent} + * + * If the same attribute name appears multiple times, then it appears + * in the result table in the proper location for its first appearance. + * + * Constraints (including NOT NULL constraints) for the child table + * are the union of all relevant constraints, from both the child schema + * and parent tables. + * + * The default value for a child column is defined as: + * (1) If the child schema specifies a default, that value is used. + * (2) If neither the child nor any parent specifies a default, then + * the column will not have a default. + * (3) If conflicting defaults are inherited from different parents + * (and not overridden by the child), an error is raised. + * (4) Otherwise the inherited default is used. + * Rule (3) is new in Postgres 7.1; in earlier releases you got a + * rather arbitrary choice of which parent default to use. + *---------- + */ +static List * +MergeAttributes(List *schema, List *supers, char relpersistence, + bool is_partition, List **supconstr) +{ + List *inhSchema = NIL; + List *constraints = NIL; + bool have_bogus_defaults = false; + int child_attno; + static Node bogus_marker = {0}; /* marks conflicting defaults */ + List *saved_schema = NIL; + ListCell *entry; + + /* + * Check for and reject tables with too many columns. 
We perform this + * check relatively early for two reasons: (a) we don't run the risk of + * overflowing an AttrNumber in subsequent code (b) an O(n^2) algorithm is + * okay if we're processing <= 1600 columns, but could take minutes to + * execute if the user attempts to create a table with hundreds of + * thousands of columns. + * + * Note that we also need to check that we do not exceed this figure after + * including columns from inherited relations. + */ + if (list_length(schema) > MaxHeapAttributeNumber) + ereport(ERROR, + (errcode(ERRCODE_TOO_MANY_COLUMNS), + errmsg("tables can have at most %d columns", + MaxHeapAttributeNumber))); + + /* + * Check for duplicate names in the explicit list of attributes. + * + * Although we might consider merging such entries in the same way that we + * handle name conflicts for inherited attributes, it seems to make more + * sense to assume such conflicts are errors. + * + * We don't use foreach() here because we have two nested loops over the + * schema list, with possible element deletions in the inner one. If we + * used foreach_delete_current() it could only fix up the state of one of + * the loops, so it seems cleaner to use looping over list indexes for + * both loops. Note that any deletion will happen beyond where the outer + * loop is, so its index never needs adjustment. + */ + for (int coldefpos = 0; coldefpos < list_length(schema); coldefpos++) + { + ColumnDef *coldef = list_nth_node(ColumnDef, schema, coldefpos); + + if (!is_partition && coldef->typeName == NULL) + { + /* + * Typed table column option that does not belong to a column from + * the type. This works because the columns from the type come + * first in the list. (We omit this check for partition column + * lists; those are processed separately below.) 
+ */ + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" does not exist", + coldef->colname))); + } + + /* restpos scans all entries beyond coldef; incr is in loop body */ + for (int restpos = coldefpos + 1; restpos < list_length(schema);) + { + ColumnDef *restdef = list_nth_node(ColumnDef, schema, restpos); + + if (strcmp(coldef->colname, restdef->colname) == 0) + { + if (coldef->is_from_type) + { + /* + * merge the column options into the column from the type + */ + coldef->is_not_null = restdef->is_not_null; + coldef->raw_default = restdef->raw_default; + coldef->cooked_default = restdef->cooked_default; + coldef->constraints = restdef->constraints; + coldef->is_from_type = false; + schema = list_delete_nth_cell(schema, restpos); + } + else + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_COLUMN), + errmsg("column \"%s\" specified more than once", + coldef->colname))); + } + else + restpos++; + } + } + + /* + * In case of a partition, there are no new column definitions, only dummy + * ColumnDefs created for column constraints. Set them aside for now and + * process them at the end. + */ + if (is_partition) + { + saved_schema = schema; + schema = NIL; + } + + /* + * Scan the parents left-to-right, and merge their attributes to form a + * list of inherited attributes (inhSchema). Also check to see if we need + * to inherit an OID column. + */ + child_attno = 0; + foreach(entry, supers) + { + Oid parent = lfirst_oid(entry); + Relation relation; + TupleDesc tupleDesc; + TupleConstr *constr; + AttrMap *newattmap; + List *inherited_defaults; + List *cols_with_defaults; + AttrNumber parent_attno; + ListCell *lc1; + ListCell *lc2; + + /* caller already got lock */ + relation = table_open(parent, NoLock); + + /* + * Check for active uses of the parent partitioned table in the + * current transaction, such as being used in some manner by an + * enclosing command. + */ + if (is_partition) + CheckTableNotInUse(relation, "CREATE TABLE .. 
PARTITION OF"); + + /* + * We do not allow partitioned tables and partitions to participate in + * regular inheritance. + */ + if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE && + !is_partition) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot inherit from partitioned table \"%s\"", + RelationGetRelationName(relation)))); + if (relation->rd_rel->relispartition && !is_partition) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot inherit from partition \"%s\"", + RelationGetRelationName(relation)))); + + if (relation->rd_rel->relkind != RELKIND_RELATION && + relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE && + relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("inherited relation \"%s\" is not a table or foreign table", + RelationGetRelationName(relation)))); + + /* + * If the parent is permanent, so must be all of its partitions. Note + * that inheritance allows that case. + */ + if (is_partition && + relation->rd_rel->relpersistence != RELPERSISTENCE_TEMP && + relpersistence == RELPERSISTENCE_TEMP) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot create a temporary relation as partition of permanent relation \"%s\"", + RelationGetRelationName(relation)))); + + /* Permanent rels cannot inherit from temporary ones */ + if (relpersistence != RELPERSISTENCE_TEMP && + relation->rd_rel->relpersistence == RELPERSISTENCE_TEMP) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg(!is_partition + ? "cannot inherit from temporary relation \"%s\"" + : "cannot create a permanent relation as partition of temporary relation \"%s\"", + RelationGetRelationName(relation)))); + + /* If existing rel is temp, it must belong to this session */ + if (relation->rd_rel->relpersistence == RELPERSISTENCE_TEMP && + !relation->rd_islocaltemp) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg(!is_partition + ? 
"cannot inherit from temporary relation of another session" + : "cannot create as partition of temporary relation of another session"))); + + /* + * We should have an UNDER permission flag for this, but for now, + * demand that creator of a child table own the parent. + */ + if (!pg_class_ownercheck(RelationGetRelid(relation), GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(relation->rd_rel->relkind), + RelationGetRelationName(relation)); + + tupleDesc = RelationGetDescr(relation); + constr = tupleDesc->constr; + + /* + * newattmap->attnums[] will contain the child-table attribute numbers + * for the attributes of this parent table. (They are not the same + * for parents after the first one, nor if we have dropped columns.) + */ + newattmap = make_attrmap(tupleDesc->natts); + + /* We can't process inherited defaults until newattmap is complete. */ + inherited_defaults = cols_with_defaults = NIL; + + for (parent_attno = 1; parent_attno <= tupleDesc->natts; + parent_attno++) + { + Form_pg_attribute attribute = TupleDescAttr(tupleDesc, + parent_attno - 1); + char *attributeName = NameStr(attribute->attname); + int exist_attno; + ColumnDef *def; + + /* + * Ignore dropped columns in the parent. + */ + if (attribute->attisdropped) + continue; /* leave newattmap->attnums entry as zero */ + + /* + * Does it conflict with some previously inherited column? + */ + exist_attno = findAttrByName(attributeName, inhSchema); + if (exist_attno > 0) + { + Oid defTypeId; + int32 deftypmod; + Oid defCollId; + + /* + * Yes, try to merge the two column definitions. They must + * have the same type, typmod, and collation. 
+ */ + ereport(NOTICE, + (errmsg("merging multiple inherited definitions of column \"%s\"", + attributeName))); + def = (ColumnDef *) list_nth(inhSchema, exist_attno - 1); + typenameTypeIdAndMod(NULL, def->typeName, &defTypeId, &deftypmod); + if (defTypeId != attribute->atttypid || + deftypmod != attribute->atttypmod) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("inherited column \"%s\" has a type conflict", + attributeName), + errdetail("%s versus %s", + format_type_with_typemod(defTypeId, + deftypmod), + format_type_with_typemod(attribute->atttypid, + attribute->atttypmod)))); + defCollId = GetColumnDefCollation(NULL, def, defTypeId); + if (defCollId != attribute->attcollation) + ereport(ERROR, + (errcode(ERRCODE_COLLATION_MISMATCH), + errmsg("inherited column \"%s\" has a collation conflict", + attributeName), + errdetail("\"%s\" versus \"%s\"", + get_collation_name(defCollId), + get_collation_name(attribute->attcollation)))); + + /* Copy/check storage parameter */ + if (def->storage == 0) + def->storage = attribute->attstorage; + else if (def->storage != attribute->attstorage) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("inherited column \"%s\" has a storage parameter conflict", + attributeName), + errdetail("%s versus %s", + storage_name(def->storage), + storage_name(attribute->attstorage)))); + + /* Copy/check compression parameter */ + if (CompressionMethodIsValid(attribute->attcompression)) + { + const char *compression = + GetCompressionMethodName(attribute->attcompression); + + if (def->compression == NULL) + def->compression = pstrdup(compression); + else if (strcmp(def->compression, compression) != 0) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("column \"%s\" has a compression method conflict", + attributeName), + errdetail("%s versus %s", def->compression, compression))); + } + + def->inhcount++; + /* Merge of NOT NULL constraints = OR 'em together */ + def->is_not_null |= 
attribute->attnotnull; + /* Default and other constraints are handled below */ + newattmap->attnums[parent_attno - 1] = exist_attno; + + /* Check for GENERATED conflicts */ + if (def->generated != attribute->attgenerated) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("inherited column \"%s\" has a generation conflict", + attributeName))); + } + else + { + /* + * No, create a new inherited column + */ + def = makeNode(ColumnDef); + def->colname = pstrdup(attributeName); + def->typeName = makeTypeNameFromOid(attribute->atttypid, + attribute->atttypmod); + def->inhcount = 1; + def->is_local = false; + def->is_not_null = attribute->attnotnull; + def->is_from_type = false; + def->storage = attribute->attstorage; + def->raw_default = NULL; + def->cooked_default = NULL; + def->generated = attribute->attgenerated; + def->collClause = NULL; + def->collOid = attribute->attcollation; + def->constraints = NIL; + def->location = -1; + if (CompressionMethodIsValid(attribute->attcompression)) + def->compression = + pstrdup(GetCompressionMethodName(attribute->attcompression)); + else + def->compression = NULL; + inhSchema = lappend(inhSchema, def); + newattmap->attnums[parent_attno - 1] = ++child_attno; + } + + /* + * Locate default if any + */ + if (attribute->atthasdef) + { + Node *this_default = NULL; + + /* Find default in constraint structure */ + if (constr != NULL) + { + AttrDefault *attrdef = constr->defval; + + for (int i = 0; i < constr->num_defval; i++) + { + if (attrdef[i].adnum == parent_attno) + { + this_default = stringToNode(attrdef[i].adbin); + break; + } + } + } + if (this_default == NULL) + elog(ERROR, "default expression not found for attribute %d of relation \"%s\"", + parent_attno, RelationGetRelationName(relation)); + + /* + * If it's a GENERATED default, it might contain Vars that + * need to be mapped to the inherited column(s)' new numbers. 
+ * We can't do that till newattmap is ready, so just remember + * all the inherited default expressions for the moment. + */ + inherited_defaults = lappend(inherited_defaults, this_default); + cols_with_defaults = lappend(cols_with_defaults, def); + } + } + + /* + * Now process any inherited default expressions, adjusting attnos + * using the completed newattmap map. + */ + forboth(lc1, inherited_defaults, lc2, cols_with_defaults) + { + Node *this_default = (Node *) lfirst(lc1); + ColumnDef *def = (ColumnDef *) lfirst(lc2); + bool found_whole_row; + + /* Adjust Vars to match new table's column numbering */ + this_default = map_variable_attnos(this_default, + 1, 0, + newattmap, + InvalidOid, &found_whole_row); + + /* + * For the moment we have to reject whole-row variables. We could + * convert them, if we knew the new table's rowtype OID, but that + * hasn't been assigned yet. (A variable could only appear in a + * generation expression, so the error message is correct.) + */ + if (found_whole_row) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot convert whole-row table reference"), + errdetail("Generation expression for column \"%s\" contains a whole-row reference to table \"%s\".", + def->colname, + RelationGetRelationName(relation)))); + + /* + * If we already had a default from some prior parent, check to + * see if they are the same. If so, no problem; if not, mark the + * column as having a bogus default. Below, we will complain if + * the bogus default isn't overridden by the child schema. + */ + Assert(def->raw_default == NULL); + if (def->cooked_default == NULL) + def->cooked_default = this_default; + else if (!equal(def->cooked_default, this_default)) + { + def->cooked_default = &bogus_marker; + have_bogus_defaults = true; + } + } + + /* + * Now copy the CHECK constraints of this parent, adjusting attnos + * using the completed newattmap map. Identically named constraints + * are merged if possible, else we throw error. 
+ */ + if (constr && constr->num_check > 0) + { + ConstrCheck *check = constr->check; + int i; + + for (i = 0; i < constr->num_check; i++) + { + char *name = check[i].ccname; + Node *expr; + bool found_whole_row; + + /* ignore if the constraint is non-inheritable */ + if (check[i].ccnoinherit) + continue; + + /* Adjust Vars to match new table's column numbering */ + expr = map_variable_attnos(stringToNode(check[i].ccbin), + 1, 0, + newattmap, + InvalidOid, &found_whole_row); + + /* + * For the moment we have to reject whole-row variables. We + * could convert them, if we knew the new table's rowtype OID, + * but that hasn't been assigned yet. + */ + if (found_whole_row) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot convert whole-row table reference"), + errdetail("Constraint \"%s\" contains a whole-row reference to table \"%s\".", + name, + RelationGetRelationName(relation)))); + + /* check for duplicate */ + if (!MergeCheckConstraint(constraints, name, expr)) + { + /* nope, this is a new one */ + CookedConstraint *cooked; + + cooked = (CookedConstraint *) palloc(sizeof(CookedConstraint)); + cooked->contype = CONSTR_CHECK; + cooked->conoid = InvalidOid; /* until created */ + cooked->name = pstrdup(name); + cooked->attnum = 0; /* not used for constraints */ + cooked->expr = expr; + cooked->skip_validation = false; + cooked->is_local = false; + cooked->inhcount = 1; + cooked->is_no_inherit = false; + constraints = lappend(constraints, cooked); + } + } + } + + free_attrmap(newattmap); + + /* + * Close the parent rel, but keep our lock on it until xact commit. + * That will prevent someone else from deleting or ALTERing the parent + * before the child is committed. + */ + table_close(relation, NoLock); + } + + /* + * If we had no inherited attributes, the result schema is just the + * explicitly declared columns. Otherwise, we need to merge the declared + * columns into the inherited schema list. 
Although, we never have any + * explicitly declared columns if the table is a partition. + */ + if (inhSchema != NIL) + { + int schema_attno = 0; + + foreach(entry, schema) + { + ColumnDef *newdef = lfirst(entry); + char *attributeName = newdef->colname; + int exist_attno; + + schema_attno++; + + /* + * Does it conflict with some previously inherited column? + */ + exist_attno = findAttrByName(attributeName, inhSchema); + if (exist_attno > 0) + { + ColumnDef *def; + Oid defTypeId, + newTypeId; + int32 deftypmod, + newtypmod; + Oid defcollid, + newcollid; + + /* + * Partitions have only one parent and have no column + * definitions of their own, so conflict should never occur. + */ + Assert(!is_partition); + + /* + * Yes, try to merge the two column definitions. They must + * have the same type, typmod, and collation. + */ + if (exist_attno == schema_attno) + ereport(NOTICE, + (errmsg("merging column \"%s\" with inherited definition", + attributeName))); + else + ereport(NOTICE, + (errmsg("moving and merging column \"%s\" with inherited definition", attributeName), + errdetail("User-specified column moved to the position of the inherited column."))); + def = (ColumnDef *) list_nth(inhSchema, exist_attno - 1); + typenameTypeIdAndMod(NULL, def->typeName, &defTypeId, &deftypmod); + typenameTypeIdAndMod(NULL, newdef->typeName, &newTypeId, &newtypmod); + if (defTypeId != newTypeId || deftypmod != newtypmod) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("column \"%s\" has a type conflict", + attributeName), + errdetail("%s versus %s", + format_type_with_typemod(defTypeId, + deftypmod), + format_type_with_typemod(newTypeId, + newtypmod)))); + defcollid = GetColumnDefCollation(NULL, def, defTypeId); + newcollid = GetColumnDefCollation(NULL, newdef, newTypeId); + if (defcollid != newcollid) + ereport(ERROR, + (errcode(ERRCODE_COLLATION_MISMATCH), + errmsg("column \"%s\" has a collation conflict", + attributeName), + errdetail("\"%s\" versus \"%s\"", + 
get_collation_name(defcollid), + get_collation_name(newcollid)))); + + /* + * Identity is never inherited. The new column can have an + * identity definition, so we always just take that one. + */ + def->identity = newdef->identity; + + /* Copy storage parameter */ + if (def->storage == 0) + def->storage = newdef->storage; + else if (newdef->storage != 0 && def->storage != newdef->storage) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("column \"%s\" has a storage parameter conflict", + attributeName), + errdetail("%s versus %s", + storage_name(def->storage), + storage_name(newdef->storage)))); + + /* Copy compression parameter */ + if (def->compression == NULL) + def->compression = newdef->compression; + else if (newdef->compression != NULL) + { + if (strcmp(def->compression, newdef->compression) != 0) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("column \"%s\" has a compression method conflict", + attributeName), + errdetail("%s versus %s", def->compression, newdef->compression))); + } + + /* Mark the column as locally defined */ + def->is_local = true; + /* Merge of NOT NULL constraints = OR 'em together */ + def->is_not_null |= newdef->is_not_null; + + /* + * Check for conflicts related to generated columns. + * + * If the parent column is generated, the child column must be + * unadorned and will be made a generated column. (We could + * in theory allow the child column definition specifying the + * exact same generation expression, but that's a bit + * complicated to implement and doesn't seem very useful.) We + * also check that the child column doesn't specify a default + * value or identity, which matches the rules for a single + * column in parse_util.c. 
+ */ + if (def->generated) + { + if (newdef->generated) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_DEFINITION), + errmsg("child column \"%s\" specifies generation expression", + def->colname), + errhint("Omit the generation expression in the definition of the child table column to inherit the generation expression from the parent table."))); + if (newdef->raw_default && !newdef->generated) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_DEFINITION), + errmsg("column \"%s\" inherits from generated column but specifies default", + def->colname))); + if (newdef->identity) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_DEFINITION), + errmsg("column \"%s\" inherits from generated column but specifies identity", + def->colname))); + } + + /* + * If the parent column is not generated, then take whatever + * the child column definition says. + */ + else + { + if (newdef->generated) + def->generated = newdef->generated; + } + + /* If new def has a default, override previous default */ + if (newdef->raw_default != NULL) + { + def->raw_default = newdef->raw_default; + def->cooked_default = newdef->cooked_default; + } + } + else + { + /* + * No, attach new column to result schema + */ + inhSchema = lappend(inhSchema, newdef); + } + } + + schema = inhSchema; + + /* + * Check that we haven't exceeded the legal # of columns after merging + * in inherited columns. + */ + if (list_length(schema) > MaxHeapAttributeNumber) + ereport(ERROR, + (errcode(ERRCODE_TOO_MANY_COLUMNS), + errmsg("tables can have at most %d columns", + MaxHeapAttributeNumber))); + } + + /* + * Now that we have the column definition list for a partition, we can + * check whether the columns referenced in the column constraint specs + * actually exist. Also, we merge NOT NULL and defaults into each + * corresponding column definition. 
+ */ + if (is_partition) + { + foreach(entry, saved_schema) + { + ColumnDef *restdef = lfirst(entry); + bool found = false; + ListCell *l; + + foreach(l, schema) + { + ColumnDef *coldef = lfirst(l); + + if (strcmp(coldef->colname, restdef->colname) == 0) + { + found = true; + coldef->is_not_null |= restdef->is_not_null; + + /* + * Override the parent's default value for this column + * (coldef->cooked_default) with the partition's local + * definition (restdef->raw_default), if there's one. It + * should be physically impossible to get a cooked default + * in the local definition or a raw default in the + * inherited definition, but make sure they're nulls, for + * future-proofing. + */ + Assert(restdef->cooked_default == NULL); + Assert(coldef->raw_default == NULL); + if (restdef->raw_default) + { + coldef->raw_default = restdef->raw_default; + coldef->cooked_default = NULL; + } + } + } + + /* complain for constraints on columns not in parent */ + if (!found) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" does not exist", + restdef->colname))); + } + } + + /* + * If we found any conflicting parent default values, check to make sure + * they were overridden by the child. 
+ */ + if (have_bogus_defaults) + { + foreach(entry, schema) + { + ColumnDef *def = lfirst(entry); + + if (def->cooked_default == &bogus_marker) + { + if (def->generated) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_DEFINITION), + errmsg("column \"%s\" inherits conflicting generation expressions", + def->colname))); + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_DEFINITION), + errmsg("column \"%s\" inherits conflicting default values", + def->colname), + errhint("To resolve the conflict, specify a default explicitly."))); + } + } + } + + *supconstr = constraints; + return schema; +} + + +/* + * MergeCheckConstraint + * Try to merge an inherited CHECK constraint with previous ones + * + * If we inherit identically-named constraints from multiple parents, we must + * merge them, or throw an error if they don't have identical definitions. + * + * constraints is a list of CookedConstraint structs for previous constraints. + * + * Returns true if merged (constraint is a duplicate), or false if it's + * got a so-far-unique name, or throws error if conflict. + */ +static bool +MergeCheckConstraint(List *constraints, char *name, Node *expr) +{ + ListCell *lc; + + foreach(lc, constraints) + { + CookedConstraint *ccon = (CookedConstraint *) lfirst(lc); + + Assert(ccon->contype == CONSTR_CHECK); + + /* Non-matching names never conflict */ + if (strcmp(ccon->name, name) != 0) + continue; + + if (equal(expr, ccon->expr)) + { + /* OK to merge */ + ccon->inhcount++; + return true; + } + + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("check constraint name \"%s\" appears multiple times but with different expressions", + name))); + } + + return false; +} + + +/* + * StoreCatalogInheritance + * Updates the system catalogs with proper inheritance information. + * + * supers is a list of the OIDs of the new relation's direct ancestors. 
+ */ +static void +StoreCatalogInheritance(Oid relationId, List *supers, + bool child_is_partition) +{ + Relation relation; + int32 seqNumber; + ListCell *entry; + + /* + * sanity checks + */ + AssertArg(OidIsValid(relationId)); + + if (supers == NIL) + return; + + /* + * Store INHERITS information in pg_inherits using direct ancestors only. + * Also enter dependencies on the direct ancestors, and make sure they are + * marked with relhassubclass = true. + * + * (Once upon a time, both direct and indirect ancestors were found here + * and then entered into pg_ipl. Since that catalog doesn't exist + * anymore, there's no need to look for indirect ancestors.) + */ + relation = table_open(InheritsRelationId, RowExclusiveLock); + + seqNumber = 1; + foreach(entry, supers) + { + Oid parentOid = lfirst_oid(entry); + + StoreCatalogInheritance1(relationId, parentOid, seqNumber, relation, + child_is_partition); + seqNumber++; + } + + table_close(relation, RowExclusiveLock); +} + +/* + * Make catalog entries showing relationId as being an inheritance child + * of parentOid. inhRelation is the already-opened pg_inherits catalog. + */ +static void +StoreCatalogInheritance1(Oid relationId, Oid parentOid, + int32 seqNumber, Relation inhRelation, + bool child_is_partition) +{ + ObjectAddress childobject, + parentobject; + + /* store the pg_inherits row */ + StoreSingleInheritance(relationId, parentOid, seqNumber); + + /* + * Store a dependency too + */ + parentobject.classId = RelationRelationId; + parentobject.objectId = parentOid; + parentobject.objectSubId = 0; + childobject.classId = RelationRelationId; + childobject.objectId = relationId; + childobject.objectSubId = 0; + + recordDependencyOn(&childobject, &parentobject, + child_dependency_type(child_is_partition)); + + /* + * Post creation hook of this inheritance. Since object_access_hook + * doesn't take multiple object identifiers, we relay oid of parent + * relation using auxiliary_id argument. 
+ */ + InvokeObjectPostAlterHookArg(InheritsRelationId, + relationId, 0, + parentOid, false); + + /* + * Mark the parent as having subclasses. + */ + SetRelationHasSubclass(parentOid, true); +} + +/* + * Look for an existing schema entry with the given name. + * + * Returns the index (starting with 1) if attribute already exists in schema, + * 0 if it doesn't. + */ +static int +findAttrByName(const char *attributeName, List *schema) +{ + ListCell *s; + int i = 1; + + foreach(s, schema) + { + ColumnDef *def = lfirst(s); + + if (strcmp(attributeName, def->colname) == 0) + return i; + + i++; + } + return 0; +} + + +/* + * SetRelationHasSubclass + * Set the value of the relation's relhassubclass field in pg_class. + * + * NOTE: caller must be holding an appropriate lock on the relation. + * ShareUpdateExclusiveLock is sufficient. + * + * NOTE: an important side-effect of this operation is that an SI invalidation + * message is sent out to all backends --- including me --- causing plans + * referencing the relation to be rebuilt with the new list of children. + * This must happen even if we find that no change is needed in the pg_class + * row. + */ +void +SetRelationHasSubclass(Oid relationId, bool relhassubclass) +{ + Relation relationRelation; + HeapTuple tuple; + Form_pg_class classtuple; + + /* + * Fetch a modifiable copy of the tuple, modify it, update pg_class. 
+ */ + relationRelation = table_open(RelationRelationId, RowExclusiveLock); + tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relationId)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for relation %u", relationId); + classtuple = (Form_pg_class) GETSTRUCT(tuple); + + if (classtuple->relhassubclass != relhassubclass) + { + classtuple->relhassubclass = relhassubclass; + CatalogTupleUpdate(relationRelation, &tuple->t_self, tuple); + } + else + { + /* no need to change tuple, but force relcache rebuild anyway */ + CacheInvalidateRelcacheByTuple(tuple); + } + + heap_freetuple(tuple); + table_close(relationRelation, RowExclusiveLock); +} + +/* + * CheckRelationTableSpaceMove + * Check if relation can be moved to new tablespace. + * + * NOTE: The caller must hold AccessExclusiveLock on the relation. + * + * Returns true if the relation can be moved to the new tablespace; raises + * an error if it is not possible to do the move; returns false if the move + * would have no effect. + */ +bool +CheckRelationTableSpaceMove(Relation rel, Oid newTableSpaceId) +{ + Oid oldTableSpaceId; + + /* + * No work if no change in tablespace. Note that MyDatabaseTableSpace is + * stored as 0. + */ + oldTableSpaceId = rel->rd_rel->reltablespace; + if (newTableSpaceId == oldTableSpaceId || + (newTableSpaceId == MyDatabaseTableSpace && oldTableSpaceId == 0)) + return false; + + /* + * We cannot support moving mapped relations into different tablespaces. + * (In particular this eliminates all shared catalogs.) 
+ */ + if (RelationIsMapped(rel)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot move system relation \"%s\"", + RelationGetRelationName(rel)))); + + /* Cannot move a non-shared relation into pg_global */ + if (newTableSpaceId == GLOBALTABLESPACE_OID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("only shared relations can be placed in pg_global tablespace"))); + + /* + * Do not allow moving temp tables of other backends ... their local + * buffer manager is not going to cope. + */ + if (RELATION_IS_OTHER_TEMP(rel)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot move temporary tables of other sessions"))); + + return true; +} + +/* + * SetRelationTableSpace + * Set new reltablespace and relfilenode in pg_class entry. + * + * newTableSpaceId is the new tablespace for the relation, and + * newRelFileNode its new filenode. If newRelFileNode is InvalidOid, + * this field is not updated. + * + * NOTE: The caller must hold AccessExclusiveLock on the relation. + * + * The caller of this routine had better check if a relation can be + * moved to this new tablespace by calling CheckRelationTableSpaceMove() + * first, and is responsible for making the change visible with + * CommandCounterIncrement(). + */ +void +SetRelationTableSpace(Relation rel, + Oid newTableSpaceId, + Oid newRelFileNode) +{ + Relation pg_class; + HeapTuple tuple; + Form_pg_class rd_rel; + Oid reloid = RelationGetRelid(rel); + + Assert(CheckRelationTableSpaceMove(rel, newTableSpaceId)); + + /* Get a modifiable copy of the relation's pg_class row. */ + pg_class = table_open(RelationRelationId, RowExclusiveLock); + + tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(reloid)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for relation %u", reloid); + rd_rel = (Form_pg_class) GETSTRUCT(tuple); + + /* Update the pg_class row. */ + rd_rel->reltablespace = (newTableSpaceId == MyDatabaseTableSpace) ? 
+ InvalidOid : newTableSpaceId; + if (OidIsValid(newRelFileNode)) + rd_rel->relfilenode = newRelFileNode; + CatalogTupleUpdate(pg_class, &tuple->t_self, tuple); + + /* + * Record dependency on tablespace. This is only required for relations + * that have no physical storage. + */ + if (!RELKIND_HAS_STORAGE(rel->rd_rel->relkind)) + changeDependencyOnTablespace(RelationRelationId, reloid, + rd_rel->reltablespace); + + heap_freetuple(tuple); + table_close(pg_class, RowExclusiveLock); +} + +/* + * renameatt_check - basic sanity checks before attribute rename + */ +static void +renameatt_check(Oid myrelid, Form_pg_class classform, bool recursing) +{ + char relkind = classform->relkind; + + if (classform->reloftype && !recursing) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot rename column of typed table"))); + + /* + * Renaming the columns of sequences or toast tables doesn't actually + * break anything from the system's point of view, since internal + * references are by attnum. But it doesn't seem right to allow users to + * change names that are hardcoded into the system, hence the following + * restriction. + */ + if (relkind != RELKIND_RELATION && + relkind != RELKIND_VIEW && + relkind != RELKIND_MATVIEW && + relkind != RELKIND_COMPOSITE_TYPE && + relkind != RELKIND_INDEX && + relkind != RELKIND_PARTITIONED_INDEX && + relkind != RELKIND_FOREIGN_TABLE && + relkind != RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot rename columns of relation \"%s\"", + NameStr(classform->relname)), + errdetail_relkind_not_supported(relkind))); + + /* + * permissions checking. only the owner of a class can change its schema. 
+ */ + if (!pg_class_ownercheck(myrelid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(get_rel_relkind(myrelid)), + NameStr(classform->relname)); + if (!allowSystemTableMods && IsSystemClass(myrelid, classform)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied: \"%s\" is a system catalog", + NameStr(classform->relname)))); +} + +/* + * renameatt_internal - workhorse for renameatt + * + * Return value is the attribute number in the 'myrelid' relation. + */ +static AttrNumber +renameatt_internal(Oid myrelid, + const char *oldattname, + const char *newattname, + bool recurse, + bool recursing, + int expected_parents, + DropBehavior behavior) +{ + Relation targetrelation; + Relation attrelation; + HeapTuple atttup; + Form_pg_attribute attform; + AttrNumber attnum; + + /* + * Grab an exclusive lock on the target table, which we will NOT release + * until end of transaction. + */ + targetrelation = relation_open(myrelid, AccessExclusiveLock); + renameatt_check(myrelid, RelationGetForm(targetrelation), recursing); + + /* + * if the 'recurse' flag is set then we are supposed to rename this + * attribute in all classes that inherit from 'relname' (as well as in + * 'relname'). + * + * any permissions or problems with duplicate attributes will cause the + * whole transaction to abort, which is what we want -- all or nothing. + */ + if (recurse) + { + List *child_oids, + *child_numparents; + ListCell *lo, + *li; + + /* + * we need the number of parents for each child so that the recursive + * calls to renameatt() can determine whether there are any parents + * outside the inheritance hierarchy being processed. + */ + child_oids = find_all_inheritors(myrelid, AccessExclusiveLock, + &child_numparents); + + /* + * find_all_inheritors does the recursive search of the inheritance + * hierarchy, so all we have to do is process all of the relids in the + * list that it returns. 
+ */ + forboth(lo, child_oids, li, child_numparents) + { + Oid childrelid = lfirst_oid(lo); + int numparents = lfirst_int(li); + + if (childrelid == myrelid) + continue; + /* note we need not recurse again */ + renameatt_internal(childrelid, oldattname, newattname, false, true, numparents, behavior); + } + } + else + { + /* + * If we are told not to recurse, there had better not be any child + * tables; else the rename would put them out of step. + * + * expected_parents will only be 0 if we are not already recursing. + */ + if (expected_parents == 0 && + find_inheritance_children(myrelid, NoLock) != NIL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("inherited column \"%s\" must be renamed in child tables too", + oldattname))); + } + + /* rename attributes in typed tables of composite type */ + if (targetrelation->rd_rel->relkind == RELKIND_COMPOSITE_TYPE) + { + List *child_oids; + ListCell *lo; + + child_oids = find_typed_table_dependencies(targetrelation->rd_rel->reltype, + RelationGetRelationName(targetrelation), + behavior); + + foreach(lo, child_oids) + renameatt_internal(lfirst_oid(lo), oldattname, newattname, true, true, 0, behavior); + } + + attrelation = table_open(AttributeRelationId, RowExclusiveLock); + + atttup = SearchSysCacheCopyAttName(myrelid, oldattname); + if (!HeapTupleIsValid(atttup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" does not exist", + oldattname))); + attform = (Form_pg_attribute) GETSTRUCT(atttup); + + attnum = attform->attnum; + if (attnum <= 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot rename system column \"%s\"", + oldattname))); + + /* + * if the attribute is inherited, forbid the renaming. if this is a + * top-level call to renameatt(), then expected_parents will be 0, so the + * effect of this code will be to prohibit the renaming if the attribute + * is inherited at all. 
if this is a recursive call to renameatt(), + * expected_parents will be the number of parents the current relation has + * within the inheritance hierarchy being processed, so we'll prohibit the + * renaming only if there are additional parents from elsewhere. + */ + if (attform->attinhcount > expected_parents) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("cannot rename inherited column \"%s\"", + oldattname))); + + /* new name should not already exist */ + (void) check_for_column_name_collision(targetrelation, newattname, false); + + /* apply the update */ + namestrcpy(&(attform->attname), newattname); + + CatalogTupleUpdate(attrelation, &atttup->t_self, atttup); + + InvokeObjectPostAlterHook(RelationRelationId, myrelid, attnum); + + heap_freetuple(atttup); + + table_close(attrelation, RowExclusiveLock); + + relation_close(targetrelation, NoLock); /* close rel but keep lock */ + + return attnum; +} + +/* + * Perform permissions and integrity checks before acquiring a relation lock. + */ +static void +RangeVarCallbackForRenameAttribute(const RangeVar *rv, Oid relid, Oid oldrelid, + void *arg) +{ + HeapTuple tuple; + Form_pg_class form; + + tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid)); + if (!HeapTupleIsValid(tuple)) + return; /* concurrently dropped */ + form = (Form_pg_class) GETSTRUCT(tuple); + renameatt_check(relid, form, false); + ReleaseSysCache(tuple); +} + +/* + * renameatt - changes the name of an attribute in a relation + * + * The returned ObjectAddress is that of the renamed column. + */ +ObjectAddress +renameatt(RenameStmt *stmt) +{ + Oid relid; + AttrNumber attnum; + ObjectAddress address; + + /* lock level taken here should match renameatt_internal */ + relid = RangeVarGetRelidExtended(stmt->relation, AccessExclusiveLock, + stmt->missing_ok ? 
RVR_MISSING_OK : 0, + RangeVarCallbackForRenameAttribute, + NULL); + + if (!OidIsValid(relid)) + { + ereport(NOTICE, + (errmsg("relation \"%s\" does not exist, skipping", + stmt->relation->relname))); + return InvalidObjectAddress; + } + + attnum = + renameatt_internal(relid, + stmt->subname, /* old att name */ + stmt->newname, /* new att name */ + stmt->relation->inh, /* recursive? */ + false, /* recursing? */ + 0, /* expected inhcount */ + stmt->behavior); + + ObjectAddressSubSet(address, RelationRelationId, relid, attnum); + + return address; +} + +/* + * same logic as renameatt_internal + */ +static ObjectAddress +rename_constraint_internal(Oid myrelid, + Oid mytypid, + const char *oldconname, + const char *newconname, + bool recurse, + bool recursing, + int expected_parents) +{ + Relation targetrelation = NULL; + Oid constraintOid; + HeapTuple tuple; + Form_pg_constraint con; + ObjectAddress address; + + AssertArg(!myrelid || !mytypid); + + if (mytypid) + { + constraintOid = get_domain_constraint_oid(mytypid, oldconname, false); + } + else + { + targetrelation = relation_open(myrelid, AccessExclusiveLock); + + /* + * don't tell it whether we're recursing; we allow changing typed + * tables here + */ + renameatt_check(myrelid, RelationGetForm(targetrelation), false); + + constraintOid = get_relation_constraint_oid(myrelid, oldconname, false); + } + + tuple = SearchSysCache1(CONSTROID, ObjectIdGetDatum(constraintOid)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for constraint %u", + constraintOid); + con = (Form_pg_constraint) GETSTRUCT(tuple); + + if (myrelid && con->contype == CONSTRAINT_CHECK && !con->connoinherit) + { + if (recurse) + { + List *child_oids, + *child_numparents; + ListCell *lo, + *li; + + child_oids = find_all_inheritors(myrelid, AccessExclusiveLock, + &child_numparents); + + forboth(lo, child_oids, li, child_numparents) + { + Oid childrelid = lfirst_oid(lo); + int numparents = lfirst_int(li); + + if (childrelid == 
myrelid) + continue; + + rename_constraint_internal(childrelid, InvalidOid, oldconname, newconname, false, true, numparents); + } + } + else + { + if (expected_parents == 0 && + find_inheritance_children(myrelid, NoLock) != NIL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("inherited constraint \"%s\" must be renamed in child tables too", + oldconname))); + } + + if (con->coninhcount > expected_parents) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("cannot rename inherited constraint \"%s\"", + oldconname))); + } + + if (con->conindid + && (con->contype == CONSTRAINT_PRIMARY + || con->contype == CONSTRAINT_UNIQUE + || con->contype == CONSTRAINT_EXCLUSION)) + /* rename the index; this renames the constraint as well */ + RenameRelationInternal(con->conindid, newconname, false, true); + else + RenameConstraintById(constraintOid, newconname); + + ObjectAddressSet(address, ConstraintRelationId, constraintOid); + + ReleaseSysCache(tuple); + + if (targetrelation) + { + /* + * Invalidate relcache so as others can see the new constraint name. + */ + CacheInvalidateRelcache(targetrelation); + + relation_close(targetrelation, NoLock); /* close rel but keep lock */ + } + + return address; +} + +ObjectAddress +RenameConstraint(RenameStmt *stmt) +{ + Oid relid = InvalidOid; + Oid typid = InvalidOid; + + if (stmt->renameType == OBJECT_DOMCONSTRAINT) + { + Relation rel; + HeapTuple tup; + + typid = typenameTypeId(NULL, makeTypeNameFromNameList(castNode(List, stmt->object))); + rel = table_open(TypeRelationId, RowExclusiveLock); + tup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for type %u", typid); + checkDomainOwner(tup); + ReleaseSysCache(tup); + table_close(rel, NoLock); + } + else + { + /* lock level taken here should match rename_constraint_internal */ + relid = RangeVarGetRelidExtended(stmt->relation, AccessExclusiveLock, + stmt->missing_ok ? 
RVR_MISSING_OK : 0, + RangeVarCallbackForRenameAttribute, + NULL); + if (!OidIsValid(relid)) + { + ereport(NOTICE, + (errmsg("relation \"%s\" does not exist, skipping", + stmt->relation->relname))); + return InvalidObjectAddress; + } + } + + return + rename_constraint_internal(relid, typid, + stmt->subname, + stmt->newname, + (stmt->relation && + stmt->relation->inh), /* recursive? */ + false, /* recursing? */ + 0 /* expected inhcount */ ); +} + +/* + * Execute ALTER TABLE/INDEX/SEQUENCE/VIEW/MATERIALIZED VIEW/FOREIGN TABLE + * RENAME + */ +ObjectAddress +RenameRelation(RenameStmt *stmt) +{ + bool is_index_stmt = stmt->renameType == OBJECT_INDEX; + Oid relid; + ObjectAddress address; + + /* + * Grab an exclusive lock on the target table, index, sequence, view, + * materialized view, or foreign table, which we will NOT release until + * end of transaction. + * + * Lock level used here should match RenameRelationInternal, to avoid lock + * escalation. However, because ALTER INDEX can be used with any relation + * type, we mustn't believe without verification. + */ + for (;;) + { + LOCKMODE lockmode; + char relkind; + bool obj_is_index; + + lockmode = is_index_stmt ? ShareUpdateExclusiveLock : AccessExclusiveLock; + + relid = RangeVarGetRelidExtended(stmt->relation, lockmode, + stmt->missing_ok ? RVR_MISSING_OK : 0, + RangeVarCallbackForAlterRelation, + (void *) stmt); + + if (!OidIsValid(relid)) + { + ereport(NOTICE, + (errmsg("relation \"%s\" does not exist, skipping", + stmt->relation->relname))); + return InvalidObjectAddress; + } + + /* + * We allow mismatched statement and object types (e.g., ALTER INDEX + * to rename a table), but we might've used the wrong lock level. If + * that happens, retry with the correct lock level. We don't bother + * if we already acquired AccessExclusiveLock with an index, however. 
+ */ + relkind = get_rel_relkind(relid); + obj_is_index = (relkind == RELKIND_INDEX || + relkind == RELKIND_PARTITIONED_INDEX); + if (obj_is_index || is_index_stmt == obj_is_index) + break; + + UnlockRelationOid(relid, lockmode); + is_index_stmt = obj_is_index; + } + + /* Do the work */ + RenameRelationInternal(relid, stmt->newname, false, is_index_stmt); + + ObjectAddressSet(address, RelationRelationId, relid); + + return address; +} + +/* + * RenameRelationInternal - change the name of a relation + */ +void +RenameRelationInternal(Oid myrelid, const char *newrelname, bool is_internal, bool is_index) +{ + Relation targetrelation; + Relation relrelation; /* for RELATION relation */ + HeapTuple reltup; + Form_pg_class relform; + Oid namespaceId; + + /* + * Grab a lock on the target relation, which we will NOT release until end + * of transaction. We need at least a self-exclusive lock so that + * concurrent DDL doesn't overwrite the rename if they start updating + * while still seeing the old version. The lock also guards against + * triggering relcache reloads in concurrent sessions, which might not + * handle this information changing under them. For indexes, we can use a + * reduced lock level because RelationReloadIndexInfo() handles indexes + * specially. + */ + targetrelation = relation_open(myrelid, is_index ? ShareUpdateExclusiveLock : AccessExclusiveLock); + namespaceId = RelationGetNamespace(targetrelation); + + /* + * Find relation's pg_class tuple, and make sure newrelname isn't in use. 
+ */ + relrelation = table_open(RelationRelationId, RowExclusiveLock); + + reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(myrelid)); + if (!HeapTupleIsValid(reltup)) /* shouldn't happen */ + elog(ERROR, "cache lookup failed for relation %u", myrelid); + relform = (Form_pg_class) GETSTRUCT(reltup); + + if (get_relname_relid(newrelname, namespaceId) != InvalidOid) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_TABLE), + errmsg("relation \"%s\" already exists", + newrelname))); + + /* + * RenameRelation is careful not to believe the caller's idea of the + * relation kind being handled. We don't have to worry about this, but + * let's not be totally oblivious to it. We can process an index as + * not-an-index, but not the other way around. + */ + Assert(!is_index || + is_index == (targetrelation->rd_rel->relkind == RELKIND_INDEX || + targetrelation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX)); + + /* + * Update pg_class tuple with new relname. (Scribbling on reltup is OK + * because it's a copy...) + */ + namestrcpy(&(relform->relname), newrelname); + + CatalogTupleUpdate(relrelation, &reltup->t_self, reltup); + + InvokeObjectPostAlterHookArg(RelationRelationId, myrelid, 0, + InvalidOid, is_internal); + + heap_freetuple(reltup); + table_close(relrelation, RowExclusiveLock); + + /* + * Also rename the associated type, if any. + */ + if (OidIsValid(targetrelation->rd_rel->reltype)) + RenameTypeInternal(targetrelation->rd_rel->reltype, + newrelname, namespaceId); + + /* + * Also rename the associated constraint, if any. + */ + if (targetrelation->rd_rel->relkind == RELKIND_INDEX || + targetrelation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) + { + Oid constraintId = get_index_constraint(myrelid); + + if (OidIsValid(constraintId)) + RenameConstraintById(constraintId, newrelname); + } + + /* + * Close rel, but keep lock! 
+ */ + relation_close(targetrelation, NoLock); +} + +/* + * ResetRelRewrite - reset relrewrite + */ +void +ResetRelRewrite(Oid myrelid) +{ + Relation relrelation; /* for RELATION relation */ + HeapTuple reltup; + Form_pg_class relform; + + /* + * Find relation's pg_class tuple. + */ + relrelation = table_open(RelationRelationId, RowExclusiveLock); + + reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(myrelid)); + if (!HeapTupleIsValid(reltup)) /* shouldn't happen */ + elog(ERROR, "cache lookup failed for relation %u", myrelid); + relform = (Form_pg_class) GETSTRUCT(reltup); + + /* + * Update pg_class tuple. + */ + relform->relrewrite = InvalidOid; + + CatalogTupleUpdate(relrelation, &reltup->t_self, reltup); + + heap_freetuple(reltup); + table_close(relrelation, RowExclusiveLock); +} + +/* + * Disallow ALTER TABLE (and similar commands) when the current backend has + * any open reference to the target table besides the one just acquired by + * the calling command; this implies there's an open cursor or active plan. + * We need this check because our lock doesn't protect us against stomping + * on our own foot, only other people's feet! + * + * For ALTER TABLE, the only case known to cause serious trouble is ALTER + * COLUMN TYPE, and some changes are obviously pretty benign, so this could + * possibly be relaxed to only error out for certain types of alterations. + * But the use-case for allowing any of these things is not obvious, so we + * won't work hard at it for now. + * + * We also reject these commands if there are any pending AFTER trigger events + * for the rel. This is certainly necessary for the rewriting variants of + * ALTER TABLE, because they don't preserve tuple TIDs and so the pending + * events would try to fetch the wrong tuples. It might be overly cautious + * in other cases, but again it seems better to err on the side of paranoia. 
+ * + * REINDEX calls this with "rel" referencing the index to be rebuilt; here + * we are worried about active indexscans on the index. The trigger-event + * check can be skipped, since we are doing no damage to the parent table. + * + * The statement name (eg, "ALTER TABLE") is passed for use in error messages. + */ +void +CheckTableNotInUse(Relation rel, const char *stmt) +{ + int expected_refcnt; + + expected_refcnt = rel->rd_isnailed ? 2 : 1; + if (rel->rd_refcnt != expected_refcnt) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_IN_USE), + /* translator: first %s is a SQL command, eg ALTER TABLE */ + errmsg("cannot %s \"%s\" because it is being used by active queries in this session", + stmt, RelationGetRelationName(rel)))); + + if (rel->rd_rel->relkind != RELKIND_INDEX && + rel->rd_rel->relkind != RELKIND_PARTITIONED_INDEX && + AfterTriggerPendingOnRel(RelationGetRelid(rel))) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_IN_USE), + /* translator: first %s is a SQL command, eg ALTER TABLE */ + errmsg("cannot %s \"%s\" because it has pending trigger events", + stmt, RelationGetRelationName(rel)))); +} + +/* + * AlterTableLookupRelation + * Look up, and lock, the OID for the relation named by an alter table + * statement. + */ +Oid +AlterTableLookupRelation(AlterTableStmt *stmt, LOCKMODE lockmode) +{ + return RangeVarGetRelidExtended(stmt->relation, lockmode, + stmt->missing_ok ? RVR_MISSING_OK : 0, + RangeVarCallbackForAlterRelation, + (void *) stmt); +} + +/* + * AlterTable + * Execute ALTER TABLE, which can be a list of subcommands + * + * ALTER TABLE is performed in three phases: + * 1. Examine subcommands and perform pre-transformation checking. + * 2. Validate and transform subcommands, and update system catalogs. + * 3. Scan table(s) to check new constraints, and optionally recopy + * the data into new table(s). + * Phase 3 is not performed unless one or more of the subcommands requires + * it. 
The intention of this design is to allow multiple independent + * updates of the table schema to be performed with only one pass over the + * data. + * + * ATPrepCmd performs phase 1. A "work queue" entry is created for + * each table to be affected (there may be multiple affected tables if the + * commands traverse a table inheritance hierarchy). Also we do preliminary + * validation of the subcommands. Because earlier subcommands may change + * the catalog state seen by later commands, there are limits to what can + * be done in this phase. Generally, this phase acquires table locks, + * checks permissions and relkind, and recurses to find child tables. + * + * ATRewriteCatalogs performs phase 2 for each affected table. + * Certain subcommands need to be performed before others to avoid + * unnecessary conflicts; for example, DROP COLUMN should come before + * ADD COLUMN. Therefore phase 1 divides the subcommands into multiple + * lists, one for each logical "pass" of phase 2. + * + * ATRewriteTables performs phase 3 for those tables that need it. + * + * For most subcommand types, phases 2 and 3 do no explicit recursion, + * since phase 1 already does it. However, for certain subcommand types + * it is only possible to determine how to recurse at phase 2 time; for + * those cases, phase 1 sets the cmd->recurse flag (or, in some older coding, + * changes the command subtype of a "Recurse" variant XXX to be cleaned up.) + * + * Thanks to the magic of MVCC, an error anywhere along the way rolls back + * the whole operation; we don't have to do anything special to clean up. + * + * The caller must lock the relation, with an appropriate lock level + * for the subcommands requested, using AlterTableGetLockLevel(stmt->cmds) + * or higher. We pass the lock level down + * so that we can apply it recursively to inherited tables. Note that the + * lock level we want as we recurse might well be higher than required for + * that specific subcommand. 
So we pass down the overall lock requirement, + * rather than reassess it at lower levels. + * + * The caller also provides a "context" which is to be passed back to + * utility.c when we need to execute a subcommand such as CREATE INDEX. + * Some of the fields therein, such as the relid, are used here as well. + */ +void +AlterTable(AlterTableStmt *stmt, LOCKMODE lockmode, + AlterTableUtilityContext *context) +{ + Relation rel; + + /* Caller is required to provide an adequate lock. */ + rel = relation_open(context->relid, NoLock); + + CheckTableNotInUse(rel, "ALTER TABLE"); + + ATController(stmt, rel, stmt->cmds, stmt->relation->inh, lockmode, context); +} + +/* + * AlterTableInternal + * + * ALTER TABLE with target specified by OID + * + * We do not reject if the relation is already open, because it's quite + * likely that one or more layers of caller have it open. That means it + * is unsafe to use this entry point for alterations that could break + * existing query plans. On the assumption it's not used for such, we + * don't have to reject pending AFTER triggers, either. + * + * Also, since we don't have an AlterTableUtilityContext, this cannot be + * used for any subcommand types that require parse transformation or + * could generate subcommands that have to be passed to ProcessUtility. + */ +void +AlterTableInternal(Oid relid, List *cmds, bool recurse) +{ + Relation rel; + LOCKMODE lockmode = AlterTableGetLockLevel(cmds); + + rel = relation_open(relid, lockmode); + + EventTriggerAlterTableRelid(relid); + + ATController(NULL, rel, cmds, recurse, lockmode, NULL); +} + +/* + * AlterTableGetLockLevel + * + * Sets the overall lock level required for the supplied list of subcommands. + * Policy for doing this set according to needs of AlterTable(), see + * comments there for overall explanation. + * + * Function is called before and after parsing, so it must give same + * answer each time it is called. 
Some subcommands are transformed + * into other subcommand types, so the transform must never be made to a + * lower lock level than previously assigned. All transforms are noted below. + * + * Since this is called before we lock the table we cannot use table metadata + * to influence the type of lock we acquire. + * + * There should be no lockmodes hardcoded into the subcommand functions. All + * lockmode decisions for ALTER TABLE are made here only. The one exception is + * ALTER TABLE RENAME which is treated as a different statement type T_RenameStmt + * and does not travel through this section of code and cannot be combined with + * any of the subcommands given here. + * + * Note that Hot Standby only knows about AccessExclusiveLocks on the primary + * so any changes that might affect SELECTs running on standbys need to use + * AccessExclusiveLocks even if you think a lesser lock would do, unless you + * have a solution for that also. + * + * Also note that pg_dump uses only an AccessShareLock, meaning that anything + * that takes a lock less than AccessExclusiveLock can change object definitions + * while pg_dump is running. Be careful to check that the appropriate data is + * derived by pg_dump using an MVCC snapshot, rather than syscache lookups, + * otherwise we might end up with an inconsistent dump that can't restore. + */ +LOCKMODE +AlterTableGetLockLevel(List *cmds) +{ + /* + * This only works if we read catalog tables using MVCC snapshots. + */ + ListCell *lcmd; + LOCKMODE lockmode = ShareUpdateExclusiveLock; + + foreach(lcmd, cmds) + { + AlterTableCmd *cmd = (AlterTableCmd *) lfirst(lcmd); + LOCKMODE cmd_lockmode = AccessExclusiveLock; /* default for compiler */ + + switch (cmd->subtype) + { + /* + * These subcommands rewrite the heap, so require full locks. 
+ */ + case AT_AddColumn: /* may rewrite heap, in some cases and visible + * to SELECT */ + case AT_SetAccessMethod: /* must rewrite heap */ + case AT_SetTableSpace: /* must rewrite heap */ + case AT_AlterColumnType: /* must rewrite heap */ + cmd_lockmode = AccessExclusiveLock; + break; + + /* + * These subcommands may require addition of toast tables. If + * we add a toast table to a table currently being scanned, we + * might miss data added to the new toast table by concurrent + * insert transactions. + */ + case AT_SetStorage: /* may add toast tables, see + * ATRewriteCatalogs() */ + cmd_lockmode = AccessExclusiveLock; + break; + + /* + * Removing constraints can affect SELECTs that have been + * optimized assuming the constraint holds true. See also + * CloneFkReferenced. + */ + case AT_DropConstraint: /* as DROP INDEX */ + case AT_DropNotNull: /* may change some SQL plans */ + cmd_lockmode = AccessExclusiveLock; + break; + + /* + * Subcommands that may be visible to concurrent SELECTs + */ + case AT_DropColumn: /* change visible to SELECT */ + case AT_AddColumnToView: /* CREATE VIEW */ + case AT_DropOids: /* used to equiv to DropColumn */ + case AT_EnableAlwaysRule: /* may change SELECT rules */ + case AT_EnableReplicaRule: /* may change SELECT rules */ + case AT_EnableRule: /* may change SELECT rules */ + case AT_DisableRule: /* may change SELECT rules */ + cmd_lockmode = AccessExclusiveLock; + break; + + /* + * Changing owner may remove implicit SELECT privileges + */ + case AT_ChangeOwner: /* change visible to SELECT */ + cmd_lockmode = AccessExclusiveLock; + break; + + /* + * Changing foreign table options may affect optimization. + */ + case AT_GenericOptions: + case AT_AlterColumnGenericOptions: + cmd_lockmode = AccessExclusiveLock; + break; + + /* + * These subcommands affect write operations only. 
+ */ + case AT_EnableTrig: + case AT_EnableAlwaysTrig: + case AT_EnableReplicaTrig: + case AT_EnableTrigAll: + case AT_EnableTrigUser: + case AT_DisableTrig: + case AT_DisableTrigAll: + case AT_DisableTrigUser: + cmd_lockmode = ShareRowExclusiveLock; + break; + + /* + * These subcommands affect write operations only. XXX + * Theoretically, these could be ShareRowExclusiveLock. + */ + case AT_ColumnDefault: + case AT_CookedColumnDefault: + case AT_AlterConstraint: + case AT_AddIndex: /* from ADD CONSTRAINT */ + case AT_AddIndexConstraint: + case AT_ReplicaIdentity: + case AT_SetNotNull: + case AT_EnableRowSecurity: + case AT_DisableRowSecurity: + case AT_ForceRowSecurity: + case AT_NoForceRowSecurity: + case AT_AddIdentity: + case AT_DropIdentity: + case AT_SetIdentity: + case AT_DropExpression: + case AT_SetCompression: + cmd_lockmode = AccessExclusiveLock; + break; + + case AT_AddConstraint: + case AT_AddConstraintRecurse: /* becomes AT_AddConstraint */ + case AT_ReAddConstraint: /* becomes AT_AddConstraint */ + case AT_ReAddDomainConstraint: /* becomes AT_AddConstraint */ + if (IsA(cmd->def, Constraint)) + { + Constraint *con = (Constraint *) cmd->def; + + switch (con->contype) + { + case CONSTR_EXCLUSION: + case CONSTR_PRIMARY: + case CONSTR_UNIQUE: + + /* + * Cases essentially the same as CREATE INDEX. We + * could reduce the lock strength to ShareLock if + * we can work out how to allow concurrent catalog + * updates. XXX Might be set down to + * ShareRowExclusiveLock but requires further + * analysis. + */ + cmd_lockmode = AccessExclusiveLock; + break; + case CONSTR_FOREIGN: + + /* + * We add triggers to both tables when we add a + * Foreign Key, so the lock level must be at least + * as strong as CREATE TRIGGER. + */ + cmd_lockmode = ShareRowExclusiveLock; + break; + + default: + cmd_lockmode = AccessExclusiveLock; + } + } + break; + + /* + * These subcommands affect inheritance behaviour. 
Queries + * started before us will continue to see the old inheritance + * behaviour, while queries started after we commit will see + * new behaviour. No need to prevent reads or writes to the + * subtable while we hook it up though. Changing the TupDesc + * may be a problem, so keep highest lock. + */ + case AT_AddInherit: + case AT_DropInherit: + cmd_lockmode = AccessExclusiveLock; + break; + + /* + * These subcommands affect implicit row type conversion. They + * have affects similar to CREATE/DROP CAST on queries. don't + * provide for invalidating parse trees as a result of such + * changes, so we keep these at AccessExclusiveLock. + */ + case AT_AddOf: + case AT_DropOf: + cmd_lockmode = AccessExclusiveLock; + break; + + /* + * Only used by CREATE OR REPLACE VIEW which must conflict + * with an SELECTs currently using the view. + */ + case AT_ReplaceRelOptions: + cmd_lockmode = AccessExclusiveLock; + break; + + /* + * These subcommands affect general strategies for performance + * and maintenance, though don't change the semantic results + * from normal data reads and writes. Delaying an ALTER TABLE + * behind currently active writes only delays the point where + * the new strategy begins to take effect, so there is no + * benefit in waiting. In this case the minimum restriction + * applies: we don't currently allow concurrent catalog + * updates. + */ + case AT_SetStatistics: /* Uses MVCC in getTableAttrs() */ + case AT_ClusterOn: /* Uses MVCC in getIndexes() */ + case AT_DropCluster: /* Uses MVCC in getIndexes() */ + case AT_SetOptions: /* Uses MVCC in getTableAttrs() */ + case AT_ResetOptions: /* Uses MVCC in getTableAttrs() */ + cmd_lockmode = ShareUpdateExclusiveLock; + break; + + case AT_SetLogged: + case AT_SetUnLogged: + cmd_lockmode = AccessExclusiveLock; + break; + + case AT_ValidateConstraint: /* Uses MVCC in getConstraints() */ + cmd_lockmode = ShareUpdateExclusiveLock; + break; + + /* + * Rel options are more complex than first appears. 
Options + * are set here for tables, views and indexes; for historical + * reasons these can all be used with ALTER TABLE, so we can't + * decide between them using the basic grammar. + */ + case AT_SetRelOptions: /* Uses MVCC in getIndexes() and + * getTables() */ + case AT_ResetRelOptions: /* Uses MVCC in getIndexes() and + * getTables() */ + cmd_lockmode = AlterTableGetRelOptionsLockLevel((List *) cmd->def); + break; + + case AT_AttachPartition: + cmd_lockmode = ShareUpdateExclusiveLock; + break; + + case AT_DetachPartition: + if (((PartitionCmd *) cmd->def)->concurrent) + cmd_lockmode = ShareUpdateExclusiveLock; + else + cmd_lockmode = AccessExclusiveLock; + break; + + case AT_DetachPartitionFinalize: + cmd_lockmode = ShareUpdateExclusiveLock; + break; + + case AT_CheckNotNull: + + /* + * This only examines the table's schema; but lock must be + * strong enough to prevent concurrent DROP NOT NULL. + */ + cmd_lockmode = AccessShareLock; + break; + + default: /* oops */ + elog(ERROR, "unrecognized alter table type: %d", + (int) cmd->subtype); + break; + } + + /* + * Take the greatest lockmode from any subcommand + */ + if (cmd_lockmode > lockmode) + lockmode = cmd_lockmode; + } + + return lockmode; +} + +/* + * ATController provides top level control over the phases. + * + * parsetree is passed in to allow it to be passed to event triggers + * when requested. 
+ */ +static void +ATController(AlterTableStmt *parsetree, + Relation rel, List *cmds, bool recurse, LOCKMODE lockmode, + AlterTableUtilityContext *context) +{ + List *wqueue = NIL; + ListCell *lcmd; + + /* Phase 1: preliminary examination of commands, create work queue */ + foreach(lcmd, cmds) + { + AlterTableCmd *cmd = (AlterTableCmd *) lfirst(lcmd); + + ATPrepCmd(&wqueue, rel, cmd, recurse, false, lockmode, context); + } + + /* Close the relation, but keep lock until commit */ + relation_close(rel, NoLock); + + /* Phase 2: update system catalogs */ + ATRewriteCatalogs(&wqueue, lockmode, context); + + /* Phase 3: scan/rewrite tables as needed, and run afterStmts */ + ATRewriteTables(parsetree, &wqueue, lockmode, context); +} + +/* + * ATPrepCmd + * + * Traffic cop for ALTER TABLE Phase 1 operations, including simple + * recursion and permission checks. + * + * Caller must have acquired appropriate lock type on relation already. + * This lock should be held until commit. + */ +static void +ATPrepCmd(List **wqueue, Relation rel, AlterTableCmd *cmd, + bool recurse, bool recursing, LOCKMODE lockmode, + AlterTableUtilityContext *context) +{ + AlteredTableInfo *tab; + int pass = AT_PASS_UNSET; + + /* Find or create work queue entry for this table */ + tab = ATGetQueueEntry(wqueue, rel); + + /* + * Disallow any ALTER TABLE other than ALTER TABLE DETACH FINALIZE on + * partitions that are pending detach. + */ + if (rel->rd_rel->relispartition && + cmd->subtype != AT_DetachPartitionFinalize && + PartitionHasPendingDetach(RelationGetRelid(rel))) + ereport(ERROR, + errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("cannot alter partition \"%s\" with an incomplete detach", + RelationGetRelationName(rel)), + errhint("Use ALTER TABLE ... DETACH PARTITION ... FINALIZE to complete the pending detach operation.")); + + /* + * Copy the original subcommand for each table, so we can scribble on it. 
+ * This avoids conflicts when different child tables need to make + * different parse transformations (for example, the same column may have + * different column numbers in different children). + */ + cmd = copyObject(cmd); + + /* + * Do permissions and relkind checking, recursion to child tables if + * needed, and any additional phase-1 processing needed. (But beware of + * adding any processing that looks at table details that another + * subcommand could change. In some cases we reject multiple subcommands + * that could try to change the same state in contrary ways.) + */ + switch (cmd->subtype) + { + case AT_AddColumn: /* ADD COLUMN */ + ATSimplePermissions(cmd->subtype, rel, + ATT_TABLE | ATT_COMPOSITE_TYPE | ATT_FOREIGN_TABLE); + ATPrepAddColumn(wqueue, rel, recurse, recursing, false, cmd, + lockmode, context); + /* Recursion occurs during execution phase */ + pass = AT_PASS_ADD_COL; + break; + case AT_AddColumnToView: /* add column via CREATE OR REPLACE VIEW */ + ATSimplePermissions(cmd->subtype, rel, ATT_VIEW); + ATPrepAddColumn(wqueue, rel, recurse, recursing, true, cmd, + lockmode, context); + /* Recursion occurs during execution phase */ + pass = AT_PASS_ADD_COL; + break; + case AT_ColumnDefault: /* ALTER COLUMN DEFAULT */ + + /* + * We allow defaults on views so that INSERT into a view can have + * default-ish behavior. This works because the rewriter + * substitutes default values into INSERTs before it expands + * rules. + */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_VIEW | ATT_FOREIGN_TABLE); + ATSimpleRecursion(wqueue, rel, cmd, recurse, lockmode, context); + /* No command-specific prep needed */ + pass = cmd->def ? 
AT_PASS_ADD_OTHERCONSTR : AT_PASS_DROP; + break; + case AT_CookedColumnDefault: /* add a pre-cooked default */ + /* This is currently used only in CREATE TABLE */ + /* (so the permission check really isn't necessary) */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_FOREIGN_TABLE); + /* This command never recurses */ + pass = AT_PASS_ADD_OTHERCONSTR; + break; + case AT_AddIdentity: + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_VIEW | ATT_FOREIGN_TABLE); + /* This command never recurses */ + pass = AT_PASS_ADD_OTHERCONSTR; + break; + case AT_SetIdentity: + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_VIEW | ATT_FOREIGN_TABLE); + /* This command never recurses */ + /* This should run after AddIdentity, so do it in MISC pass */ + pass = AT_PASS_MISC; + break; + case AT_DropIdentity: + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_VIEW | ATT_FOREIGN_TABLE); + /* This command never recurses */ + pass = AT_PASS_DROP; + break; + case AT_DropNotNull: /* ALTER COLUMN DROP NOT NULL */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_FOREIGN_TABLE); + ATPrepDropNotNull(rel, recurse, recursing); + ATSimpleRecursion(wqueue, rel, cmd, recurse, lockmode, context); + pass = AT_PASS_DROP; + break; + case AT_SetNotNull: /* ALTER COLUMN SET NOT NULL */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_FOREIGN_TABLE); + /* Need command-specific recursion decision */ + ATPrepSetNotNull(wqueue, rel, cmd, recurse, recursing, + lockmode, context); + pass = AT_PASS_COL_ATTRS; + break; + case AT_CheckNotNull: /* check column is already marked NOT NULL */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_FOREIGN_TABLE); + ATSimpleRecursion(wqueue, rel, cmd, recurse, lockmode, context); + /* No command-specific prep needed */ + pass = AT_PASS_COL_ATTRS; + break; + case AT_DropExpression: /* ALTER COLUMN DROP EXPRESSION */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_FOREIGN_TABLE); + ATSimpleRecursion(wqueue, rel, cmd, 
recurse, lockmode, context); + ATPrepDropExpression(rel, cmd, recurse, recursing, lockmode); + pass = AT_PASS_DROP; + break; + case AT_SetStatistics: /* ALTER COLUMN SET STATISTICS */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_MATVIEW | ATT_INDEX | ATT_PARTITIONED_INDEX | ATT_FOREIGN_TABLE); + ATSimpleRecursion(wqueue, rel, cmd, recurse, lockmode, context); + /* No command-specific prep needed */ + pass = AT_PASS_MISC; + break; + case AT_SetOptions: /* ALTER COLUMN SET ( options ) */ + case AT_ResetOptions: /* ALTER COLUMN RESET ( options ) */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_MATVIEW | ATT_FOREIGN_TABLE); + /* This command never recurses */ + pass = AT_PASS_MISC; + break; + case AT_SetStorage: /* ALTER COLUMN SET STORAGE */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_MATVIEW | ATT_FOREIGN_TABLE); + ATSimpleRecursion(wqueue, rel, cmd, recurse, lockmode, context); + /* No command-specific prep needed */ + pass = AT_PASS_MISC; + break; + case AT_SetCompression: /* ALTER COLUMN SET COMPRESSION */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_MATVIEW); + /* This command never recurses */ + /* No command-specific prep needed */ + pass = AT_PASS_MISC; + break; + case AT_DropColumn: /* DROP COLUMN */ + ATSimplePermissions(cmd->subtype, rel, + ATT_TABLE | ATT_COMPOSITE_TYPE | ATT_FOREIGN_TABLE); + ATPrepDropColumn(wqueue, rel, recurse, recursing, cmd, + lockmode, context); + /* Recursion occurs during execution phase */ + pass = AT_PASS_DROP; + break; + case AT_AddIndex: /* ADD INDEX */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE); + /* This command never recurses */ + /* No command-specific prep needed */ + pass = AT_PASS_ADD_INDEX; + break; + case AT_AddConstraint: /* ADD CONSTRAINT */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_FOREIGN_TABLE); + /* Recursion occurs during execution phase */ + /* No command-specific prep needed except saving recurse flag */ + if (recurse) + cmd->subtype = 
AT_AddConstraintRecurse; + pass = AT_PASS_ADD_CONSTR; + break; + case AT_AddIndexConstraint: /* ADD CONSTRAINT USING INDEX */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE); + /* This command never recurses */ + /* No command-specific prep needed */ + pass = AT_PASS_ADD_INDEXCONSTR; + break; + case AT_DropConstraint: /* DROP CONSTRAINT */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_FOREIGN_TABLE); + ATCheckPartitionsNotInUse(rel, lockmode); + /* Other recursion occurs during execution phase */ + /* No command-specific prep needed except saving recurse flag */ + if (recurse) + cmd->subtype = AT_DropConstraintRecurse; + pass = AT_PASS_DROP; + break; + case AT_AlterColumnType: /* ALTER COLUMN TYPE */ + ATSimplePermissions(cmd->subtype, rel, + ATT_TABLE | ATT_COMPOSITE_TYPE | ATT_FOREIGN_TABLE); + /* See comments for ATPrepAlterColumnType */ + cmd = ATParseTransformCmd(wqueue, tab, rel, cmd, recurse, lockmode, + AT_PASS_UNSET, context); + Assert(cmd != NULL); + /* Performs own recursion */ + ATPrepAlterColumnType(wqueue, tab, rel, recurse, recursing, cmd, + lockmode, context); + pass = AT_PASS_ALTER_TYPE; + break; + case AT_AlterColumnGenericOptions: + ATSimplePermissions(cmd->subtype, rel, ATT_FOREIGN_TABLE); + /* This command never recurses */ + /* No command-specific prep needed */ + pass = AT_PASS_MISC; + break; + case AT_ChangeOwner: /* ALTER OWNER */ + /* This command never recurses */ + /* No command-specific prep needed */ + pass = AT_PASS_MISC; + break; + case AT_ClusterOn: /* CLUSTER ON */ + case AT_DropCluster: /* SET WITHOUT CLUSTER */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_MATVIEW); + /* These commands never recurse */ + /* No command-specific prep needed */ + pass = AT_PASS_MISC; + break; + case AT_SetLogged: /* SET LOGGED */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_SEQUENCE); + if (tab->chgPersistence) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot change persistence 
setting twice"))); + tab->chgPersistence = ATPrepChangePersistence(rel, true); + /* force rewrite if necessary; see comment in ATRewriteTables */ + if (tab->chgPersistence) + { + tab->rewrite |= AT_REWRITE_ALTER_PERSISTENCE; + tab->newrelpersistence = RELPERSISTENCE_PERMANENT; + } + pass = AT_PASS_MISC; + break; + case AT_SetUnLogged: /* SET UNLOGGED */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_SEQUENCE); + if (tab->chgPersistence) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot change persistence setting twice"))); + tab->chgPersistence = ATPrepChangePersistence(rel, false); + /* force rewrite if necessary; see comment in ATRewriteTables */ + if (tab->chgPersistence) + { + tab->rewrite |= AT_REWRITE_ALTER_PERSISTENCE; + tab->newrelpersistence = RELPERSISTENCE_UNLOGGED; + } + pass = AT_PASS_MISC; + break; + case AT_DropOids: /* SET WITHOUT OIDS */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_FOREIGN_TABLE); + pass = AT_PASS_DROP; + break; + case AT_SetAccessMethod: /* SET ACCESS METHOD */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_MATVIEW); + + /* partitioned tables don't have an access method */ + if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot change access method of a partitioned table"))); + + /* check if another access method change was already requested */ + if (OidIsValid(tab->newAccessMethod)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot have multiple SET ACCESS METHOD subcommands"))); + + ATPrepSetAccessMethod(tab, rel, cmd->name); + pass = AT_PASS_MISC; /* does not matter; no work in Phase 2 */ + break; + case AT_SetTableSpace: /* SET TABLESPACE */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_MATVIEW | ATT_INDEX | + ATT_PARTITIONED_INDEX); + /* This command never recurses */ + ATPrepSetTableSpace(tab, rel, cmd->name, lockmode); + pass = AT_PASS_MISC; /* doesn't 
actually matter */ + break; + case AT_SetRelOptions: /* SET (...) */ + case AT_ResetRelOptions: /* RESET (...) */ + case AT_ReplaceRelOptions: /* reset them all, then set just these */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_VIEW | ATT_MATVIEW | ATT_INDEX); + /* This command never recurses */ + /* No command-specific prep needed */ + pass = AT_PASS_MISC; + break; + case AT_AddInherit: /* INHERIT */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_FOREIGN_TABLE); + /* This command never recurses */ + ATPrepAddInherit(rel); + pass = AT_PASS_MISC; + break; + case AT_DropInherit: /* NO INHERIT */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_FOREIGN_TABLE); + /* This command never recurses */ + /* No command-specific prep needed */ + pass = AT_PASS_MISC; + break; + case AT_AlterConstraint: /* ALTER CONSTRAINT */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE); + /* Recursion occurs during execution phase */ + pass = AT_PASS_MISC; + break; + case AT_ValidateConstraint: /* VALIDATE CONSTRAINT */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_FOREIGN_TABLE); + /* Recursion occurs during execution phase */ + /* No command-specific prep needed except saving recurse flag */ + if (recurse) + cmd->subtype = AT_ValidateConstraintRecurse; + pass = AT_PASS_MISC; + break; + case AT_ReplicaIdentity: /* REPLICA IDENTITY ... 
*/ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_MATVIEW); + pass = AT_PASS_MISC; + /* This command never recurses */ + /* No command-specific prep needed */ + break; + case AT_EnableTrig: /* ENABLE TRIGGER variants */ + case AT_EnableAlwaysTrig: + case AT_EnableReplicaTrig: + case AT_EnableTrigAll: + case AT_EnableTrigUser: + case AT_DisableTrig: /* DISABLE TRIGGER variants */ + case AT_DisableTrigAll: + case AT_DisableTrigUser: + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_FOREIGN_TABLE); + /* Set up recursion for phase 2; no other prep needed */ + if (recurse) + cmd->recurse = true; + pass = AT_PASS_MISC; + break; + case AT_EnableRule: /* ENABLE/DISABLE RULE variants */ + case AT_EnableAlwaysRule: + case AT_EnableReplicaRule: + case AT_DisableRule: + case AT_AddOf: /* OF */ + case AT_DropOf: /* NOT OF */ + case AT_EnableRowSecurity: + case AT_DisableRowSecurity: + case AT_ForceRowSecurity: + case AT_NoForceRowSecurity: + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE); + /* These commands never recurse */ + /* No command-specific prep needed */ + pass = AT_PASS_MISC; + break; + case AT_GenericOptions: + ATSimplePermissions(cmd->subtype, rel, ATT_FOREIGN_TABLE); + /* No command-specific prep needed */ + pass = AT_PASS_MISC; + break; + case AT_AttachPartition: + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_PARTITIONED_INDEX); + /* No command-specific prep needed */ + pass = AT_PASS_MISC; + break; + case AT_DetachPartition: + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE); + /* No command-specific prep needed */ + pass = AT_PASS_MISC; + break; + case AT_DetachPartitionFinalize: + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE); + /* No command-specific prep needed */ + pass = AT_PASS_MISC; + break; + default: /* oops */ + elog(ERROR, "unrecognized alter table type: %d", + (int) cmd->subtype); + pass = AT_PASS_UNSET; /* keep compiler quiet */ + break; + } + Assert(pass > AT_PASS_UNSET); + + /* Add the subcommand to the 
appropriate list for phase 2 */ + tab->subcmds[pass] = lappend(tab->subcmds[pass], cmd); +} + +/* + * ATRewriteCatalogs + * + * Traffic cop for ALTER TABLE Phase 2 operations. Subcommands are + * dispatched in a "safe" execution order (designed to avoid unnecessary + * conflicts). + */ +static void +ATRewriteCatalogs(List **wqueue, LOCKMODE lockmode, + AlterTableUtilityContext *context) +{ + int pass; + ListCell *ltab; + + /* + * We process all the tables "in parallel", one pass at a time. This is + * needed because we may have to propagate work from one table to another + * (specifically, ALTER TYPE on a foreign key's PK has to dispatch the + * re-adding of the foreign key constraint to the other table). Work can + * only be propagated into later passes, however. + */ + for (pass = 0; pass < AT_NUM_PASSES; pass++) + { + /* Go through each table that needs to be processed */ + foreach(ltab, *wqueue) + { + AlteredTableInfo *tab = (AlteredTableInfo *) lfirst(ltab); + List *subcmds = tab->subcmds[pass]; + ListCell *lcmd; + + if (subcmds == NIL) + continue; + + /* + * Open the relation and store it in tab. This allows subroutines + * close and reopen, if necessary. Appropriate lock was obtained + * by phase 1, needn't get it again. + */ + tab->rel = relation_open(tab->relid, NoLock); + + foreach(lcmd, subcmds) + ATExecCmd(wqueue, tab, + lfirst_node(AlterTableCmd, lcmd), + lockmode, pass, context); + + /* + * After the ALTER TYPE pass, do cleanup work (this is not done in + * ATExecAlterColumnType since it should be done only once if + * multiple columns of a table are altered). + */ + if (pass == AT_PASS_ALTER_TYPE) + ATPostAlterTypeCleanup(wqueue, tab, lockmode); + + if (tab->rel) + { + relation_close(tab->rel, NoLock); + tab->rel = NULL; + } + } + } + + /* Check to see if a toast table must be added. 
*/ + foreach(ltab, *wqueue) + { + AlteredTableInfo *tab = (AlteredTableInfo *) lfirst(ltab); + + /* + * If the table is source table of ATTACH PARTITION command, we did + * not modify anything about it that will change its toasting + * requirement, so no need to check. + */ + if (((tab->relkind == RELKIND_RELATION || + tab->relkind == RELKIND_PARTITIONED_TABLE) && + tab->partition_constraint == NULL) || + tab->relkind == RELKIND_MATVIEW) + AlterTableCreateToastTable(tab->relid, (Datum) 0, lockmode); + } +} + +/* + * ATExecCmd: dispatch a subcommand to appropriate execution routine + */ +static void +ATExecCmd(List **wqueue, AlteredTableInfo *tab, + AlterTableCmd *cmd, LOCKMODE lockmode, int cur_pass, + AlterTableUtilityContext *context) +{ + ObjectAddress address = InvalidObjectAddress; + Relation rel = tab->rel; + + switch (cmd->subtype) + { + case AT_AddColumn: /* ADD COLUMN */ + case AT_AddColumnToView: /* add column via CREATE OR REPLACE VIEW */ + address = ATExecAddColumn(wqueue, tab, rel, &cmd, + false, false, + lockmode, cur_pass, context); + break; + case AT_AddColumnRecurse: + address = ATExecAddColumn(wqueue, tab, rel, &cmd, + true, false, + lockmode, cur_pass, context); + break; + case AT_ColumnDefault: /* ALTER COLUMN DEFAULT */ + address = ATExecColumnDefault(rel, cmd->name, cmd->def, lockmode); + break; + case AT_CookedColumnDefault: /* add a pre-cooked default */ + address = ATExecCookedColumnDefault(rel, cmd->num, cmd->def); + break; + case AT_AddIdentity: + cmd = ATParseTransformCmd(wqueue, tab, rel, cmd, false, lockmode, + cur_pass, context); + Assert(cmd != NULL); + address = ATExecAddIdentity(rel, cmd->name, cmd->def, lockmode); + break; + case AT_SetIdentity: + cmd = ATParseTransformCmd(wqueue, tab, rel, cmd, false, lockmode, + cur_pass, context); + Assert(cmd != NULL); + address = ATExecSetIdentity(rel, cmd->name, cmd->def, lockmode); + break; + case AT_DropIdentity: + address = ATExecDropIdentity(rel, cmd->name, cmd->missing_ok, lockmode); + 
break; + case AT_DropNotNull: /* ALTER COLUMN DROP NOT NULL */ + address = ATExecDropNotNull(rel, cmd->name, lockmode); + break; + case AT_SetNotNull: /* ALTER COLUMN SET NOT NULL */ + address = ATExecSetNotNull(tab, rel, cmd->name, lockmode); + break; + case AT_CheckNotNull: /* check column is already marked NOT NULL */ + ATExecCheckNotNull(tab, rel, cmd->name, lockmode); + break; + case AT_DropExpression: + address = ATExecDropExpression(rel, cmd->name, cmd->missing_ok, lockmode); + break; + case AT_SetStatistics: /* ALTER COLUMN SET STATISTICS */ + address = ATExecSetStatistics(rel, cmd->name, cmd->num, cmd->def, lockmode); + break; + case AT_SetOptions: /* ALTER COLUMN SET ( options ) */ + address = ATExecSetOptions(rel, cmd->name, cmd->def, false, lockmode); + break; + case AT_ResetOptions: /* ALTER COLUMN RESET ( options ) */ + address = ATExecSetOptions(rel, cmd->name, cmd->def, true, lockmode); + break; + case AT_SetStorage: /* ALTER COLUMN SET STORAGE */ + address = ATExecSetStorage(rel, cmd->name, cmd->def, lockmode); + break; + case AT_SetCompression: + address = ATExecSetCompression(tab, rel, cmd->name, cmd->def, + lockmode); + break; + case AT_DropColumn: /* DROP COLUMN */ + address = ATExecDropColumn(wqueue, rel, cmd->name, + cmd->behavior, false, false, + cmd->missing_ok, lockmode, + NULL); + break; + case AT_DropColumnRecurse: /* DROP COLUMN with recursion */ + address = ATExecDropColumn(wqueue, rel, cmd->name, + cmd->behavior, true, false, + cmd->missing_ok, lockmode, + NULL); + break; + case AT_AddIndex: /* ADD INDEX */ + address = ATExecAddIndex(tab, rel, (IndexStmt *) cmd->def, false, + lockmode); + break; + case AT_ReAddIndex: /* ADD INDEX */ + address = ATExecAddIndex(tab, rel, (IndexStmt *) cmd->def, true, + lockmode); + break; + case AT_ReAddStatistics: /* ADD STATISTICS */ + address = ATExecAddStatistics(tab, rel, (CreateStatsStmt *) cmd->def, + true, lockmode); + break; + case AT_AddConstraint: /* ADD CONSTRAINT */ + /* Transform the 
command only during initial examination */ + if (cur_pass == AT_PASS_ADD_CONSTR) + cmd = ATParseTransformCmd(wqueue, tab, rel, cmd, + false, lockmode, + cur_pass, context); + /* Depending on constraint type, might be no more work to do now */ + if (cmd != NULL) + address = + ATExecAddConstraint(wqueue, tab, rel, + (Constraint *) cmd->def, + false, false, lockmode); + break; + case AT_AddConstraintRecurse: /* ADD CONSTRAINT with recursion */ + /* Transform the command only during initial examination */ + if (cur_pass == AT_PASS_ADD_CONSTR) + cmd = ATParseTransformCmd(wqueue, tab, rel, cmd, + true, lockmode, + cur_pass, context); + /* Depending on constraint type, might be no more work to do now */ + if (cmd != NULL) + address = + ATExecAddConstraint(wqueue, tab, rel, + (Constraint *) cmd->def, + true, false, lockmode); + break; + case AT_ReAddConstraint: /* Re-add pre-existing check constraint */ + address = + ATExecAddConstraint(wqueue, tab, rel, (Constraint *) cmd->def, + true, true, lockmode); + break; + case AT_ReAddDomainConstraint: /* Re-add pre-existing domain check + * constraint */ + address = + AlterDomainAddConstraint(((AlterDomainStmt *) cmd->def)->typeName, + ((AlterDomainStmt *) cmd->def)->def, + NULL); + break; + case AT_ReAddComment: /* Re-add existing comment */ + address = CommentObject((CommentStmt *) cmd->def); + break; + case AT_AddIndexConstraint: /* ADD CONSTRAINT USING INDEX */ + address = ATExecAddIndexConstraint(tab, rel, (IndexStmt *) cmd->def, + lockmode); + break; + case AT_AlterConstraint: /* ALTER CONSTRAINT */ + address = ATExecAlterConstraint(rel, cmd, false, false, lockmode); + break; + case AT_ValidateConstraint: /* VALIDATE CONSTRAINT */ + address = ATExecValidateConstraint(wqueue, rel, cmd->name, false, + false, lockmode); + break; + case AT_ValidateConstraintRecurse: /* VALIDATE CONSTRAINT with + * recursion */ + address = ATExecValidateConstraint(wqueue, rel, cmd->name, true, + false, lockmode); + break; + case 
AT_DropConstraint: /* DROP CONSTRAINT */ + ATExecDropConstraint(rel, cmd->name, cmd->behavior, + false, false, + cmd->missing_ok, lockmode); + break; + case AT_DropConstraintRecurse: /* DROP CONSTRAINT with recursion */ + ATExecDropConstraint(rel, cmd->name, cmd->behavior, + true, false, + cmd->missing_ok, lockmode); + break; + case AT_AlterColumnType: /* ALTER COLUMN TYPE */ + /* parse transformation was done earlier */ + address = ATExecAlterColumnType(tab, rel, cmd, lockmode); + break; + case AT_AlterColumnGenericOptions: /* ALTER COLUMN OPTIONS */ + address = + ATExecAlterColumnGenericOptions(rel, cmd->name, + (List *) cmd->def, lockmode); + break; + case AT_ChangeOwner: /* ALTER OWNER */ + ATExecChangeOwner(RelationGetRelid(rel), + get_rolespec_oid(cmd->newowner, false), + false, lockmode); + break; + case AT_ClusterOn: /* CLUSTER ON */ + address = ATExecClusterOn(rel, cmd->name, lockmode); + break; + case AT_DropCluster: /* SET WITHOUT CLUSTER */ + ATExecDropCluster(rel, lockmode); + break; + case AT_SetLogged: /* SET LOGGED */ + case AT_SetUnLogged: /* SET UNLOGGED */ + break; + case AT_DropOids: /* SET WITHOUT OIDS */ + /* nothing to do here, oid columns don't exist anymore */ + break; + case AT_SetAccessMethod: /* SET ACCESS METHOD */ + /* handled specially in Phase 3 */ + break; + case AT_SetTableSpace: /* SET TABLESPACE */ + + /* + * Only do this for partitioned tables and indexes, for which this + * is just a catalog change. Other relation types which have + * storage are handled by Phase 3. + */ + if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE || + rel->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) + ATExecSetTableSpaceNoStorage(rel, tab->newTableSpace); + + break; + case AT_SetRelOptions: /* SET (...) */ + case AT_ResetRelOptions: /* RESET (...) 
*/ + case AT_ReplaceRelOptions: /* replace entire option list */ + ATExecSetRelOptions(rel, (List *) cmd->def, cmd->subtype, lockmode); + break; + case AT_EnableTrig: /* ENABLE TRIGGER name */ + ATExecEnableDisableTrigger(rel, cmd->name, + TRIGGER_FIRES_ON_ORIGIN, false, + cmd->recurse, + lockmode); + break; + case AT_EnableAlwaysTrig: /* ENABLE ALWAYS TRIGGER name */ + ATExecEnableDisableTrigger(rel, cmd->name, + TRIGGER_FIRES_ALWAYS, false, + cmd->recurse, + lockmode); + break; + case AT_EnableReplicaTrig: /* ENABLE REPLICA TRIGGER name */ + ATExecEnableDisableTrigger(rel, cmd->name, + TRIGGER_FIRES_ON_REPLICA, false, + cmd->recurse, + lockmode); + break; + case AT_DisableTrig: /* DISABLE TRIGGER name */ + ATExecEnableDisableTrigger(rel, cmd->name, + TRIGGER_DISABLED, false, + cmd->recurse, + lockmode); + break; + case AT_EnableTrigAll: /* ENABLE TRIGGER ALL */ + ATExecEnableDisableTrigger(rel, NULL, + TRIGGER_FIRES_ON_ORIGIN, false, + cmd->recurse, + lockmode); + break; + case AT_DisableTrigAll: /* DISABLE TRIGGER ALL */ + ATExecEnableDisableTrigger(rel, NULL, + TRIGGER_DISABLED, false, + cmd->recurse, + lockmode); + break; + case AT_EnableTrigUser: /* ENABLE TRIGGER USER */ + ATExecEnableDisableTrigger(rel, NULL, + TRIGGER_FIRES_ON_ORIGIN, true, + cmd->recurse, + lockmode); + break; + case AT_DisableTrigUser: /* DISABLE TRIGGER USER */ + ATExecEnableDisableTrigger(rel, NULL, + TRIGGER_DISABLED, true, + cmd->recurse, + lockmode); + break; + + case AT_EnableRule: /* ENABLE RULE name */ + ATExecEnableDisableRule(rel, cmd->name, + RULE_FIRES_ON_ORIGIN, lockmode); + break; + case AT_EnableAlwaysRule: /* ENABLE ALWAYS RULE name */ + ATExecEnableDisableRule(rel, cmd->name, + RULE_FIRES_ALWAYS, lockmode); + break; + case AT_EnableReplicaRule: /* ENABLE REPLICA RULE name */ + ATExecEnableDisableRule(rel, cmd->name, + RULE_FIRES_ON_REPLICA, lockmode); + break; + case AT_DisableRule: /* DISABLE RULE name */ + ATExecEnableDisableRule(rel, cmd->name, + RULE_DISABLED, 
lockmode); + break; + + case AT_AddInherit: + address = ATExecAddInherit(rel, (RangeVar *) cmd->def, lockmode); + break; + case AT_DropInherit: + address = ATExecDropInherit(rel, (RangeVar *) cmd->def, lockmode); + break; + case AT_AddOf: + address = ATExecAddOf(rel, (TypeName *) cmd->def, lockmode); + break; + case AT_DropOf: + ATExecDropOf(rel, lockmode); + break; + case AT_ReplicaIdentity: + ATExecReplicaIdentity(rel, (ReplicaIdentityStmt *) cmd->def, lockmode); + break; + case AT_EnableRowSecurity: + ATExecSetRowSecurity(rel, true); + break; + case AT_DisableRowSecurity: + ATExecSetRowSecurity(rel, false); + break; + case AT_ForceRowSecurity: + ATExecForceNoForceRowSecurity(rel, true); + break; + case AT_NoForceRowSecurity: + ATExecForceNoForceRowSecurity(rel, false); + break; + case AT_GenericOptions: + ATExecGenericOptions(rel, (List *) cmd->def); + break; + case AT_AttachPartition: + cmd = ATParseTransformCmd(wqueue, tab, rel, cmd, false, lockmode, + cur_pass, context); + Assert(cmd != NULL); + if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + ATExecAttachPartition(wqueue, rel, (PartitionCmd *) cmd->def, + context); + else + ATExecAttachPartitionIdx(wqueue, rel, + ((PartitionCmd *) cmd->def)->name); + break; + case AT_DetachPartition: + cmd = ATParseTransformCmd(wqueue, tab, rel, cmd, false, lockmode, + cur_pass, context); + Assert(cmd != NULL); + /* ATPrepCmd ensures it must be a table */ + Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE); + ATExecDetachPartition(wqueue, tab, rel, + ((PartitionCmd *) cmd->def)->name, + ((PartitionCmd *) cmd->def)->concurrent); + break; + case AT_DetachPartitionFinalize: + ATExecDetachPartitionFinalize(rel, ((PartitionCmd *) cmd->def)->name); + break; + default: /* oops */ + elog(ERROR, "unrecognized alter table type: %d", + (int) cmd->subtype); + break; + } + + /* + * Report the subcommand to interested event triggers. 
+ */
+	if (cmd)
+		EventTriggerCollectAlterTableSubcmd((Node *) cmd, address);
+
+	/*
+	 * Bump the command counter to ensure the next subcommand in the sequence
+	 * can see the changes so far
+	 */
+	CommandCounterIncrement();
+}
+
+/*
+ * ATParseTransformCmd: perform parse transformation for one subcommand
+ *
+ * Returns the transformed subcommand tree, if there is one, else NULL.
+ *
+ * The parser may hand back additional AlterTableCmd(s) and/or other
+ * utility statements, either before or after the original subcommand.
+ * Other AlterTableCmds are scheduled into the appropriate slot of the
+ * AlteredTableInfo (they had better be for later passes than the current one).
+ * Utility statements that are supposed to happen before the AlterTableCmd
+ * are executed immediately.  Those that are supposed to happen afterwards
+ * are added to the tab->afterStmts list to be done at the very end.
+ */
+static AlterTableCmd *
+ATParseTransformCmd(List **wqueue, AlteredTableInfo *tab, Relation rel,
+					AlterTableCmd *cmd, bool recurse, LOCKMODE lockmode,
+					int cur_pass, AlterTableUtilityContext *context)
+{
+	AlterTableCmd *newcmd = NULL;
+	AlterTableStmt *atstmt = makeNode(AlterTableStmt);
+	List	   *beforeStmts;
+	List	   *afterStmts;
+	ListCell   *lc;
+
+	/* Gin up an AlterTableStmt with just this subcommand and this table */
+	atstmt->relation =
+		makeRangeVar(get_namespace_name(RelationGetNamespace(rel)),
+					 pstrdup(RelationGetRelationName(rel)),
+					 -1);
+	/* inh carries the caller's recursion decision into the transform */
+	atstmt->relation->inh = recurse;
+	atstmt->cmds = list_make1(cmd);
+	atstmt->objtype = OBJECT_TABLE; /* needn't be picky here */
+	atstmt->missing_ok = false;
+
+	/* Transform the AlterTableStmt */
+	atstmt = transformAlterTableStmt(RelationGetRelid(rel),
+									 atstmt,
+									 context->queryString,
+									 &beforeStmts,
+									 &afterStmts);
+
+	/* Execute any statements that should happen before these subcommand(s) */
+	foreach(lc, beforeStmts)
+	{
+		Node	   *stmt = (Node *) lfirst(lc);
+
+		ProcessUtilityForAlterTable(stmt, context);
+		CommandCounterIncrement();
+	}
+
+	/* Examine the transformed subcommands and schedule them appropriately */
+	foreach(lc, atstmt->cmds)
+	{
+		AlterTableCmd *cmd2 = lfirst_node(AlterTableCmd, lc);
+		int			pass;
+
+		/*
+		 * This switch need only cover the subcommand types that can be added
+		 * by parse_utilcmd.c; otherwise, we'll use the default strategy of
+		 * executing the subcommand immediately, as a substitute for the
+		 * original subcommand.  (Note, however, that this does cause
+		 * AT_AddConstraint subcommands to be rescheduled into later passes,
+		 * which is important for index and foreign key constraints.)
+		 *
+		 * We assume we needn't do any phase-1 checks for added subcommands.
+		 */
+		switch (cmd2->subtype)
+		{
+			case AT_SetNotNull:
+				/* Need command-specific recursion decision */
+				ATPrepSetNotNull(wqueue, rel, cmd2,
+								 recurse, false,
+								 lockmode, context);
+				pass = AT_PASS_COL_ATTRS;
+				break;
+			case AT_AddIndex:
+				/* This command never recurses */
+				/* No command-specific prep needed */
+				pass = AT_PASS_ADD_INDEX;
+				break;
+			case AT_AddIndexConstraint:
+				/* This command never recurses */
+				/* No command-specific prep needed */
+				pass = AT_PASS_ADD_INDEXCONSTR;
+				break;
+			case AT_AddConstraint:
+				/* Recursion occurs during execution phase */
+				if (recurse)
+					cmd2->subtype = AT_AddConstraintRecurse;
+				/* index-backed constraints are scheduled with index adds */
+				switch (castNode(Constraint, cmd2->def)->contype)
+				{
+					case CONSTR_PRIMARY:
+					case CONSTR_UNIQUE:
+					case CONSTR_EXCLUSION:
+						pass = AT_PASS_ADD_INDEXCONSTR;
+						break;
+					default:
+						pass = AT_PASS_ADD_OTHERCONSTR;
+						break;
+				}
+				break;
+			case AT_AlterColumnGenericOptions:
+				/* This command never recurses */
+				/* No command-specific prep needed */
+				pass = AT_PASS_MISC;
+				break;
+			default:
+				/* anything else executes in the pass we are already in */
+				pass = cur_pass;
+				break;
+		}
+
+		if (pass < cur_pass)
+		{
+			/* Cannot schedule into a pass we already finished */
+			elog(ERROR, "ALTER TABLE scheduling failure: too late for pass %d",
+				 pass);
+		}
+		else if (pass > cur_pass)
+		{
+			/* OK, queue it up for later */
+			tab->subcmds[pass] = lappend(tab->subcmds[pass], cmd2);
+		}
+		else
+		{
+			/*
+			 * We should see at most one subcommand for the current pass,
+			 * which is the transformed version of the original subcommand.
+			 */
+			if (newcmd == NULL && cmd->subtype == cmd2->subtype)
+			{
+				/* Found the transformed version of our subcommand */
+				newcmd = cmd2;
+			}
+			else
+				elog(ERROR, "ALTER TABLE scheduling failure: bogus item for pass %d",
+					 pass);
+		}
+	}
+
+	/* Queue up any after-statements to happen at the end */
+	tab->afterStmts = list_concat(tab->afterStmts, afterStmts);
+
+	/* NULL means the original subcommand was consumed/replaced entirely */
+	return newcmd;
+}
+
+/*
+ * ATRewriteTables: ALTER TABLE phase 3
+ */
+static void
+ATRewriteTables(AlterTableStmt *parsetree, List **wqueue, LOCKMODE lockmode,
+				AlterTableUtilityContext *context)
+{
+	ListCell   *ltab;
+
+	/* Go through each table that needs to be checked or rewritten */
+	foreach(ltab, *wqueue)
+	{
+		AlteredTableInfo *tab = (AlteredTableInfo *) lfirst(ltab);
+
+		/* Relations without storage may be ignored here */
+		if (!RELKIND_HAS_STORAGE(tab->relkind))
+			continue;
+
+		/*
+		 * If we change column data types, the operation has to be propagated
+		 * to tables that use this table's rowtype as a column type.
+		 * tab->newvals will also be non-NULL in the case where we're adding a
+		 * column with a default.  We choose to forbid that case as well,
+		 * since composite types might eventually support defaults.
+		 *
+		 * (Eventually we'll probably need to check for composite type
+		 * dependencies even when we're just scanning the table without a
+		 * rewrite, but at the moment a composite type does not enforce any
+		 * constraints, so it's not necessary/appropriate to enforce them just
+		 * during ALTER.)
+ */ + if (tab->newvals != NIL || tab->rewrite > 0) + { + Relation rel; + + rel = table_open(tab->relid, NoLock); + find_composite_type_dependencies(rel->rd_rel->reltype, rel, NULL); + table_close(rel, NoLock); + } + + /* + * We only need to rewrite the table if at least one column needs to + * be recomputed, or we are changing its persistence or access method. + * + * There are two reasons for requiring a rewrite when changing + * persistence: on one hand, we need to ensure that the buffers + * belonging to each of the two relations are marked with or without + * BM_PERMANENT properly. On the other hand, since rewriting creates + * and assigns a new relfilenode, we automatically create or drop an + * init fork for the relation as appropriate. + */ + if (tab->rewrite > 0 && tab->relkind != RELKIND_SEQUENCE) + { + /* Build a temporary relation and copy data */ + Relation OldHeap; + Oid OIDNewHeap; + Oid NewAccessMethod; + Oid NewTableSpace; + char persistence; + + OldHeap = table_open(tab->relid, NoLock); + + /* + * We don't support rewriting of system catalogs; there are too + * many corner cases and too little benefit. In particular this + * is certainly not going to work for mapped catalogs. + */ + if (IsSystemRelation(OldHeap)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot rewrite system relation \"%s\"", + RelationGetRelationName(OldHeap)))); + + if (RelationIsUsedAsCatalogTable(OldHeap)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot rewrite table \"%s\" used as a catalog table", + RelationGetRelationName(OldHeap)))); + + /* + * Don't allow rewrite on temp tables of other backends ... their + * local buffer manager is not going to cope. 
+ */ + if (RELATION_IS_OTHER_TEMP(OldHeap)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot rewrite temporary tables of other sessions"))); + + /* + * Select destination tablespace (same as original unless user + * requested a change) + */ + if (tab->newTableSpace) + NewTableSpace = tab->newTableSpace; + else + NewTableSpace = OldHeap->rd_rel->reltablespace; + + /* + * Select destination access method (same as original unless user + * requested a change) + */ + if (OidIsValid(tab->newAccessMethod)) + NewAccessMethod = tab->newAccessMethod; + else + NewAccessMethod = OldHeap->rd_rel->relam; + + /* + * Select persistence of transient table (same as original unless + * user requested a change) + */ + persistence = tab->chgPersistence ? + tab->newrelpersistence : OldHeap->rd_rel->relpersistence; + + table_close(OldHeap, NoLock); + + /* + * Fire off an Event Trigger now, before actually rewriting the + * table. + * + * We don't support Event Trigger for nested commands anywhere, + * here included, and parsetree is given NULL when coming from + * AlterTableInternal. + * + * And fire it only once. + */ + if (parsetree) + EventTriggerTableRewrite((Node *) parsetree, + tab->relid, + tab->rewrite); + + /* + * Create transient table that will receive the modified data. + * + * Ensure it is marked correctly as logged or unlogged. We have + * to do this here so that buffers for the new relfilenode will + * have the right persistence set, and at the same time ensure + * that the original filenode's buffers will get read in with the + * correct setting (i.e. the original one). Otherwise a rollback + * after the rewrite would possibly result with buffers for the + * original filenode having the wrong persistence setting. + * + * NB: This relies on swap_relation_files() also swapping the + * persistence. That wouldn't work for pg_class, but that can't be + * unlogged anyway. 
+ */ + OIDNewHeap = make_new_heap(tab->relid, NewTableSpace, NewAccessMethod, + persistence, lockmode); + + /* + * Copy the heap data into the new table with the desired + * modifications, and test the current data within the table + * against new constraints generated by ALTER TABLE commands. + */ + ATRewriteTable(tab, OIDNewHeap, lockmode); + + /* + * Swap the physical files of the old and new heaps, then rebuild + * indexes and discard the old heap. We can use RecentXmin for + * the table's new relfrozenxid because we rewrote all the tuples + * in ATRewriteTable, so no older Xid remains in the table. Also, + * we never try to swap toast tables by content, since we have no + * interest in letting this code work on system catalogs. + */ + finish_heap_swap(tab->relid, OIDNewHeap, + false, false, true, + !OidIsValid(tab->newTableSpace), + RecentXmin, + ReadNextMultiXactId(), + persistence); + + InvokeObjectPostAlterHook(RelationRelationId, tab->relid, 0); + } + else if (tab->rewrite > 0 && tab->relkind == RELKIND_SEQUENCE) + { + if (tab->chgPersistence) + SequenceChangePersistence(tab->relid, tab->newrelpersistence); + } + else + { + /* + * If required, test the current data within the table against new + * constraints generated by ALTER TABLE commands, but don't + * rebuild data. + */ + if (tab->constraints != NIL || tab->verify_new_notnull || + tab->partition_constraint != NULL) + ATRewriteTable(tab, InvalidOid, lockmode); + + /* + * If we had SET TABLESPACE but no reason to reconstruct tuples, + * just do a block-by-block copy. + */ + if (tab->newTableSpace) + ATExecSetTableSpace(tab->relid, tab->newTableSpace, lockmode); + } + + /* + * Also change persistence of owned sequences, so that it matches the + * table persistence. 
+ */ + if (tab->chgPersistence) + { + List *seqlist = getOwnedSequences(tab->relid); + ListCell *lc; + + foreach(lc, seqlist) + { + Oid seq_relid = lfirst_oid(lc); + + SequenceChangePersistence(seq_relid, tab->newrelpersistence); + } + } + } + + /* + * Foreign key constraints are checked in a final pass, since (a) it's + * generally best to examine each one separately, and (b) it's at least + * theoretically possible that we have changed both relations of the + * foreign key, and we'd better have finished both rewrites before we try + * to read the tables. + */ + foreach(ltab, *wqueue) + { + AlteredTableInfo *tab = (AlteredTableInfo *) lfirst(ltab); + Relation rel = NULL; + ListCell *lcon; + + /* Relations without storage may be ignored here too */ + if (!RELKIND_HAS_STORAGE(tab->relkind)) + continue; + + foreach(lcon, tab->constraints) + { + NewConstraint *con = lfirst(lcon); + + if (con->contype == CONSTR_FOREIGN) + { + Constraint *fkconstraint = (Constraint *) con->qual; + Relation refrel; + + if (rel == NULL) + { + /* Long since locked, no need for another */ + rel = table_open(tab->relid, NoLock); + } + + refrel = table_open(con->refrelid, RowShareLock); + + validateForeignKeyConstraint(fkconstraint->conname, rel, refrel, + con->refindid, + con->conid); + + /* + * No need to mark the constraint row as validated, we did + * that when we inserted the row earlier. 
+ */ + + table_close(refrel, NoLock); + } + } + + if (rel) + table_close(rel, NoLock); + } + + /* Finally, run any afterStmts that were queued up */ + foreach(ltab, *wqueue) + { + AlteredTableInfo *tab = (AlteredTableInfo *) lfirst(ltab); + ListCell *lc; + + foreach(lc, tab->afterStmts) + { + Node *stmt = (Node *) lfirst(lc); + + ProcessUtilityForAlterTable(stmt, context); + CommandCounterIncrement(); + } + } +} + +/* + * ATRewriteTable: scan or rewrite one table + * + * OIDNewHeap is InvalidOid if we don't need to rewrite + */ +static void +ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode) +{ + Relation oldrel; + Relation newrel; + TupleDesc oldTupDesc; + TupleDesc newTupDesc; + bool needscan = false; + List *notnull_attrs; + int i; + ListCell *l; + EState *estate; + CommandId mycid; + BulkInsertState bistate; + int ti_options; + ExprState *partqualstate = NULL; + + /* + * Open the relation(s). We have surely already locked the existing + * table. + */ + oldrel = table_open(tab->relid, NoLock); + oldTupDesc = tab->oldDesc; + newTupDesc = RelationGetDescr(oldrel); /* includes all mods */ + + if (OidIsValid(OIDNewHeap)) + newrel = table_open(OIDNewHeap, lockmode); + else + newrel = NULL; + + /* + * Prepare a BulkInsertState and options for table_tuple_insert. The FSM + * is empty, so don't bother using it. 
+ */ + if (newrel) + { + mycid = GetCurrentCommandId(true); + bistate = GetBulkInsertState(); + ti_options = TABLE_INSERT_SKIP_FSM; + } + else + { + /* keep compiler quiet about using these uninitialized */ + mycid = 0; + bistate = NULL; + ti_options = 0; + } + + /* + * Generate the constraint and default execution states + */ + + estate = CreateExecutorState(); + + /* Build the needed expression execution states */ + foreach(l, tab->constraints) + { + NewConstraint *con = lfirst(l); + + switch (con->contype) + { + case CONSTR_CHECK: + needscan = true; + con->qualstate = ExecPrepareExpr((Expr *) con->qual, estate); + break; + case CONSTR_FOREIGN: + /* Nothing to do here */ + break; + default: + elog(ERROR, "unrecognized constraint type: %d", + (int) con->contype); + } + } + + /* Build expression execution states for partition check quals */ + if (tab->partition_constraint) + { + needscan = true; + partqualstate = ExecPrepareExpr(tab->partition_constraint, estate); + } + + foreach(l, tab->newvals) + { + NewColumnValue *ex = lfirst(l); + + /* expr already planned */ + ex->exprstate = ExecInitExpr((Expr *) ex->expr, NULL); + } + + notnull_attrs = NIL; + if (newrel || tab->verify_new_notnull) + { + /* + * If we are rebuilding the tuples OR if we added any new but not + * verified NOT NULL constraints, check all not-null constraints. This + * is a bit of overkill but it minimizes risk of bugs, and + * heap_attisnull is a pretty cheap test anyway. 
+ */ + for (i = 0; i < newTupDesc->natts; i++) + { + Form_pg_attribute attr = TupleDescAttr(newTupDesc, i); + + if (attr->attnotnull && !attr->attisdropped) + notnull_attrs = lappend_int(notnull_attrs, i); + } + if (notnull_attrs) + needscan = true; + } + + if (newrel || needscan) + { + ExprContext *econtext; + TupleTableSlot *oldslot; + TupleTableSlot *newslot; + TableScanDesc scan; + MemoryContext oldCxt; + List *dropped_attrs = NIL; + ListCell *lc; + Snapshot snapshot; + + if (newrel) + ereport(DEBUG1, + (errmsg_internal("rewriting table \"%s\"", + RelationGetRelationName(oldrel)))); + else + ereport(DEBUG1, + (errmsg_internal("verifying table \"%s\"", + RelationGetRelationName(oldrel)))); + + if (newrel) + { + /* + * All predicate locks on the tuples or pages are about to be made + * invalid, because we move tuples around. Promote them to + * relation locks. + */ + TransferPredicateLocksToHeapRelation(oldrel); + } + + econtext = GetPerTupleExprContext(estate); + + /* + * Create necessary tuple slots. When rewriting, two slots are needed, + * otherwise one suffices. In the case where one slot suffices, we + * need to use the new tuple descriptor, otherwise some constraints + * can't be evaluated. Note that even when the tuple layout is the + * same and no rewrite is required, the tupDescs might not be + * (consider ADD COLUMN without a default). + */ + if (tab->rewrite) + { + Assert(newrel != NULL); + oldslot = MakeSingleTupleTableSlot(oldTupDesc, + table_slot_callbacks(oldrel)); + newslot = MakeSingleTupleTableSlot(newTupDesc, + table_slot_callbacks(newrel)); + + /* + * Set all columns in the new slot to NULL initially, to ensure + * columns added as part of the rewrite are initialized to NULL. + * That is necessary as tab->newvals will not contain an + * expression for columns with a NULL default, e.g. when adding a + * column without a default together with a column with a default + * requiring an actual rewrite. 
+ */ + ExecStoreAllNullTuple(newslot); + } + else + { + oldslot = MakeSingleTupleTableSlot(newTupDesc, + table_slot_callbacks(oldrel)); + newslot = NULL; + } + + /* + * Any attributes that are dropped according to the new tuple + * descriptor can be set to NULL. We precompute the list of dropped + * attributes to avoid needing to do so in the per-tuple loop. + */ + for (i = 0; i < newTupDesc->natts; i++) + { + if (TupleDescAttr(newTupDesc, i)->attisdropped) + dropped_attrs = lappend_int(dropped_attrs, i); + } + + /* + * Scan through the rows, generating a new row if needed and then + * checking all the constraints. + */ + snapshot = RegisterSnapshot(GetLatestSnapshot()); + scan = table_beginscan(oldrel, snapshot, 0, NULL); + + /* + * Switch to per-tuple memory context and reset it for each tuple + * produced, so we don't leak memory. + */ + oldCxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate)); + + while (table_scan_getnextslot(scan, ForwardScanDirection, oldslot)) + { + TupleTableSlot *insertslot; + + if (tab->rewrite > 0) + { + /* Extract data from old tuple */ + slot_getallattrs(oldslot); + ExecClearTuple(newslot); + + /* copy attributes */ + memcpy(newslot->tts_values, oldslot->tts_values, + sizeof(Datum) * oldslot->tts_nvalid); + memcpy(newslot->tts_isnull, oldslot->tts_isnull, + sizeof(bool) * oldslot->tts_nvalid); + + /* Set dropped attributes to null in new tuple */ + foreach(lc, dropped_attrs) + newslot->tts_isnull[lfirst_int(lc)] = true; + + /* + * Constraints and GENERATED expressions might reference the + * tableoid column, so fill tts_tableOid with the desired + * value. (We must do this each time, because it gets + * overwritten with newrel's OID during storing.) + */ + newslot->tts_tableOid = RelationGetRelid(oldrel); + + /* + * Process supplied expressions to replace selected columns. + * + * First, evaluate expressions whose inputs come from the old + * tuple. 
+ */ + econtext->ecxt_scantuple = oldslot; + + foreach(l, tab->newvals) + { + NewColumnValue *ex = lfirst(l); + + if (ex->is_generated) + continue; + + newslot->tts_values[ex->attnum - 1] + = ExecEvalExpr(ex->exprstate, + econtext, + &newslot->tts_isnull[ex->attnum - 1]); + } + + ExecStoreVirtualTuple(newslot); + + /* + * Now, evaluate any expressions whose inputs come from the + * new tuple. We assume these columns won't reference each + * other, so that there's no ordering dependency. + */ + econtext->ecxt_scantuple = newslot; + + foreach(l, tab->newvals) + { + NewColumnValue *ex = lfirst(l); + + if (!ex->is_generated) + continue; + + newslot->tts_values[ex->attnum - 1] + = ExecEvalExpr(ex->exprstate, + econtext, + &newslot->tts_isnull[ex->attnum - 1]); + } + + insertslot = newslot; + } + else + { + /* + * If there's no rewrite, old and new table are guaranteed to + * have the same AM, so we can just use the old slot to verify + * new constraints etc. + */ + insertslot = oldslot; + } + + /* Now check any constraints on the possibly-changed tuple */ + econtext->ecxt_scantuple = insertslot; + + foreach(l, notnull_attrs) + { + int attn = lfirst_int(l); + + if (slot_attisnull(insertslot, attn + 1)) + { + Form_pg_attribute attr = TupleDescAttr(newTupDesc, attn); + + ereport(ERROR, + (errcode(ERRCODE_NOT_NULL_VIOLATION), + errmsg("column \"%s\" of relation \"%s\" contains null values", + NameStr(attr->attname), + RelationGetRelationName(oldrel)), + errtablecol(oldrel, attn + 1))); + } + } + + foreach(l, tab->constraints) + { + NewConstraint *con = lfirst(l); + + switch (con->contype) + { + case CONSTR_CHECK: + if (!ExecCheck(con->qualstate, econtext)) + ereport(ERROR, + (errcode(ERRCODE_CHECK_VIOLATION), + errmsg("check constraint \"%s\" of relation \"%s\" is violated by some row", + con->name, + RelationGetRelationName(oldrel)), + errtableconstraint(oldrel, con->name))); + break; + case CONSTR_FOREIGN: + /* Nothing to do here */ + break; + default: + elog(ERROR, 
"unrecognized constraint type: %d", + (int) con->contype); + } + } + + if (partqualstate && !ExecCheck(partqualstate, econtext)) + { + if (tab->validate_default) + ereport(ERROR, + (errcode(ERRCODE_CHECK_VIOLATION), + errmsg("updated partition constraint for default partition \"%s\" would be violated by some row", + RelationGetRelationName(oldrel)), + errtable(oldrel))); + else + ereport(ERROR, + (errcode(ERRCODE_CHECK_VIOLATION), + errmsg("partition constraint of relation \"%s\" is violated by some row", + RelationGetRelationName(oldrel)), + errtable(oldrel))); + } + + /* Write the tuple out to the new relation */ + if (newrel) + table_tuple_insert(newrel, insertslot, mycid, + ti_options, bistate); + + ResetExprContext(econtext); + + CHECK_FOR_INTERRUPTS(); + } + + MemoryContextSwitchTo(oldCxt); + table_endscan(scan); + UnregisterSnapshot(snapshot); + + ExecDropSingleTupleTableSlot(oldslot); + if (newslot) + ExecDropSingleTupleTableSlot(newslot); + } + + FreeExecutorState(estate); + + table_close(oldrel, NoLock); + if (newrel) + { + FreeBulkInsertState(bistate); + + table_finish_bulk_insert(newrel, ti_options); + + table_close(newrel, NoLock); + } +} + +/* + * ATGetQueueEntry: find or create an entry in the ALTER TABLE work queue + */ +static AlteredTableInfo * +ATGetQueueEntry(List **wqueue, Relation rel) +{ + Oid relid = RelationGetRelid(rel); + AlteredTableInfo *tab; + ListCell *ltab; + + foreach(ltab, *wqueue) + { + tab = (AlteredTableInfo *) lfirst(ltab); + if (tab->relid == relid) + return tab; + } + + /* + * Not there, so add it. Note that we make a copy of the relation's + * existing descriptor before anything interesting can happen to it. 
+ */ + tab = (AlteredTableInfo *) palloc0(sizeof(AlteredTableInfo)); + tab->relid = relid; + tab->rel = NULL; /* set later */ + tab->relkind = rel->rd_rel->relkind; + tab->oldDesc = CreateTupleDescCopyConstr(RelationGetDescr(rel)); + tab->newAccessMethod = InvalidOid; + tab->newTableSpace = InvalidOid; + tab->newrelpersistence = RELPERSISTENCE_PERMANENT; + tab->chgPersistence = false; + + *wqueue = lappend(*wqueue, tab); + + return tab; +} + +static const char * +alter_table_type_to_string(AlterTableType cmdtype) +{ + switch (cmdtype) + { + case AT_AddColumn: + case AT_AddColumnRecurse: + case AT_AddColumnToView: + return "ADD COLUMN"; + case AT_ColumnDefault: + case AT_CookedColumnDefault: + return "ALTER COLUMN ... SET DEFAULT"; + case AT_DropNotNull: + return "ALTER COLUMN ... DROP NOT NULL"; + case AT_SetNotNull: + return "ALTER COLUMN ... SET NOT NULL"; + case AT_DropExpression: + return "ALTER COLUMN ... DROP EXPRESSION"; + case AT_CheckNotNull: + return NULL; /* not real grammar */ + case AT_SetStatistics: + return "ALTER COLUMN ... SET STATISTICS"; + case AT_SetOptions: + return "ALTER COLUMN ... SET"; + case AT_ResetOptions: + return "ALTER COLUMN ... RESET"; + case AT_SetStorage: + return "ALTER COLUMN ... SET STORAGE"; + case AT_SetCompression: + return "ALTER COLUMN ... 
SET COMPRESSION"; + case AT_DropColumn: + case AT_DropColumnRecurse: + return "DROP COLUMN"; + case AT_AddIndex: + case AT_ReAddIndex: + return NULL; /* not real grammar */ + case AT_AddConstraint: + case AT_AddConstraintRecurse: + case AT_ReAddConstraint: + case AT_ReAddDomainConstraint: + case AT_AddIndexConstraint: + return "ADD CONSTRAINT"; + case AT_AlterConstraint: + return "ALTER CONSTRAINT"; + case AT_ValidateConstraint: + case AT_ValidateConstraintRecurse: + return "VALIDATE CONSTRAINT"; + case AT_DropConstraint: + case AT_DropConstraintRecurse: + return "DROP CONSTRAINT"; + case AT_ReAddComment: + return NULL; /* not real grammar */ + case AT_AlterColumnType: + return "ALTER COLUMN ... SET DATA TYPE"; + case AT_AlterColumnGenericOptions: + return "ALTER COLUMN ... OPTIONS"; + case AT_ChangeOwner: + return "OWNER TO"; + case AT_ClusterOn: + return "CLUSTER ON"; + case AT_DropCluster: + return "SET WITHOUT CLUSTER"; + case AT_SetAccessMethod: + return "SET ACCESS METHOD"; + case AT_SetLogged: + return "SET LOGGED"; + case AT_SetUnLogged: + return "SET UNLOGGED"; + case AT_DropOids: + return "SET WITHOUT OIDS"; + case AT_SetTableSpace: + return "SET TABLESPACE"; + case AT_SetRelOptions: + return "SET"; + case AT_ResetRelOptions: + return "RESET"; + case AT_ReplaceRelOptions: + return NULL; /* not real grammar */ + case AT_EnableTrig: + return "ENABLE TRIGGER"; + case AT_EnableAlwaysTrig: + return "ENABLE ALWAYS TRIGGER"; + case AT_EnableReplicaTrig: + return "ENABLE REPLICA TRIGGER"; + case AT_DisableTrig: + return "DISABLE TRIGGER"; + case AT_EnableTrigAll: + return "ENABLE TRIGGER ALL"; + case AT_DisableTrigAll: + return "DISABLE TRIGGER ALL"; + case AT_EnableTrigUser: + return "ENABLE TRIGGER USER"; + case AT_DisableTrigUser: + return "DISABLE TRIGGER USER"; + case AT_EnableRule: + return "ENABLE RULE"; + case AT_EnableAlwaysRule: + return "ENABLE ALWAYS RULE"; + case AT_EnableReplicaRule: + return "ENABLE REPLICA RULE"; + case AT_DisableRule: + return 
"DISABLE RULE"; + case AT_AddInherit: + return "INHERIT"; + case AT_DropInherit: + return "NO INHERIT"; + case AT_AddOf: + return "OF"; + case AT_DropOf: + return "NOT OF"; + case AT_ReplicaIdentity: + return "REPLICA IDENTITY"; + case AT_EnableRowSecurity: + return "ENABLE ROW SECURITY"; + case AT_DisableRowSecurity: + return "DISABLE ROW SECURITY"; + case AT_ForceRowSecurity: + return "FORCE ROW SECURITY"; + case AT_NoForceRowSecurity: + return "NO FORCE ROW SECURITY"; + case AT_GenericOptions: + return "OPTIONS"; + case AT_AttachPartition: + return "ATTACH PARTITION"; + case AT_DetachPartition: + return "DETACH PARTITION"; + case AT_DetachPartitionFinalize: + return "DETACH PARTITION ... FINALIZE"; + case AT_AddIdentity: + return "ALTER COLUMN ... ADD IDENTITY"; + case AT_SetIdentity: + return "ALTER COLUMN ... SET"; + case AT_DropIdentity: + return "ALTER COLUMN ... DROP IDENTITY"; + case AT_ReAddStatistics: + return NULL; /* not real grammar */ + } + + return NULL; +} + +/* + * ATSimplePermissions + * + * - Ensure that it is a relation (or possibly a view) + * - Ensure this user is the owner + * - Ensure that it is not a system table + */ +static void +ATSimplePermissions(AlterTableType cmdtype, Relation rel, int allowed_targets) +{ + int actual_target; + + switch (rel->rd_rel->relkind) + { + case RELKIND_RELATION: + case RELKIND_PARTITIONED_TABLE: + actual_target = ATT_TABLE; + break; + case RELKIND_VIEW: + actual_target = ATT_VIEW; + break; + case RELKIND_MATVIEW: + actual_target = ATT_MATVIEW; + break; + case RELKIND_INDEX: + actual_target = ATT_INDEX; + break; + case RELKIND_PARTITIONED_INDEX: + actual_target = ATT_PARTITIONED_INDEX; + break; + case RELKIND_COMPOSITE_TYPE: + actual_target = ATT_COMPOSITE_TYPE; + break; + case RELKIND_FOREIGN_TABLE: + actual_target = ATT_FOREIGN_TABLE; + break; + case RELKIND_SEQUENCE: + actual_target = ATT_SEQUENCE; + break; + default: + actual_target = 0; + break; + } + + /* Wrong target type? 
*/
+	if ((actual_target & allowed_targets) == 0)
+	{
+		const char *action_str = alter_table_type_to_string(cmdtype);
+
+		/* NULL action_str marks internal-only subtypes with no real grammar */
+		if (action_str)
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+			/* translator: %s is a group of some SQL keywords */
+					 errmsg("ALTER action %s cannot be performed on relation \"%s\"",
+							action_str, RelationGetRelationName(rel)),
+					 errdetail_relkind_not_supported(rel->rd_rel->relkind)));
+		else
+			/* internal error? */
+			elog(ERROR, "invalid ALTER action attempted on relation \"%s\"",
+				 RelationGetRelationName(rel));
+	}
+
+	/* Permissions checks: the caller must be the relation's owner */
+	if (!pg_class_ownercheck(RelationGetRelid(rel), GetUserId()))
+		aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(rel->rd_rel->relkind),
+					   RelationGetRelationName(rel));
+
+	/* System catalogs are off-limits unless allow_system_table_mods is set */
+	if (!allowSystemTableMods && IsSystemRelation(rel))
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 errmsg("permission denied: \"%s\" is a system catalog",
+						RelationGetRelationName(rel))));
+}
+
+/*
+ * ATSimpleRecursion
+ *
+ * Simple table recursion sufficient for most ALTER TABLE operations.
+ * All direct and indirect children are processed in an unspecified order.
+ * Note that if a child inherits from the original table via multiple
+ * inheritance paths, it will be visited just once.
+ */
+static void
+ATSimpleRecursion(List **wqueue, Relation rel,
+				  AlterTableCmd *cmd, bool recurse, LOCKMODE lockmode,
+				  AlterTableUtilityContext *context)
+{
+	/*
+	 * Propagate to children, if desired and if there are (or might be) any
+	 * children.
+	 */
+	if (recurse && rel->rd_rel->relhassubclass)
+	{
+		Oid			relid = RelationGetRelid(rel);
+		ListCell   *child;
+		List	   *children;
+
+		children = find_all_inheritors(relid, lockmode, NULL);
+
+		/*
+		 * find_all_inheritors does the recursive search of the inheritance
+		 * hierarchy, so all we have to do is process all of the relids in the
+		 * list that it returns.
+		 */
+		foreach(child, children)
+		{
+			Oid			childrelid = lfirst_oid(child);
+			Relation	childrel;
+
+			/* the inheritors list includes the parent itself; skip it */
+			if (childrelid == relid)
+				continue;
+			/* find_all_inheritors already got lock */
+			childrel = relation_open(childrelid, NoLock);
+			CheckTableNotInUse(childrel, "ALTER TABLE");
+			ATPrepCmd(wqueue, childrel, cmd, false, true, lockmode, context);
+			relation_close(childrel, NoLock);
+		}
+	}
+}
+
+/*
+ * Obtain list of partitions of the given table, locking them all at the given
+ * lockmode and ensuring that they all pass CheckTableNotInUse.
+ *
+ * This function is a no-op if the given relation is not a partitioned table;
+ * in particular, nothing is done if it's a legacy inheritance parent.
+ */
+static void
+ATCheckPartitionsNotInUse(Relation rel, LOCKMODE lockmode)
+{
+	if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+	{
+		List	   *inh;
+		ListCell   *cell;
+
+		/* locks are taken here, parent first, in find_all_inheritors order */
+		inh = find_all_inheritors(RelationGetRelid(rel), lockmode, NULL);
+		/* first element is the parent rel; must ignore it */
+		for_each_from(cell, inh, 1)
+		{
+			Relation	childrel;
+
+			/* find_all_inheritors already got lock */
+			childrel = table_open(lfirst_oid(cell), NoLock);
+			CheckTableNotInUse(childrel, "ALTER TABLE");
+			table_close(childrel, NoLock);
+		}
+		list_free(inh);
+	}
+}
+
+/*
+ * ATTypedTableRecursion
+ *
+ * Propagate ALTER TYPE operations to the typed tables of that type.
+ * Also check the RESTRICT/CASCADE behavior.  Given CASCADE, also permit
+ * recursion to inheritance children of the typed tables.
+ */
+static void
+ATTypedTableRecursion(List **wqueue, Relation rel, AlterTableCmd *cmd,
+					  LOCKMODE lockmode, AlterTableUtilityContext *context)
+{
+	ListCell   *child;
+	List	   *children;
+
+	Assert(rel->rd_rel->relkind == RELKIND_COMPOSITE_TYPE);
+
+	/* RESTRICT vs CASCADE is enforced inside find_typed_table_dependencies */
+	children = find_typed_table_dependencies(rel->rd_rel->reltype,
+											 RelationGetRelationName(rel),
+											 cmd->behavior);
+
+	foreach(child, children)
+	{
+		Oid			childrelid = lfirst_oid(child);
+		Relation	childrel;
+
+		/* unlike ATSimpleRecursion, the child's lock is acquired here */
+		childrel = relation_open(childrelid, lockmode);
+		CheckTableNotInUse(childrel, "ALTER TABLE");
+		ATPrepCmd(wqueue, childrel, cmd, true, true, lockmode, context);
+		relation_close(childrel, NoLock);
+	}
+}
+
+
+/*
+ * find_composite_type_dependencies
+ *
+ * Check to see if the type "typeOid" is being used as a column in some table
+ * (possibly nested several levels deep in composite types, arrays, etc!).
+ * Eventually, we'd like to propagate the check or rewrite operation
+ * into such tables, but for now, just error out if we find any.
+ *
+ * Caller should provide either the associated relation of a rowtype,
+ * or a type name (not both) for use in the error message, if any.
+ *
+ * Note that "typeOid" is not necessarily a composite type; it could also be
+ * another container type such as an array or range, or a domain over one of
+ * these things.  The name of this function is therefore somewhat historical,
+ * but it's not worth changing.
+ *
+ * We assume that functions and views depending on the type are not reasons
+ * to reject the ALTER.  (How safe is this really?)
+ */
+void
+find_composite_type_dependencies(Oid typeOid, Relation origRelation,
+								 const char *origTypeName)
+{
+	Relation	depRel;
+	ScanKeyData key[2];
+	SysScanDesc depScan;
+	HeapTuple	depTup;
+
+	/* since this function recurses, it could be driven to stack overflow */
+	check_stack_depth();
+
+	/*
+	 * We scan pg_depend to find those things that depend on the given type.
+	 * (We assume we can ignore refobjsubid for a type.)
+	 */
+	depRel = table_open(DependRelationId, AccessShareLock);
+
+	/* match pg_depend rows whose referenced object is exactly this type */
+	ScanKeyInit(&key[0],
+				Anum_pg_depend_refclassid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(TypeRelationId));
+	ScanKeyInit(&key[1],
+				Anum_pg_depend_refobjid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(typeOid));
+
+	/* index scan on (refclassid, refobjid) via the reference index */
+	depScan = systable_beginscan(depRel, DependReferenceIndexId, true,
+								 NULL, 2, key);
+
+	while (HeapTupleIsValid(depTup = systable_getnext(depScan)))
+	{
+		Form_pg_depend pg_depend = (Form_pg_depend) GETSTRUCT(depTup);
+		Relation	rel;
+		TupleDesc	tupleDesc;
+		Form_pg_attribute att;
+
+		/* Check for directly dependent types */
+		if (pg_depend->classid == TypeRelationId)
+		{
+			/*
+			 * This must be an array, domain, or range containing the given
+			 * type, so recursively check for uses of this type.  Note that
+			 * any error message will mention the original type not the
+			 * container; this is intentional.
+			 */
+			find_composite_type_dependencies(pg_depend->objid,
+											 origRelation, origTypeName);
+			continue;
+		}
+
+		/* Else, ignore dependees that aren't relations */
+		if (pg_depend->classid != RelationRelationId)
+			continue;
+
+		rel = relation_open(pg_depend->objid, AccessShareLock);
+		tupleDesc = RelationGetDescr(rel);
+
+		/*
+		 * If objsubid identifies a specific column, refer to that in error
+		 * messages.  Otherwise, search to see if there's a user column of the
+		 * type.  (We assume system columns are never of interesting types.)
+		 * The search is needed because an index containing an expression
+		 * column of the target type will just be recorded as a whole-relation
+		 * dependency.  If we do not find a column of the type, the dependency
+		 * must indicate that the type is transiently referenced in an index
+		 * expression but not stored on disk, which we assume is OK, just as
+		 * we do for references in views.  (It could also be that the target
+		 * type is embedded in some container type that is stored in an index
+		 * column, but the previous recursion should catch such cases.)
+ */ + if (pg_depend->objsubid > 0 && pg_depend->objsubid <= tupleDesc->natts) + att = TupleDescAttr(tupleDesc, pg_depend->objsubid - 1); + else + { + att = NULL; + for (int attno = 1; attno <= tupleDesc->natts; attno++) + { + att = TupleDescAttr(tupleDesc, attno - 1); + if (att->atttypid == typeOid && !att->attisdropped) + break; + att = NULL; + } + if (att == NULL) + { + /* No such column, so assume OK */ + relation_close(rel, AccessShareLock); + continue; + } + } + + /* + * We definitely should reject if the relation has storage. If it's + * partitioned, then perhaps we don't have to reject: if there are + * partitions then we'll fail when we find one, else there is no + * stored data to worry about. However, it's possible that the type + * change would affect conclusions about whether the type is sortable + * or hashable and thus (if it's a partitioning column) break the + * partitioning rule. For now, reject for partitioned rels too. + */ + if (RELKIND_HAS_STORAGE(rel->rd_rel->relkind) || + RELKIND_HAS_PARTITIONS(rel->rd_rel->relkind)) + { + if (origTypeName) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot alter type \"%s\" because column \"%s.%s\" uses it", + origTypeName, + RelationGetRelationName(rel), + NameStr(att->attname)))); + else if (origRelation->rd_rel->relkind == RELKIND_COMPOSITE_TYPE) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot alter type \"%s\" because column \"%s.%s\" uses it", + RelationGetRelationName(origRelation), + RelationGetRelationName(rel), + NameStr(att->attname)))); + else if (origRelation->rd_rel->relkind == RELKIND_FOREIGN_TABLE) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot alter foreign table \"%s\" because column \"%s.%s\" uses its row type", + RelationGetRelationName(origRelation), + RelationGetRelationName(rel), + NameStr(att->attname)))); + else + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot alter table 
\"%s\" because column \"%s.%s\" uses its row type",
                            RelationGetRelationName(origRelation),
                            RelationGetRelationName(rel),
                            NameStr(att->attname))));
        }
        else if (OidIsValid(rel->rd_rel->reltype))
        {
            /*
             * A view or composite type itself isn't a problem, but we must
             * recursively check for indirect dependencies via its rowtype.
             */
            find_composite_type_dependencies(rel->rd_rel->reltype,
                                             origRelation, origTypeName);
        }

        relation_close(rel, AccessShareLock);
    }

    systable_endscan(depScan);

    relation_close(depRel, AccessShareLock);
}


/*
 * find_typed_table_dependencies
 *
 * Check to see if a composite type is being used as the type of a
 * typed table.  Abort if any are found and behavior is RESTRICT.
 * Else return the list of tables.
 */
static List *
find_typed_table_dependencies(Oid typeOid, const char *typeName, DropBehavior behavior)
{
    Relation    classRel;
    ScanKeyData key[1];
    TableScanDesc scan;
    HeapTuple   tuple;
    List       *result = NIL;

    classRel = table_open(RelationRelationId, AccessShareLock);

    /* Scan pg_class for relations whose reloftype matches the target type */
    ScanKeyInit(&key[0],
                Anum_pg_class_reloftype,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(typeOid));

    scan = table_beginscan_catalog(classRel, 1, key);

    while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    {
        Form_pg_class classform = (Form_pg_class) GETSTRUCT(tuple);

        /* Under RESTRICT, any dependent typed table is an immediate error */
        if (behavior == DROP_RESTRICT)
            ereport(ERROR,
                    (errcode(ERRCODE_DEPENDENT_OBJECTS_STILL_EXIST),
                     errmsg("cannot alter type \"%s\" because it is the type of a typed table",
                            typeName),
                     errhint("Use ALTER ... CASCADE to alter the typed tables too.")));
        else
            result = lappend_oid(result, classform->oid);
    }

    table_endscan(scan);
    table_close(classRel, AccessShareLock);

    return result;
}


/*
 * check_of_type
 *
 * Check whether a type is suitable for CREATE TABLE OF/ALTER TABLE OF.  If it
 * isn't suitable, throw an error.  Currently, we require that the type
 * originated with CREATE TYPE AS.  We could support any row type, but doing so
 * would require handling a number of extra corner cases in the DDL commands.
 * (Also, allowing domain-over-composite would open up a can of worms about
 * whether and how the domain's constraints should apply to derived tables.)
 */
void
check_of_type(HeapTuple typetuple)
{
    Form_pg_type typ = (Form_pg_type) GETSTRUCT(typetuple);
    bool        typeOk = false;

    if (typ->typtype == TYPTYPE_COMPOSITE)
    {
        Relation    typeRelation;

        Assert(OidIsValid(typ->typrelid));
        typeRelation = relation_open(typ->typrelid, AccessShareLock);
        typeOk = (typeRelation->rd_rel->relkind == RELKIND_COMPOSITE_TYPE);

        /*
         * Close the parent rel, but keep our AccessShareLock on it until xact
         * commit.  That will prevent someone else from deleting or ALTERing
         * the type before the typed table creation/conversion commits.
         */
        relation_close(typeRelation, NoLock);
    }
    /*
     * Not composite, or composite but owned by a relation other than a
     * stand-alone composite type (e.g. a table's implicit rowtype).
     */
    if (!typeOk)
        ereport(ERROR,
                (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                 errmsg("type %s is not a composite type",
                        format_type_be(typ->oid))));
}


/*
 * ALTER TABLE ADD COLUMN
 *
 * Adds an additional attribute to a relation making the assumption that
 * CHECK, NOT NULL, and FOREIGN KEY constraints will be removed from the
 * AT_AddColumn AlterTableCmd by parse_utilcmd.c and added as independent
 * AlterTableCmd's.
 *
 * ADD COLUMN cannot use the normal ALTER TABLE recursion mechanism, because we
 * have to decide at runtime whether to recurse or not depending on whether we
 * actually add a column or merely merge with an existing column.  (We can't
 * check this in a static pre-pass because it won't handle multiple inheritance
 * situations correctly.)
 */
static void
ATPrepAddColumn(List **wqueue, Relation rel, bool recurse, bool recursing,
                bool is_view, AlterTableCmd *cmd, LOCKMODE lockmode,
                AlterTableUtilityContext *context)
{
    /* Typed tables take their columns from the type; direct ADD is rejected */
    if (rel->rd_rel->reloftype && !recursing)
        ereport(ERROR,
                (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                 errmsg("cannot add column to typed table")));

    if (rel->rd_rel->relkind == RELKIND_COMPOSITE_TYPE)
        ATTypedTableRecursion(wqueue, rel, cmd, lockmode, context);

    if (recurse && !is_view)
        cmd->subtype = AT_AddColumnRecurse;
}

/*
 * Add a column to a table.  The return value is the address of the
 * new column in the parent relation.
 *
 * cmd is pass-by-ref so that we can replace it with the parse-transformed
 * copy (but that happens only after we check for IF NOT EXISTS).
 */
static ObjectAddress
ATExecAddColumn(List **wqueue, AlteredTableInfo *tab, Relation rel,
                AlterTableCmd **cmd,
                bool recurse, bool recursing,
                LOCKMODE lockmode, int cur_pass,
                AlterTableUtilityContext *context)
{
    Oid         myrelid = RelationGetRelid(rel);
    ColumnDef  *colDef = castNode(ColumnDef, (*cmd)->def);
    bool        if_not_exists = (*cmd)->missing_ok;
    Relation    pgclass,
                attrdesc;
    HeapTuple   reltup;
    FormData_pg_attribute attribute;
    int         newattnum;
    char        relkind;
    HeapTuple   typeTuple;
    Oid         typeOid;
    int32       typmod;
    Oid         collOid;
    Form_pg_type tform;
    Expr       *defval;
    List       *children;
    ListCell   *child;
    AlterTableCmd *childcmd;
    AclResult   aclresult;
    ObjectAddress address;
    TupleDesc   tupdesc;
    /* one-element array so the single new tuple fits InsertPgAttributeTuples */
    FormData_pg_attribute *aattr[] = {&attribute};

    /* At top level, permission check was done in ATPrepCmd, else do it */
    if (recursing)
        ATSimplePermissions((*cmd)->subtype, rel, ATT_TABLE | ATT_FOREIGN_TABLE);

    if (rel->rd_rel->relispartition && !recursing)
        ereport(ERROR,
                (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                 errmsg("cannot add column to a partition")));

    attrdesc = table_open(AttributeRelationId, RowExclusiveLock);

    /*
     * Are we adding the column to a recursion child?  If so, check whether to
     * merge with an existing definition for the column.  If we do merge, we
     * must not recurse.  Children will already have the column, and recursing
     * into them would mess up attinhcount.
     */
    if (colDef->inhcount > 0)
    {
        HeapTuple   tuple;

        /* Does child already have a column by this name? */
        tuple = SearchSysCacheCopyAttName(myrelid, colDef->colname);
        if (HeapTupleIsValid(tuple))
        {
            Form_pg_attribute childatt = (Form_pg_attribute) GETSTRUCT(tuple);
            Oid         ctypeId;
            int32       ctypmod;
            Oid         ccollid;

            /* Child column must match on type, typmod, and collation */
            typenameTypeIdAndMod(NULL, colDef->typeName, &ctypeId, &ctypmod);
            if (ctypeId != childatt->atttypid ||
                ctypmod != childatt->atttypmod)
                ereport(ERROR,
                        (errcode(ERRCODE_DATATYPE_MISMATCH),
                         errmsg("child table \"%s\" has different type for column \"%s\"",
                                RelationGetRelationName(rel), colDef->colname)));
            ccollid = GetColumnDefCollation(NULL, colDef, ctypeId);
            if (ccollid != childatt->attcollation)
                ereport(ERROR,
                        (errcode(ERRCODE_COLLATION_MISMATCH),
                         errmsg("child table \"%s\" has different collation for column \"%s\"",
                                RelationGetRelationName(rel), colDef->colname),
                         errdetail("\"%s\" versus \"%s\"",
                                   get_collation_name(ccollid),
                                   get_collation_name(childatt->attcollation))));

            /* Bump the existing child att's inhcount */
            childatt->attinhcount++;
            CatalogTupleUpdate(attrdesc, &tuple->t_self, tuple);

            heap_freetuple(tuple);

            /* Inform the user about the merge */
            ereport(NOTICE,
                    (errmsg("merging definition of column \"%s\" for child \"%s\"",
                            colDef->colname, RelationGetRelationName(rel))));

            table_close(attrdesc, RowExclusiveLock);
            return InvalidObjectAddress;
        }
    }

    /* skip if the name already exists and if_not_exists is true */
    if (!check_for_column_name_collision(rel, colDef->colname, if_not_exists))
    {
        table_close(attrdesc, RowExclusiveLock);
        return InvalidObjectAddress;
    }

    /*
     * Okay, we need to add the column, so go ahead and do parse
     * transformation.  This can result in queueing up, or even immediately
     * executing, subsidiary operations (such as creation of unique indexes);
     * so we mustn't do it until we have made the if_not_exists check.
     *
     * When recursing, the command was already transformed and we needn't do
     * so again.  Also, if context isn't given we can't transform.  (That
     * currently happens only for AT_AddColumnToView; we expect that view.c
     * passed us a ColumnDef that doesn't need work.)
     */
    if (context != NULL && !recursing)
    {
        *cmd = ATParseTransformCmd(wqueue, tab, rel, *cmd, recurse, lockmode,
                                   cur_pass, context);
        Assert(*cmd != NULL);
        colDef = castNode(ColumnDef, (*cmd)->def);
    }

    /*
     * Cannot add identity column if table has children, because identity does
     * not inherit.  (Adding column and identity separately will work.)
     */
    if (colDef->identity &&
        recurse &&
        find_inheritance_children(myrelid, NoLock) != NIL)
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
                 errmsg("cannot recursively add identity column to table that has child tables")));

    pgclass = table_open(RelationRelationId, RowExclusiveLock);

    reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(myrelid));
    if (!HeapTupleIsValid(reltup))
        elog(ERROR, "cache lookup failed for relation %u", myrelid);
    relkind = ((Form_pg_class) GETSTRUCT(reltup))->relkind;

    /* Determine the new attribute's number */
    newattnum = ((Form_pg_class) GETSTRUCT(reltup))->relnatts + 1;
    if (newattnum > MaxHeapAttributeNumber)
        ereport(ERROR,
                (errcode(ERRCODE_TOO_MANY_COLUMNS),
                 errmsg("tables can have at most %d columns",
                        MaxHeapAttributeNumber)));

    typeTuple = typenameType(NULL, colDef->typeName, &typmod);
    tform = (Form_pg_type) GETSTRUCT(typeTuple);
    typeOid = tform->oid;

    aclresult = pg_type_aclcheck(typeOid, GetUserId(), ACL_USAGE);
    if (aclresult != ACLCHECK_OK)
        aclcheck_error_type(aclresult, typeOid);

    collOid = GetColumnDefCollation(NULL, colDef, typeOid);

    /* make sure datatype is legal for a column */
    CheckAttributeType(colDef->colname, typeOid, collOid,
                       list_make1_oid(rel->rd_rel->reltype),
                       0);

    /*
     * Construct new attribute's pg_attribute entry.  (Variable-length fields
     * are handled by InsertPgAttributeTuples().)
     */
    attribute.attrelid = myrelid;
    namestrcpy(&(attribute.attname), colDef->colname);
    attribute.atttypid = typeOid;
    /* newattnum is always > 0 here, so this selects the default target (-1) */
    attribute.attstattarget = (newattnum > 0) ? -1 : 0;
    attribute.attlen = tform->typlen;
    attribute.attnum = newattnum;
    attribute.attndims = list_length(colDef->typeName->arrayBounds);
    attribute.atttypmod = typmod;
    attribute.attbyval = tform->typbyval;
    attribute.attalign = tform->typalign;
    attribute.attstorage = tform->typstorage;
    attribute.attcompression = GetAttributeCompression(typeOid,
                                                       colDef->compression);
    attribute.attnotnull = colDef->is_not_null;
    attribute.atthasdef = false;
    attribute.atthasmissing = false;
    attribute.attidentity = colDef->identity;
    attribute.attgenerated = colDef->generated;
    attribute.attisdropped = false;
    attribute.attislocal = colDef->is_local;
    attribute.attinhcount = colDef->inhcount;
    attribute.attcollation = collOid;

    ReleaseSysCache(typeTuple);

    tupdesc = CreateTupleDesc(lengthof(aattr), (FormData_pg_attribute **) &aattr);

    InsertPgAttributeTuples(attrdesc, tupdesc, myrelid, NULL, NULL);

    table_close(attrdesc, RowExclusiveLock);

    /*
     * Update pg_class tuple as appropriate
     */
    ((Form_pg_class) GETSTRUCT(reltup))->relnatts = newattnum;

    CatalogTupleUpdate(pgclass, &reltup->t_self, reltup);

    heap_freetuple(reltup);

    /* Post creation hook for new attribute */
    InvokeObjectPostCreateHook(RelationRelationId, myrelid, newattnum);

    table_close(pgclass, RowExclusiveLock);

    /* Make the attribute's catalog entry visible */
    CommandCounterIncrement();

    /*
     * Store the DEFAULT, if any, in the catalogs
     */
    if (colDef->raw_default)
    {
        RawColumnDefault *rawEnt;

        rawEnt = (RawColumnDefault *) palloc(sizeof(RawColumnDefault));
        rawEnt->attnum = attribute.attnum;
        rawEnt->raw_default = copyObject(colDef->raw_default);

        /*
         * Attempt to skip a complete table rewrite by storing the specified
         * DEFAULT value outside of the heap.  This may be disabled inside
         * AddRelationNewConstraints if the optimization cannot be applied.
         */
        rawEnt->missingMode = (!colDef->generated);

        rawEnt->generated = colDef->generated;

        /*
         * This function is intended for CREATE TABLE, so it processes a
         * _list_ of defaults, but we just do one.
         */
        AddRelationNewConstraints(rel, list_make1(rawEnt), NIL,
                                  false, true, false, NULL);

        /* Make the additional catalog changes visible */
        CommandCounterIncrement();

        /*
         * Did the request for a missing value work? If not we'll have to do a
         * rewrite
         */
        if (!rawEnt->missingMode)
            tab->rewrite |= AT_REWRITE_DEFAULT_VAL;
    }

    /*
     * Tell Phase 3 to fill in the default expression, if there is one.
     *
     * If there is no default, Phase 3 doesn't have to do anything, because
     * that effectively means that the default is NULL.  The heap tuple access
     * routines always check for attnum > # of attributes in tuple, and return
     * NULL if so, so without any modification of the tuple data we will get
     * the effect of NULL values in the new column.
     *
     * An exception occurs when the new column is of a domain type: the domain
     * might have a NOT NULL constraint, or a check constraint that indirectly
     * rejects nulls.  If there are any domain constraints then we construct
     * an explicit NULL default value that will be passed through
     * CoerceToDomain processing.  (This is a tad inefficient, since it causes
     * rewriting the table which we really don't have to do, but the present
     * design of domain processing doesn't offer any simple way of checking
     * the constraints more directly.)
     *
     * Note: we use build_column_default, and not just the cooked default
     * returned by AddRelationNewConstraints, so that the right thing happens
     * when a datatype's default applies.
     *
     * Note: it might seem that this should happen at the end of Phase 2, so
     * that the effects of subsequent subcommands can be taken into account.
     * It's intentional that we do it now, though.  The new column should be
     * filled according to what is said in the ADD COLUMN subcommand, so that
     * the effects are the same as if this subcommand had been run by itself
     * and the later subcommands had been issued in new ALTER TABLE commands.
     *
     * We can skip this entirely for relations without storage, since Phase 3
     * is certainly not going to touch them.  System attributes don't have
     * interesting defaults, either.
     */
    if (RELKIND_HAS_STORAGE(relkind) && attribute.attnum > 0)
    {
        /*
         * For an identity column, we can't use build_column_default(),
         * because the sequence ownership isn't set yet.  So do it manually.
         */
        if (colDef->identity)
        {
            NextValueExpr *nve = makeNode(NextValueExpr);

            nve->seqid = RangeVarGetRelid(colDef->identitySequence, NoLock, false);
            nve->typeId = typeOid;

            defval = (Expr *) nve;

            /* must do a rewrite for identity columns */
            tab->rewrite |= AT_REWRITE_DEFAULT_VAL;
        }
        else
            defval = (Expr *) build_column_default(rel, attribute.attnum);

        if (!defval && DomainHasConstraints(typeOid))
        {
            Oid         baseTypeId;
            int32       baseTypeMod;
            Oid         baseTypeColl;

            baseTypeMod = typmod;
            baseTypeId = getBaseTypeAndTypmod(typeOid, &baseTypeMod);
            baseTypeColl = get_typcollation(baseTypeId);
            defval = (Expr *) makeNullConst(baseTypeId, baseTypeMod, baseTypeColl);
            defval = (Expr *) coerce_to_target_type(NULL,
                                                    (Node *) defval,
                                                    baseTypeId,
                                                    typeOid,
                                                    typmod,
                                                    COERCION_ASSIGNMENT,
                                                    COERCE_IMPLICIT_CAST,
                                                    -1);
            if (defval == NULL) /* should not happen */
                elog(ERROR, "failed to coerce base type to domain");
        }

        if (defval)
        {
            NewColumnValue *newval;

            newval = (NewColumnValue *) palloc0(sizeof(NewColumnValue));
            newval->attnum = attribute.attnum;
            newval->expr = expression_planner(defval);
            newval->is_generated = (colDef->generated != '\0');

            tab->newvals = lappend(tab->newvals, newval);
        }

        if (DomainHasConstraints(typeOid))
            tab->rewrite |= AT_REWRITE_DEFAULT_VAL;

        if (!TupleDescAttr(rel->rd_att, attribute.attnum - 1)->atthasmissing)
        {
            /*
             * If the new column is NOT NULL, and there is no missing value,
             * tell Phase 3 it needs to check for NULLs.
             */
            tab->verify_new_notnull |= colDef->is_not_null;
        }
    }

    /*
     * Add needed dependency entries for the new column.
     */
    add_column_datatype_dependency(myrelid, newattnum, attribute.atttypid);
    add_column_collation_dependency(myrelid, newattnum, attribute.attcollation);

    /*
     * Propagate to children as appropriate.  Unlike most other ALTER
     * routines, we have to do this one level of recursion at a time; we can't
     * use find_all_inheritors to do it in one pass.
     */
    children =
        find_inheritance_children(RelationGetRelid(rel), lockmode);

    /*
     * If we are told not to recurse, there had better not be any child
     * tables; else the addition would put them out of step.
     */
    if (children && !recurse)
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
                 errmsg("column must be added to child tables too")));

    /* Children should see column as singly inherited */
    if (!recursing)
    {
        childcmd = copyObject(*cmd);
        colDef = castNode(ColumnDef, childcmd->def);
        colDef->inhcount = 1;
        colDef->is_local = false;
    }
    else
        childcmd = *cmd;        /* no need to copy again */

    foreach(child, children)
    {
        Oid         childrelid = lfirst_oid(child);
        Relation    childrel;
        AlteredTableInfo *childtab;

        /* find_inheritance_children already got lock */
        childrel = table_open(childrelid, NoLock);
        CheckTableNotInUse(childrel, "ALTER TABLE");

        /* Find or create work queue entry for this table */
        childtab = ATGetQueueEntry(wqueue, childrel);

        /* Recurse to child; return value is ignored */
        ATExecAddColumn(wqueue, childtab, childrel,
                        &childcmd, recurse, true,
                        lockmode, cur_pass, context);

        table_close(childrel, NoLock);
    }

    ObjectAddressSubSet(address, RelationRelationId, myrelid, newattnum);
    return address;
}

/*
 * If a new or renamed column will collide with the name of an existing
 * column and if_not_exists is false then error out, else do nothing.
 */
static bool
check_for_column_name_collision(Relation rel, const char *colname,
                                bool if_not_exists)
{
    HeapTuple   attTuple;
    int         attnum;

    /*
     * this test is deliberately not attisdropped-aware, since if one tries to
     * add a column matching a dropped column name, it's gonna fail anyway.
     */
    attTuple = SearchSysCache2(ATTNAME,
                               ObjectIdGetDatum(RelationGetRelid(rel)),
                               PointerGetDatum(colname));
    if (!HeapTupleIsValid(attTuple))
        return true;

    attnum = ((Form_pg_attribute) GETSTRUCT(attTuple))->attnum;
    ReleaseSysCache(attTuple);

    /*
     * We throw a different error message for conflicts with system column
     * names, since they are normally not shown and the user might otherwise
     * be confused about the reason for the conflict.
     */
    if (attnum <= 0)
        ereport(ERROR,
                (errcode(ERRCODE_DUPLICATE_COLUMN),
                 errmsg("column name \"%s\" conflicts with a system column name",
                        colname)));
    else
    {
        if (if_not_exists)
        {
            ereport(NOTICE,
                    (errcode(ERRCODE_DUPLICATE_COLUMN),
                     errmsg("column \"%s\" of relation \"%s\" already exists, skipping",
                            colname, RelationGetRelationName(rel))));
            return false;
        }

        ereport(ERROR,
                (errcode(ERRCODE_DUPLICATE_COLUMN),
                 errmsg("column \"%s\" of relation \"%s\" already exists",
                        colname, RelationGetRelationName(rel))));
    }

    /* not reached: every path above either returns or raises an error */
    return true;
}

/*
 * Install a column's dependency on its datatype.
 */
static void
add_column_datatype_dependency(Oid relid, int32 attnum, Oid typid)
{
    ObjectAddress myself,
                referenced;

    /* dependent object is the column (relation OID + attribute number) */
    myself.classId = RelationRelationId;
    myself.objectId = relid;
    myself.objectSubId = attnum;
    referenced.classId = TypeRelationId;
    referenced.objectId = typid;
    referenced.objectSubId = 0;
    recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
}

/*
 * Install a column's dependency on its collation.
 */
static void
add_column_collation_dependency(Oid relid, int32 attnum, Oid collid)
{
    ObjectAddress myself,
                referenced;

    /* We know the default collation is pinned, so don't bother recording it */
    if (OidIsValid(collid) && collid != DEFAULT_COLLATION_OID)
    {
        myself.classId = RelationRelationId;
        myself.objectId = relid;
        myself.objectSubId = attnum;
        referenced.classId = CollationRelationId;
        referenced.objectId = collid;
        referenced.objectSubId = 0;
        recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
    }
}

/*
 * ALTER TABLE ALTER COLUMN DROP NOT NULL
 */

static void
ATPrepDropNotNull(Relation rel, bool recurse, bool recursing)
{
    /*
     * If the parent is a partitioned table, like check constraints, we do not
     * support removing the NOT NULL while partitions exist.
     */
    if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
    {
        PartitionDesc partdesc = RelationGetPartitionDesc(rel, true);

        Assert(partdesc != NULL);
        if (partdesc->nparts > 0 && !recurse && !recursing)
            ereport(ERROR,
                    (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
                     errmsg("cannot remove constraint from only the partitioned table when partitions exist"),
                     errhint("Do not specify the ONLY keyword.")));
    }
}

/*
 * Return the address of the modified column.  If the column was already
 * nullable, InvalidObjectAddress is returned.
 */
static ObjectAddress
ATExecDropNotNull(Relation rel, const char *colName, LOCKMODE lockmode)
{
    HeapTuple   tuple;
    Form_pg_attribute attTup;
    AttrNumber  attnum;
    Relation    attr_rel;
    List       *indexoidlist;
    ListCell   *indexoidscan;
    ObjectAddress address;

    /*
     * lookup the attribute
     */
    attr_rel = table_open(AttributeRelationId, RowExclusiveLock);

    tuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), colName);
    if (!HeapTupleIsValid(tuple))
        ereport(ERROR,
                (errcode(ERRCODE_UNDEFINED_COLUMN),
                 errmsg("column \"%s\" of relation \"%s\" does not exist",
                        colName, RelationGetRelationName(rel))));
    attTup = (Form_pg_attribute) GETSTRUCT(tuple);
    attnum = attTup->attnum;

    /* Prevent them from altering a system attribute */
    if (attnum <= 0)
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("cannot alter system column \"%s\"",
                        colName)));

    /* identity columns are implicitly NOT NULL; that cannot be dropped */
    if (attTup->attidentity)
        ereport(ERROR,
                (errcode(ERRCODE_SYNTAX_ERROR),
                 errmsg("column \"%s\" of relation \"%s\" is an identity column",
                        colName, RelationGetRelationName(rel))));

    /*
     * Check that the attribute is not in a primary key or in an index used as
     * a replica identity.
     *
     * Note: we'll throw error even if the pkey index is not valid.
     */

    /* Loop over all indexes on the relation */
    indexoidlist = RelationGetIndexList(rel);

    foreach(indexoidscan, indexoidlist)
    {
        Oid         indexoid = lfirst_oid(indexoidscan);
        HeapTuple   indexTuple;
        Form_pg_index indexStruct;
        int         i;

        indexTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexoid));
        if (!HeapTupleIsValid(indexTuple))
            elog(ERROR, "cache lookup failed for index %u", indexoid);
        indexStruct = (Form_pg_index) GETSTRUCT(indexTuple);

        /*
         * If the index is not a primary key or an index used as replica
         * identity, skip the check.
         */
        if (indexStruct->indisprimary || indexStruct->indisreplident)
        {
            /*
             * Loop over each attribute in the primary key or the index used
             * as replica identity and see if it matches the to-be-altered
             * attribute.  (Only the indnkeyatts key columns are examined.)
             */
            for (i = 0; i < indexStruct->indnkeyatts; i++)
            {
                if (indexStruct->indkey.values[i] == attnum)
                {
                    if (indexStruct->indisprimary)
                        ereport(ERROR,
                                (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
                                 errmsg("column \"%s\" is in a primary key",
                                        colName)));
                    else
                        ereport(ERROR,
                                (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
                                 errmsg("column \"%s\" is in index used as replica identity",
                                        colName)));
                }
            }
        }

        ReleaseSysCache(indexTuple);
    }

    list_free(indexoidlist);

    /* If rel is partition, shouldn't drop NOT NULL if parent has the same */
    if (rel->rd_rel->relispartition)
    {
        Oid         parentId = get_partition_parent(RelationGetRelid(rel), false);
        Relation    parent = table_open(parentId, AccessShareLock);
        TupleDesc   tupDesc = RelationGetDescr(parent);
        AttrNumber  parent_attnum;

        parent_attnum = get_attnum(parentId, colName);
        if (TupleDescAttr(tupDesc, parent_attnum - 1)->attnotnull)
            ereport(ERROR,
                    (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
                     errmsg("column \"%s\" is marked NOT NULL in parent table",
                            colName)));
        table_close(parent, AccessShareLock);
    }

    /*
     * Okay, actually perform the catalog change ... if needed
     */
    if (attTup->attnotnull)
    {
        attTup->attnotnull = false;

        CatalogTupleUpdate(attr_rel, &tuple->t_self, tuple);

        ObjectAddressSubSet(address, RelationRelationId,
                            RelationGetRelid(rel), attnum);
    }
    else
        address = InvalidObjectAddress;

    /* the post-alter hook fires whether or not we changed anything */
    InvokeObjectPostAlterHook(RelationRelationId,
                              RelationGetRelid(rel), attnum);

    table_close(attr_rel, RowExclusiveLock);

    return address;
}

/*
 * ALTER TABLE ALTER COLUMN SET NOT NULL
 */

static void
ATPrepSetNotNull(List **wqueue, Relation rel,
                 AlterTableCmd *cmd, bool recurse, bool recursing,
                 LOCKMODE lockmode, AlterTableUtilityContext *context)
{
    /*
     * If we're already recursing, there's nothing to do; the topmost
     * invocation of ATSimpleRecursion already visited all children.
     */
    if (recursing)
        return;

    /*
     * If the target column is already marked NOT NULL, we can skip recursing
     * to children, because their columns should already be marked NOT NULL as
     * well.  But there's no point in checking here unless the relation has
     * some children; else we can just wait till execution to check.  (If it
     * does have children, however, this can save taking per-child locks
     * unnecessarily.  This greatly improves concurrency in some parallel
     * restore scenarios.)
     *
     * Unfortunately, we can only apply this optimization to partitioned
     * tables, because traditional inheritance doesn't enforce that child
     * columns be NOT NULL when their parent is.  (That's a bug that should
     * get fixed someday.)
     */
    if (rel->rd_rel->relhassubclass &&
        rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
    {
        HeapTuple   tuple;
        bool        attnotnull;

        tuple = SearchSysCacheAttName(RelationGetRelid(rel), cmd->name);

        /* Might as well throw the error now, if name is bad */
        if (!HeapTupleIsValid(tuple))
            ereport(ERROR,
                    (errcode(ERRCODE_UNDEFINED_COLUMN),
                     errmsg("column \"%s\" of relation \"%s\" does not exist",
                            cmd->name, RelationGetRelationName(rel))));

        attnotnull = ((Form_pg_attribute) GETSTRUCT(tuple))->attnotnull;
        ReleaseSysCache(tuple);
        if (attnotnull)
            return;
    }

    /*
     * If we have ALTER TABLE ONLY ... SET NOT NULL on a partitioned table,
     * apply ALTER TABLE ... CHECK NOT NULL to every child.  Otherwise, use
     * normal recursion logic.
     */
    if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
        !recurse)
    {
        AlterTableCmd *newcmd = makeNode(AlterTableCmd);

        newcmd->subtype = AT_CheckNotNull;
        newcmd->name = pstrdup(cmd->name);
        ATSimpleRecursion(wqueue, rel, newcmd, true, lockmode, context);
    }
    else
        ATSimpleRecursion(wqueue, rel, cmd, recurse, lockmode, context);
}

/*
 * Return the address of the modified column.  If the column was already NOT
 * NULL, InvalidObjectAddress is returned.
 */
static ObjectAddress
ATExecSetNotNull(AlteredTableInfo *tab, Relation rel,
                 const char *colName, LOCKMODE lockmode)
{
    HeapTuple   tuple;
    AttrNumber  attnum;
    Relation    attr_rel;
    ObjectAddress address;

    /*
     * lookup the attribute
     */
    attr_rel = table_open(AttributeRelationId, RowExclusiveLock);

    tuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), colName);

    if (!HeapTupleIsValid(tuple))
        ereport(ERROR,
                (errcode(ERRCODE_UNDEFINED_COLUMN),
                 errmsg("column \"%s\" of relation \"%s\" does not exist",
                        colName, RelationGetRelationName(rel))));

    attnum = ((Form_pg_attribute) GETSTRUCT(tuple))->attnum;

    /* Prevent them from altering a system attribute */
    if (attnum <= 0)
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("cannot alter system column \"%s\"",
                        colName)));

    /*
     * Okay, actually perform the catalog change ... if needed
     */
    if (!((Form_pg_attribute) GETSTRUCT(tuple))->attnotnull)
    {
        ((Form_pg_attribute) GETSTRUCT(tuple))->attnotnull = true;

        CatalogTupleUpdate(attr_rel, &tuple->t_self, tuple);

        /*
         * Ordinarily phase 3 must ensure that no NULLs exist in columns that
         * are set NOT NULL; however, if we can find a constraint which proves
         * this then we can skip that.  We needn't bother looking if we've
         * already found that we must verify some other NOT NULL constraint.
         */
        if (!tab->verify_new_notnull &&
            !NotNullImpliedByRelConstraints(rel, (Form_pg_attribute) GETSTRUCT(tuple)))
        {
            /* Tell Phase 3 it needs to test the constraint */
            tab->verify_new_notnull = true;
        }

        ObjectAddressSubSet(address, RelationRelationId,
                            RelationGetRelid(rel), attnum);
    }
    else
        address = InvalidObjectAddress;

    /* the post-alter hook fires whether or not we changed anything */
    InvokeObjectPostAlterHook(RelationRelationId,
                              RelationGetRelid(rel), attnum);

    table_close(attr_rel, RowExclusiveLock);

    return address;
}

/*
 * ALTER TABLE ALTER COLUMN CHECK NOT NULL
 *
 * This doesn't exist in the grammar, but we generate AT_CheckNotNull
 * commands against the partitions of a partitioned table if the user
 * writes ALTER TABLE ONLY ... SET NOT NULL on the partitioned table,
 * or tries to create a primary key on it (which internally creates
 * AT_SetNotNull on the partitioned table).  Such a command doesn't
 * allow us to actually modify any partition, but we want to let it
 * go through if the partitions are already properly marked.
 *
 * In future, this might need to adjust the child table's state, likely
 * by incrementing an inheritance count for the attnotnull constraint.
 * For now we need only check for the presence of the flag.
 */
static void
ATExecCheckNotNull(AlteredTableInfo *tab, Relation rel,
                   const char *colName, LOCKMODE lockmode)
{
    HeapTuple   tuple;

    tuple = SearchSysCacheAttName(RelationGetRelid(rel), colName);

    if (!HeapTupleIsValid(tuple))
        ereport(ERROR,
                (errcode(ERRCODE_UNDEFINED_COLUMN),
                 errmsg("column \"%s\" of relation \"%s\" does not exist",
                        colName, RelationGetRelationName(rel))));

    if (!((Form_pg_attribute) GETSTRUCT(tuple))->attnotnull)
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
                 errmsg("constraint must be added to child tables too"),
                 errdetail("Column \"%s\" of relation \"%s\" is not already NOT NULL.",
                           colName, RelationGetRelationName(rel)),
                 errhint("Do not specify the ONLY keyword.")));

    ReleaseSysCache(tuple);
}

/*
 * NotNullImpliedByRelConstraints
 *		Does rel's existing constraints imply NOT NULL for the given attribute?
 */
static bool
NotNullImpliedByRelConstraints(Relation rel, Form_pg_attribute attr)
{
    NullTest   *nnulltest = makeNode(NullTest);

    /* Build "attr IS NOT NULL" over varno 1, i.e. the relation itself */
    nnulltest->arg = (Expr *) makeVar(1,
                                      attr->attnum,
                                      attr->atttypid,
                                      attr->atttypmod,
                                      attr->attcollation,
                                      0);
    nnulltest->nulltesttype = IS_NOT_NULL;

    /*
     * argisrow = false is correct even for a composite column, because
     * attnotnull does not represent a SQL-spec IS NOT NULL test in such a
     * case, just IS DISTINCT FROM NULL.
     */
    nnulltest->argisrow = false;
    nnulltest->location = -1;

    if (ConstraintImpliedByRelConstraint(rel, list_make1(nnulltest), NIL))
    {
        ereport(DEBUG1,
                (errmsg_internal("existing constraints on column \"%s.%s\" are sufficient to prove that it does not contain nulls",
                                 RelationGetRelationName(rel), NameStr(attr->attname))));
        return true;
    }

    return false;
}

/*
 * ALTER TABLE ALTER COLUMN SET/DROP DEFAULT
 *
 * Return the address of the affected column.
 */
static ObjectAddress
ATExecColumnDefault(Relation rel, const char *colName,
                    Node *newDefault, LOCKMODE lockmode)
{
    TupleDesc   tupdesc = RelationGetDescr(rel);
    AttrNumber  attnum;
    ObjectAddress address;

    /*
     * get the number of the attribute
     */
    attnum = get_attnum(RelationGetRelid(rel), colName);
    if (attnum == InvalidAttrNumber)
        ereport(ERROR,
                (errcode(ERRCODE_UNDEFINED_COLUMN),
                 errmsg("column \"%s\" of relation \"%s\" does not exist",
                        colName, RelationGetRelationName(rel))));

    /* Prevent them from altering a system attribute */
    if (attnum <= 0)
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("cannot alter system column \"%s\"",
                        colName)));

    /* DROP DEFAULT on an identity column gets a hint to use DROP IDENTITY */
    if (TupleDescAttr(tupdesc, attnum - 1)->attidentity)
        ereport(ERROR,
                (errcode(ERRCODE_SYNTAX_ERROR),
                 errmsg("column \"%s\" of relation \"%s\" is an identity column",
                        colName, RelationGetRelationName(rel)),
                 newDefault ? 0 : errhint("Use ALTER TABLE ... ALTER COLUMN ... DROP IDENTITY instead.")));

    /* similarly, DROP DEFAULT on a stored generated column hints at DROP EXPRESSION */
    if (TupleDescAttr(tupdesc, attnum - 1)->attgenerated)
        ereport(ERROR,
                (errcode(ERRCODE_SYNTAX_ERROR),
                 errmsg("column \"%s\" of relation \"%s\" is a generated column",
                        colName, RelationGetRelationName(rel)),
                 newDefault || TupleDescAttr(tupdesc, attnum - 1)->attgenerated != ATTRIBUTE_GENERATED_STORED ? 0 :
                 errhint("Use ALTER TABLE ... ALTER COLUMN ... DROP EXPRESSION instead.")));

    /*
     * Remove any old default for the column.  We use RESTRICT here for
     * safety, but at present we do not expect anything to depend on the
     * default.
     *
     * We treat removing the existing default as an internal operation when it
     * is preparatory to adding a new default, but as a user-initiated
     * operation when the user asked for a drop.
     */
    RemoveAttrDefault(RelationGetRelid(rel), attnum, DROP_RESTRICT, false,
                      newDefault != NULL);

    if (newDefault)
    {
        /* SET DEFAULT */
        RawColumnDefault *rawEnt;

        rawEnt = (RawColumnDefault *) palloc(sizeof(RawColumnDefault));
        rawEnt->attnum = attnum;
        rawEnt->raw_default = newDefault;
        rawEnt->missingMode = false;
        rawEnt->generated = '\0';

        /*
         * This function is intended for CREATE TABLE, so it processes a
         * _list_ of defaults, but we just do one.
         */
        AddRelationNewConstraints(rel, list_make1(rawEnt), NIL,
                                  false, true, false, NULL);
    }

    ObjectAddressSubSet(address, RelationRelationId,
                        RelationGetRelid(rel), attnum);
    return address;
}

/*
 * Add a pre-cooked default expression.
 *
 * Return the address of the affected column.
 */
static ObjectAddress
ATExecCookedColumnDefault(Relation rel, AttrNumber attnum,
                          Node *newDefault)
{
    ObjectAddress address;

    /* We assume no checking is required */

    /*
     * Remove any old default for the column.  We use RESTRICT here for
     * safety, but at present we do not expect anything to depend on the
     * default.  (In ordinary cases, there could not be a default in place
     * anyway, but it's possible when combining LIKE with inheritance.)
     */
    RemoveAttrDefault(RelationGetRelid(rel), attnum, DROP_RESTRICT, false,
                      true);

    (void) StoreAttrDefault(rel, attnum, newDefault, true, false);

    ObjectAddressSubSet(address, RelationRelationId,
                        RelationGetRelid(rel), attnum);
    return address;
}

/*
 * ALTER TABLE ALTER COLUMN ADD IDENTITY
 *
 * Return the address of the affected column.
 */
static ObjectAddress
ATExecAddIdentity(Relation rel, const char *colName,
				  Node *def, LOCKMODE lockmode)
{
	Relation	attrelation;
	HeapTuple	tuple;
	Form_pg_attribute attTup;
	AttrNumber	attnum;
	ObjectAddress address;
	ColumnDef  *cdef = castNode(ColumnDef, def);

	attrelation = table_open(AttributeRelationId, RowExclusiveLock);

	tuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), colName);
	if (!HeapTupleIsValid(tuple))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_COLUMN),
				 errmsg("column \"%s\" of relation \"%s\" does not exist",
						colName, RelationGetRelationName(rel))));
	attTup = (Form_pg_attribute) GETSTRUCT(tuple);
	attnum = attTup->attnum;

	/* Can't alter a system attribute */
	if (attnum <= 0)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot alter system column \"%s\"",
						colName)));

	/*
	 * Creating a column as identity implies NOT NULL, so adding the identity
	 * to an existing column that is not NOT NULL would create a state that
	 * cannot be reproduced without contortions.
	 */
	if (!attTup->attnotnull)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("column \"%s\" of relation \"%s\" must be declared NOT NULL before identity can be added",
						colName, RelationGetRelationName(rel))));

	if (attTup->attidentity)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("column \"%s\" of relation \"%s\" is already an identity column",
						colName, RelationGetRelationName(rel))));

	if (attTup->atthasdef)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("column \"%s\" of relation \"%s\" already has a default value",
						colName, RelationGetRelationName(rel))));

	/* Record the identity kind (ALWAYS/BY DEFAULT) in pg_attribute. */
	attTup->attidentity = cdef->identity;
	CatalogTupleUpdate(attrelation, &tuple->t_self, tuple);

	InvokeObjectPostAlterHook(RelationRelationId,
							  RelationGetRelid(rel),
							  attTup->attnum);
	ObjectAddressSubSet(address, RelationRelationId,
						RelationGetRelid(rel), attnum);
	heap_freetuple(tuple);

	table_close(attrelation, RowExclusiveLock);

	return address;
}

/*
 * ALTER TABLE ALTER COLUMN SET { GENERATED or sequence options }
 *
 * Return the address of the affected column.
 */
static ObjectAddress
ATExecSetIdentity(Relation rel, const char *colName, Node *def, LOCKMODE lockmode)
{
	ListCell   *option;
	DefElem    *generatedEl = NULL;
	HeapTuple	tuple;
	Form_pg_attribute attTup;
	AttrNumber	attnum;
	Relation	attrelation;
	ObjectAddress address;

	/* Scan the option list; only "generated" is recognized here. */
	foreach(option, castNode(List, def))
	{
		DefElem    *defel = lfirst_node(DefElem, option);

		if (strcmp(defel->defname, "generated") == 0)
		{
			if (generatedEl)
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
						 errmsg("conflicting or redundant options")));
			generatedEl = defel;
		}
		else
			elog(ERROR, "option \"%s\" not recognized",
				 defel->defname);
	}

	/*
	 * Even if there is nothing to change here, we run all the checks.  There
	 * will be a subsequent ALTER SEQUENCE that relies on everything being
	 * there.
	 */

	attrelation = table_open(AttributeRelationId, RowExclusiveLock);
	tuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), colName);
	if (!HeapTupleIsValid(tuple))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_COLUMN),
				 errmsg("column \"%s\" of relation \"%s\" does not exist",
						colName, RelationGetRelationName(rel))));

	attTup = (Form_pg_attribute) GETSTRUCT(tuple);
	attnum = attTup->attnum;

	if (attnum <= 0)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot alter system column \"%s\"",
						colName)));

	if (!attTup->attidentity)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("column \"%s\" of relation \"%s\" is not an identity column",
						colName, RelationGetRelationName(rel))));

	if (generatedEl)
	{
		/* The DefElem's int value is the new attidentity character. */
		attTup->attidentity = defGetInt32(generatedEl);
		CatalogTupleUpdate(attrelation, &tuple->t_self, tuple);

		InvokeObjectPostAlterHook(RelationRelationId,
								  RelationGetRelid(rel),
								  attTup->attnum);
		ObjectAddressSubSet(address, RelationRelationId,
							RelationGetRelid(rel), attnum);
	}
	else
		address = InvalidObjectAddress;

	heap_freetuple(tuple);
	table_close(attrelation, RowExclusiveLock);

	return address;
}

/*
 * ALTER TABLE ALTER COLUMN DROP IDENTITY
 *
 * Return the address of the affected column.
 */
static ObjectAddress
ATExecDropIdentity(Relation rel, const char *colName, bool missing_ok, LOCKMODE lockmode)
{
	HeapTuple	tuple;
	Form_pg_attribute attTup;
	AttrNumber	attnum;
	Relation	attrelation;
	ObjectAddress address;
	Oid			seqid;
	ObjectAddress seqaddress;

	attrelation = table_open(AttributeRelationId, RowExclusiveLock);
	tuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), colName);
	if (!HeapTupleIsValid(tuple))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_COLUMN),
				 errmsg("column \"%s\" of relation \"%s\" does not exist",
						colName, RelationGetRelationName(rel))));

	attTup = (Form_pg_attribute) GETSTRUCT(tuple);
	attnum = attTup->attnum;

	if (attnum <= 0)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot alter system column \"%s\"",
						colName)));

	/* Not an identity column: error, or NOTICE-and-skip with IF EXISTS. */
	if (!attTup->attidentity)
	{
		if (!missing_ok)
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("column \"%s\" of relation \"%s\" is not an identity column",
							colName, RelationGetRelationName(rel))));
		else
		{
			ereport(NOTICE,
					(errmsg("column \"%s\" of relation \"%s\" is not an identity column, skipping",
							colName, RelationGetRelationName(rel))));
			heap_freetuple(tuple);
			table_close(attrelation, RowExclusiveLock);
			return InvalidObjectAddress;
		}
	}

	attTup->attidentity = '\0';
	CatalogTupleUpdate(attrelation, &tuple->t_self, tuple);

	InvokeObjectPostAlterHook(RelationRelationId,
							  RelationGetRelid(rel),
							  attTup->attnum);
	ObjectAddressSubSet(address, RelationRelationId,
						RelationGetRelid(rel), attnum);
	heap_freetuple(tuple);

	table_close(attrelation, RowExclusiveLock);

	/*
	 * drop the internal sequence: first sever its INTERNAL dependency on the
	 * column (else performDeletion would refuse), then delete it.  The CCI
	 * makes the dependency removal visible to the deletion.
	 */
	seqid = getIdentitySequence(RelationGetRelid(rel), attnum, false);
	deleteDependencyRecordsForClass(RelationRelationId, seqid,
									RelationRelationId, DEPENDENCY_INTERNAL);
	CommandCounterIncrement();
	seqaddress.classId = RelationRelationId;
	seqaddress.objectId = seqid;
	seqaddress.objectSubId = 0;
	performDeletion(&seqaddress, DROP_RESTRICT, PERFORM_DELETION_INTERNAL);

	return address;
}

/*
 * ALTER TABLE ALTER COLUMN DROP EXPRESSION
 */
static void
ATPrepDropExpression(Relation rel, AlterTableCmd *cmd, bool recurse, bool recursing, LOCKMODE lockmode)
{
	/*
	 * Reject ONLY if there are child tables.  We could implement this, but it
	 * is a bit complicated.  GENERATED clauses must be attached to the column
	 * definition and cannot be added later like DEFAULT, so if a child table
	 * has a generation expression that the parent does not have, the child
	 * column will necessarily be an attlocal column.  So to implement ONLY
	 * here, we'd need extra code to update attislocal of the direct child
	 * tables, somewhat similar to how DROP COLUMN does it, so that the
	 * resulting state can be properly dumped and restored.
	 */
	if (!recurse &&
		find_inheritance_children(RelationGetRelid(rel), lockmode))
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("ALTER TABLE / DROP EXPRESSION must be applied to child tables too")));

	/*
	 * Cannot drop generation expression from inherited columns.
	 */
	if (!recursing)
	{
		HeapTuple	tuple;
		Form_pg_attribute attTup;

		tuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), cmd->name);
		if (!HeapTupleIsValid(tuple))
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_COLUMN),
					 errmsg("column \"%s\" of relation \"%s\" does not exist",
							cmd->name, RelationGetRelationName(rel))));

		attTup = (Form_pg_attribute) GETSTRUCT(tuple);

		if (attTup->attinhcount > 0)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
					 errmsg("cannot drop generation expression from inherited column")));
	}
}

/*
 * Return the address of the affected column.
 */
static ObjectAddress
ATExecDropExpression(Relation rel, const char *colName, bool missing_ok, LOCKMODE lockmode)
{
	HeapTuple	tuple;
	Form_pg_attribute attTup;
	AttrNumber	attnum;
	Relation	attrelation;
	Oid			attrdefoid;
	ObjectAddress address;

	attrelation = table_open(AttributeRelationId, RowExclusiveLock);
	tuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), colName);
	if (!HeapTupleIsValid(tuple))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_COLUMN),
				 errmsg("column \"%s\" of relation \"%s\" does not exist",
						colName, RelationGetRelationName(rel))));

	attTup = (Form_pg_attribute) GETSTRUCT(tuple);
	attnum = attTup->attnum;

	if (attnum <= 0)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot alter system column \"%s\"",
						colName)));

	/* Only stored generated columns qualify; honor IF EXISTS otherwise. */
	if (attTup->attgenerated != ATTRIBUTE_GENERATED_STORED)
	{
		if (!missing_ok)
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("column \"%s\" of relation \"%s\" is not a stored generated column",
							colName, RelationGetRelationName(rel))));
		else
		{
			ereport(NOTICE,
					(errmsg("column \"%s\" of relation \"%s\" is not a stored generated column, skipping",
							colName, RelationGetRelationName(rel))));
			heap_freetuple(tuple);
			table_close(attrelation, RowExclusiveLock);
			return InvalidObjectAddress;
		}
	}

	/*
	 * Mark the column as no longer generated.  (The atthasdef flag needs to
	 * get cleared too, but RemoveAttrDefault will handle that.)
	 */
	attTup->attgenerated = '\0';
	CatalogTupleUpdate(attrelation, &tuple->t_self, tuple);

	InvokeObjectPostAlterHook(RelationRelationId,
							  RelationGetRelid(rel),
							  attnum);
	heap_freetuple(tuple);

	table_close(attrelation, RowExclusiveLock);

	/*
	 * Drop the dependency records of the GENERATED expression, in particular
	 * its INTERNAL dependency on the column, which would otherwise cause
	 * dependency.c to refuse to perform the deletion.
	 */
	attrdefoid = GetAttrDefaultOid(RelationGetRelid(rel), attnum);
	if (!OidIsValid(attrdefoid))
		elog(ERROR, "could not find attrdef tuple for relation %u attnum %d",
			 RelationGetRelid(rel), attnum);
	(void) deleteDependencyRecordsFor(AttrDefaultRelationId, attrdefoid, false);

	/* Make above changes visible */
	CommandCounterIncrement();

	/*
	 * Get rid of the GENERATED expression itself.  We use RESTRICT here for
	 * safety, but at present we do not expect anything to depend on the
	 * default.
	 */
	RemoveAttrDefault(RelationGetRelid(rel), attnum, DROP_RESTRICT,
					  false, false);

	ObjectAddressSubSet(address, RelationRelationId,
						RelationGetRelid(rel), attnum);
	return address;
}

/*
 * ALTER TABLE ALTER COLUMN SET STATISTICS
 *
 * Return value is the address of the modified column
 */
static ObjectAddress
ATExecSetStatistics(Relation rel, const char *colName, int16 colNum, Node *newValue, LOCKMODE lockmode)
{
	int			newtarget;
	Relation	attrelation;
	HeapTuple	tuple;
	Form_pg_attribute attrtuple;
	AttrNumber	attnum;
	ObjectAddress address;

	/*
	 * We allow referencing columns by numbers only for indexes, since table
	 * column numbers could contain gaps if columns are later dropped.
	 */
	if (rel->rd_rel->relkind != RELKIND_INDEX &&
		rel->rd_rel->relkind != RELKIND_PARTITIONED_INDEX &&
		!colName)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot refer to non-index column by number")));

	Assert(IsA(newValue, Integer));
	newtarget = intVal(newValue);

	/*
	 * Limit target to a sane range: -1 means "use default"; values above
	 * 10000 are silently clamped with a WARNING.
	 */
	if (newtarget < -1)
	{
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("statistics target %d is too low",
						newtarget)));
	}
	else if (newtarget > 10000)
	{
		newtarget = 10000;
		ereport(WARNING,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("lowering statistics target to %d",
						newtarget)));
	}

	attrelation = table_open(AttributeRelationId, RowExclusiveLock);

	/* Look up the target column by name, or by number for indexes. */
	if (colName)
	{
		tuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), colName);

		if (!HeapTupleIsValid(tuple))
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_COLUMN),
					 errmsg("column \"%s\" of relation \"%s\" does not exist",
							colName, RelationGetRelationName(rel))));
	}
	else
	{
		tuple = SearchSysCacheCopyAttNum(RelationGetRelid(rel), colNum);

		if (!HeapTupleIsValid(tuple))
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_COLUMN),
					 errmsg("column number %d of relation \"%s\" does not exist",
							colNum, RelationGetRelationName(rel))));
	}

	attrtuple = (Form_pg_attribute) GETSTRUCT(tuple);

	attnum = attrtuple->attnum;
	if (attnum <= 0)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot alter system column \"%s\"",
						colName)));

	/*
	 * For indexes, only expression columns (indkey entry 0) can carry their
	 * own statistics target, and INCLUDE columns never can.
	 */
	if (rel->rd_rel->relkind == RELKIND_INDEX ||
		rel->rd_rel->relkind == RELKIND_PARTITIONED_INDEX)
	{
		if (attnum > rel->rd_index->indnkeyatts)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("cannot alter statistics on included column \"%s\" of index \"%s\"",
							NameStr(attrtuple->attname), RelationGetRelationName(rel))));
		else if (rel->rd_index->indkey.values[attnum - 1] != 0)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("cannot alter statistics on non-expression column \"%s\" of index \"%s\"",
							NameStr(attrtuple->attname), RelationGetRelationName(rel)),
					 errhint("Alter statistics on table column instead.")));
	}

	attrtuple->attstattarget = newtarget;

	CatalogTupleUpdate(attrelation, &tuple->t_self, tuple);

	InvokeObjectPostAlterHook(RelationRelationId,
							  RelationGetRelid(rel),
							  attrtuple->attnum);
	ObjectAddressSubSet(address, RelationRelationId,
						RelationGetRelid(rel), attnum);
	heap_freetuple(tuple);

	table_close(attrelation, RowExclusiveLock);

	return address;
}

/*
 * ALTER TABLE ALTER COLUMN SET/RESET ( options )
 *
 * Return value is the address of the modified column
 */
static ObjectAddress
ATExecSetOptions(Relation rel, const char *colName, Node *options,
				 bool isReset, LOCKMODE lockmode)
{
	Relation	attrelation;
	HeapTuple	tuple,
				newtuple;
	Form_pg_attribute attrtuple;
	AttrNumber	attnum;
	Datum		datum,
				newOptions;
	bool		isnull;
	ObjectAddress address;
	Datum		repl_val[Natts_pg_attribute];
	bool		repl_null[Natts_pg_attribute];
	bool		repl_repl[Natts_pg_attribute];

	attrelation = table_open(AttributeRelationId, RowExclusiveLock);

	tuple = SearchSysCacheAttName(RelationGetRelid(rel), colName);

	if (!HeapTupleIsValid(tuple))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_COLUMN),
				 errmsg("column \"%s\" of relation \"%s\" does not exist",
						colName, RelationGetRelationName(rel))));
	attrtuple = (Form_pg_attribute) GETSTRUCT(tuple);

	attnum = attrtuple->attnum;
	if (attnum <= 0)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot alter system column \"%s\"",
						colName)));

	/* Generate new proposed attoptions (text array) */
	datum = SysCacheGetAttr(ATTNAME, tuple, Anum_pg_attribute_attoptions,
							&isnull);
	newOptions = transformRelOptions(isnull ? (Datum) 0 : datum,
									 castNode(List, options), NULL, NULL,
									 false, isReset);
	/* Validate new options */
	(void) attribute_reloptions(newOptions, true);

	/* Build new tuple. */
	memset(repl_null, false, sizeof(repl_null));
	memset(repl_repl, false, sizeof(repl_repl));
	if (newOptions != (Datum) 0)
		repl_val[Anum_pg_attribute_attoptions - 1] = newOptions;
	else
		repl_null[Anum_pg_attribute_attoptions - 1] = true;
	repl_repl[Anum_pg_attribute_attoptions - 1] = true;
	newtuple = heap_modify_tuple(tuple, RelationGetDescr(attrelation),
								 repl_val, repl_null, repl_repl);

	/* Update system catalog. */
	CatalogTupleUpdate(attrelation, &newtuple->t_self, newtuple);

	InvokeObjectPostAlterHook(RelationRelationId,
							  RelationGetRelid(rel),
							  attrtuple->attnum);
	ObjectAddressSubSet(address, RelationRelationId,
						RelationGetRelid(rel), attnum);

	heap_freetuple(newtuple);

	ReleaseSysCache(tuple);

	table_close(attrelation, RowExclusiveLock);

	return address;
}

/*
 * Helper function for ATExecSetStorage and ATExecSetCompression
 *
 * Set the attstorage and/or attcompression fields for index columns
 * associated with the specified table column.
 */
static void
SetIndexStorageProperties(Relation rel, Relation attrelation,
						  AttrNumber attnum,
						  bool setstorage, char newstorage,
						  bool setcompression, char newcompression,
						  LOCKMODE lockmode)
{
	ListCell   *lc;

	foreach(lc, RelationGetIndexList(rel))
	{
		Oid			indexoid = lfirst_oid(lc);
		Relation	indrel;
		AttrNumber	indattnum = 0;
		HeapTuple	tuple;

		indrel = index_open(indexoid, lockmode);

		/* Find which index column (if any) is the given table column. */
		for (int i = 0; i < indrel->rd_index->indnatts; i++)
		{
			if (indrel->rd_index->indkey.values[i] == attnum)
			{
				indattnum = i + 1;
				break;
			}
		}

		/* Column not used by this index; skip it. */
		if (indattnum == 0)
		{
			index_close(indrel, lockmode);
			continue;
		}

		tuple = SearchSysCacheCopyAttNum(RelationGetRelid(indrel), indattnum);

		if (HeapTupleIsValid(tuple))
		{
			Form_pg_attribute attrtuple = (Form_pg_attribute) GETSTRUCT(tuple);

			if (setstorage)
				attrtuple->attstorage = newstorage;

			if (setcompression)
				attrtuple->attcompression = newcompression;

			CatalogTupleUpdate(attrelation, &tuple->t_self, tuple);

			InvokeObjectPostAlterHook(RelationRelationId,
									  RelationGetRelid(rel),
									  attrtuple->attnum);

			heap_freetuple(tuple);
		}

		index_close(indrel, lockmode);
	}
}

/*
 * ALTER TABLE ALTER COLUMN SET STORAGE
 *
 * Return value is the address of the modified column
 */
static ObjectAddress
ATExecSetStorage(Relation rel, const char *colName, Node *newValue, LOCKMODE lockmode)
{
	char	   *storagemode;
	char		newstorage;
	Relation	attrelation;
	HeapTuple	tuple;
	Form_pg_attribute attrtuple;
	AttrNumber	attnum;
	ObjectAddress address;

	Assert(IsA(newValue, String));
	storagemode = strVal(newValue);

	/* Map the user-visible keyword onto a TYPSTORAGE code. */
	if (pg_strcasecmp(storagemode, "plain") == 0)
		newstorage = TYPSTORAGE_PLAIN;
	else if (pg_strcasecmp(storagemode, "external") == 0)
		newstorage = TYPSTORAGE_EXTERNAL;
	else if (pg_strcasecmp(storagemode, "extended") == 0)
		newstorage = TYPSTORAGE_EXTENDED;
	else if (pg_strcasecmp(storagemode, "main") == 0)
		newstorage = TYPSTORAGE_MAIN;
	else
	{
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid storage type \"%s\"",
						storagemode)));
		newstorage = 0;			/* keep compiler quiet */
	}

	attrelation = table_open(AttributeRelationId, RowExclusiveLock);

	tuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), colName);

	if (!HeapTupleIsValid(tuple))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_COLUMN),
				 errmsg("column \"%s\" of relation \"%s\" does not exist",
						colName, RelationGetRelationName(rel))));
	attrtuple = (Form_pg_attribute) GETSTRUCT(tuple);

	attnum = attrtuple->attnum;
	if (attnum <= 0)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot alter system column \"%s\"",
						colName)));

	/*
	 * safety check: do not allow toasted storage modes unless column datatype
	 * is TOAST-aware.
	 */
	if (newstorage == TYPSTORAGE_PLAIN || TypeIsToastable(attrtuple->atttypid))
		attrtuple->attstorage = newstorage;
	else
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("column data type %s can only have storage PLAIN",
						format_type_be(attrtuple->atttypid))));

	CatalogTupleUpdate(attrelation, &tuple->t_self, tuple);

	InvokeObjectPostAlterHook(RelationRelationId,
							  RelationGetRelid(rel),
							  attrtuple->attnum);

	heap_freetuple(tuple);

	/*
	 * Apply the change to indexes as well (only for simple index columns,
	 * matching behavior of index.c ConstructTupleDescriptor()).
	 */
	SetIndexStorageProperties(rel, attrelation, attnum,
							  true, newstorage,
							  false, 0,
							  lockmode);

	table_close(attrelation, RowExclusiveLock);

	ObjectAddressSubSet(address, RelationRelationId,
						RelationGetRelid(rel), attnum);
	return address;
}


/*
 * ALTER TABLE DROP COLUMN
 *
 * DROP COLUMN cannot use the normal ALTER TABLE recursion mechanism,
 * because we have to decide at runtime whether to recurse or not depending
 * on whether attinhcount goes to zero or not.  (We can't check this in a
 * static pre-pass because it won't handle multiple inheritance situations
 * correctly.)
 */
static void
ATPrepDropColumn(List **wqueue, Relation rel, bool recurse, bool recursing,
				 AlterTableCmd *cmd, LOCKMODE lockmode,
				 AlterTableUtilityContext *context)
{
	if (rel->rd_rel->reloftype && !recursing)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("cannot drop column from typed table")));

	if (rel->rd_rel->relkind == RELKIND_COMPOSITE_TYPE)
		ATTypedTableRecursion(wqueue, rel, cmd, lockmode, context);

	if (recurse)
		cmd->subtype = AT_DropColumnRecurse;
}

/*
 * Drops column 'colName' from relation 'rel' and returns the address of the
 * dropped column.  The column is also dropped (or marked as no longer
 * inherited from relation) from the relation's inheritance children, if any.
 *
 * In the recursive invocations for inheritance child relations, instead of
 * dropping the column directly (if to be dropped at all), its object address
 * is added to 'addrs', which must be non-NULL in such invocations.  All
 * columns are dropped at the same time after all the children have been
 * checked recursively.
 */
static ObjectAddress
ATExecDropColumn(List **wqueue, Relation rel, const char *colName,
				 DropBehavior behavior,
				 bool recurse, bool recursing,
				 bool missing_ok, LOCKMODE lockmode,
				 ObjectAddresses *addrs)
{
	HeapTuple	tuple;
	Form_pg_attribute targetatt;
	AttrNumber	attnum;
	List	   *children;
	ObjectAddress object;
	bool		is_expr;

	/* At top level, permission check was done in ATPrepCmd, else do it */
	if (recursing)
		ATSimplePermissions(AT_DropColumn, rel, ATT_TABLE | ATT_FOREIGN_TABLE);

	/* Initialize addrs on the first invocation */
	Assert(!recursing || addrs != NULL);
	if (!recursing)
		addrs = new_object_addresses();

	/*
	 * get the number of the attribute
	 */
	tuple = SearchSysCacheAttName(RelationGetRelid(rel), colName);
	if (!HeapTupleIsValid(tuple))
	{
		if (!missing_ok)
		{
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_COLUMN),
					 errmsg("column \"%s\" of relation \"%s\" does not exist",
							colName, RelationGetRelationName(rel))));
		}
		else
		{
			ereport(NOTICE,
					(errmsg("column \"%s\" of relation \"%s\" does not exist, skipping",
							colName, RelationGetRelationName(rel))));
			return InvalidObjectAddress;
		}
	}
	targetatt = (Form_pg_attribute) GETSTRUCT(tuple);

	attnum = targetatt->attnum;

	/* Can't drop a system attribute */
	if (attnum <= 0)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot drop system column \"%s\"",
						colName)));

	/*
	 * Don't drop inherited columns, unless recursing (presumably from a drop
	 * of the parent column)
	 */
	if (targetatt->attinhcount > 0 && !recursing)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
				 errmsg("cannot drop inherited column \"%s\"",
						colName)));

	/*
	 * Don't drop columns used in the partition key, either.  (If we let this
	 * go through, the key column's dependencies would cause a cascaded drop
	 * of the whole table, which is surely not what the user expected.)
	 */
	if (has_partition_attrs(rel,
							bms_make_singleton(attnum - FirstLowInvalidHeapAttributeNumber),
							&is_expr))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
				 errmsg("cannot drop column \"%s\" because it is part of the partition key of relation \"%s\"",
						colName, RelationGetRelationName(rel))));

	ReleaseSysCache(tuple);

	/*
	 * Propagate to children as appropriate.  Unlike most other ALTER
	 * routines, we have to do this one level of recursion at a time; we can't
	 * use find_all_inheritors to do it in one pass.
	 */
	children =
		find_inheritance_children(RelationGetRelid(rel), lockmode);

	if (children)
	{
		Relation	attr_rel;
		ListCell   *child;

		/*
		 * In case of a partitioned table, the column must be dropped from the
		 * partitions as well.
		 */
		if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE && !recurse)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
					 errmsg("cannot drop column from only the partitioned table when partitions exist"),
					 errhint("Do not specify the ONLY keyword.")));

		attr_rel = table_open(AttributeRelationId, RowExclusiveLock);
		foreach(child, children)
		{
			Oid			childrelid = lfirst_oid(child);
			Relation	childrel;
			Form_pg_attribute childatt;

			/* find_inheritance_children already got lock */
			childrel = table_open(childrelid, NoLock);
			CheckTableNotInUse(childrel, "ALTER TABLE");

			tuple = SearchSysCacheCopyAttName(childrelid, colName);
			if (!HeapTupleIsValid(tuple))	/* shouldn't happen */
				elog(ERROR, "cache lookup failed for attribute \"%s\" of relation %u",
					 colName, childrelid);
			childatt = (Form_pg_attribute) GETSTRUCT(tuple);

			if (childatt->attinhcount <= 0) /* shouldn't happen */
				elog(ERROR, "relation %u has non-inherited attribute \"%s\"",
					 childrelid, colName);

			if (recurse)
			{
				/*
				 * If the child column has other definition sources, just
				 * decrement its inheritance count; if not, recurse to delete
				 * it.
				 */
				if (childatt->attinhcount == 1 && !childatt->attislocal)
				{
					/* Time to delete this child column, too */
					ATExecDropColumn(wqueue, childrel, colName,
									 behavior, true, true,
									 false, lockmode, addrs);
				}
				else
				{
					/* Child column must survive my deletion */
					childatt->attinhcount--;

					CatalogTupleUpdate(attr_rel, &tuple->t_self, tuple);

					/* Make update visible */
					CommandCounterIncrement();
				}
			}
			else
			{
				/*
				 * If we were told to drop ONLY in this table (no recursion),
				 * we need to mark the inheritors' attributes as locally
				 * defined rather than inherited.
				 */
				childatt->attinhcount--;
				childatt->attislocal = true;

				CatalogTupleUpdate(attr_rel, &tuple->t_self, tuple);

				/* Make update visible */
				CommandCounterIncrement();
			}

			heap_freetuple(tuple);

			table_close(childrel, NoLock);
		}
		table_close(attr_rel, RowExclusiveLock);
	}

	/* Add object to delete */
	object.classId = RelationRelationId;
	object.objectId = RelationGetRelid(rel);
	object.objectSubId = attnum;
	add_exact_object_address(&object, addrs);

	if (!recursing)
	{
		/* Recursion has ended, drop everything that was collected */
		performMultipleDeletions(addrs, behavior, 0);
		free_object_addresses(addrs);
	}

	return object;
}

/*
 * ALTER TABLE ADD INDEX
 *
 * There is no such command in the grammar, but parse_utilcmd.c converts
 * UNIQUE and PRIMARY KEY constraints into AT_AddIndex subcommands.  This lets
 * us schedule creation of the index at the appropriate time during ALTER.
 *
 * Return value is the address of the new index.
+ */ +static ObjectAddress +ATExecAddIndex(AlteredTableInfo *tab, Relation rel, + IndexStmt *stmt, bool is_rebuild, LOCKMODE lockmode) +{ + bool check_rights; + bool skip_build; + bool quiet; + ObjectAddress address; + + Assert(IsA(stmt, IndexStmt)); + Assert(!stmt->concurrent); + + /* The IndexStmt has already been through transformIndexStmt */ + Assert(stmt->transformed); + + /* suppress schema rights check when rebuilding existing index */ + check_rights = !is_rebuild; + /* skip index build if phase 3 will do it or we're reusing an old one */ + skip_build = tab->rewrite > 0 || OidIsValid(stmt->oldNode); + /* suppress notices when rebuilding existing index */ + quiet = is_rebuild; + + address = DefineIndex(RelationGetRelid(rel), + stmt, + InvalidOid, /* no predefined OID */ + InvalidOid, /* no parent index */ + InvalidOid, /* no parent constraint */ + true, /* is_alter_table */ + check_rights, + false, /* check_not_in_use - we did it already */ + skip_build, + quiet); + + /* + * If TryReuseIndex() stashed a relfilenode for us, we used it for the new + * index instead of building from scratch. Restore associated fields. + * This may store InvalidSubTransactionId in both fields, in which case + * relcache.c will assume it can rebuild the relcache entry. Hence, do + * this after the CCI that made catalog rows visible to any rebuild. The + * DROP of the old edition of this index will have scheduled the storage + * for deletion at commit, so cancel that pending deletion. 
	 */
	if (OidIsValid(stmt->oldNode))
	{
		Relation	irel = index_open(address.objectId, NoLock);

		irel->rd_createSubid = stmt->oldCreateSubid;
		irel->rd_firstRelfilenodeSubid = stmt->oldFirstRelfilenodeSubid;
		RelationPreserveStorage(irel->rd_node, true);
		index_close(irel, NoLock);
	}

	return address;
}

/*
 * ALTER TABLE ADD STATISTICS
 *
 * This is no such command in the grammar, but we use this internally to add
 * AT_ReAddStatistics subcommands to rebuild extended statistics after a table
 * column type change.
 *
 * NOTE(review): is_rebuild and lockmode are currently unreferenced in this
 * body; they appear to be kept for signature symmetry with the other ATExec*
 * routines — confirm before removing.
 */
static ObjectAddress
ATExecAddStatistics(AlteredTableInfo *tab, Relation rel,
					CreateStatsStmt *stmt, bool is_rebuild, LOCKMODE lockmode)
{
	ObjectAddress address;

	Assert(IsA(stmt, CreateStatsStmt));

	/* The CreateStatsStmt has already been through transformStatsStmt */
	Assert(stmt->transformed);

	/* All the real work happens in the statistics-object creation path. */
	address = CreateStatistics(stmt);

	return address;
}

/*
 * ALTER TABLE ADD CONSTRAINT USING INDEX
 *
 * Promote an existing unique index into a UNIQUE or PRIMARY KEY constraint.
 *
 * Returns the address of the new constraint.
 */
static ObjectAddress
ATExecAddIndexConstraint(AlteredTableInfo *tab, Relation rel,
						 IndexStmt *stmt, LOCKMODE lockmode)
{
	Oid			index_oid = stmt->indexOid;
	Relation	indexRel;
	char	   *indexName;
	IndexInfo  *indexInfo;
	char	   *constraintName;
	char		constraintType;
	ObjectAddress address;
	bits16		flags;

	Assert(IsA(stmt, IndexStmt));
	Assert(OidIsValid(index_oid));
	Assert(stmt->isconstraint);

	/*
	 * Doing this on partitioned tables is not a simple feature to implement,
	 * so let's punt for now.
	 */
	if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("ALTER TABLE / ADD CONSTRAINT USING INDEX is not supported on partitioned tables")));

	indexRel = index_open(index_oid, AccessShareLock);

	indexName = pstrdup(RelationGetRelationName(indexRel));

	indexInfo = BuildIndexInfo(indexRel);

	/* this should have been checked at parse time */
	if (!indexInfo->ii_Unique)
		elog(ERROR, "index \"%s\" is not unique", indexName);

	/*
	 * Determine name to assign to constraint. We require a constraint to
	 * have the same name as the underlying index; therefore, use the index's
	 * existing name as the default constraint name, and if the user
	 * explicitly gives some other name for the constraint, rename the index
	 * to match.
	 */
	constraintName = stmt->idxname;
	if (constraintName == NULL)
		constraintName = indexName;
	else if (strcmp(constraintName, indexName) != 0)
	{
		ereport(NOTICE,
				(errmsg("ALTER TABLE / ADD CONSTRAINT USING INDEX will rename index \"%s\" to \"%s\"",
						indexName, constraintName)));
		RenameRelationInternal(index_oid, constraintName, false, true);
	}

	/* Extra checks needed if making primary key */
	if (stmt->primary)
		index_check_primary_key(rel, indexInfo, true, stmt);

	/* Note we currently don't support EXCLUSION constraints here */
	if (stmt->primary)
		constraintType = CONSTRAINT_PRIMARY;
	else
		constraintType = CONSTRAINT_UNIQUE;

	/* Create the catalog entries for the constraint */
	flags = INDEX_CONSTR_CREATE_UPDATE_INDEX |
		INDEX_CONSTR_CREATE_REMOVE_OLD_DEPS |
		(stmt->initdeferred ? INDEX_CONSTR_CREATE_INIT_DEFERRED : 0) |
		(stmt->deferrable ? INDEX_CONSTR_CREATE_DEFERRABLE : 0) |
		(stmt->primary ? INDEX_CONSTR_CREATE_MARK_AS_PRIMARY : 0);

	address = index_constraint_create(rel,
									  index_oid,
									  InvalidOid,
									  indexInfo,
									  constraintName,
									  constraintType,
									  flags,
									  allowSystemTableMods,
									  false);	/* is_internal */

	index_close(indexRel, NoLock);

	return address;
}

/*
 * ALTER TABLE ADD CONSTRAINT
 *
 * Dispatch on constraint type to the appropriate worker routine.
 *
 * Return value is the address of the new constraint; if no constraint was
 * added, InvalidObjectAddress is returned.
 */
static ObjectAddress
ATExecAddConstraint(List **wqueue, AlteredTableInfo *tab, Relation rel,
					Constraint *newConstraint, bool recurse, bool is_readd,
					LOCKMODE lockmode)
{
	ObjectAddress address = InvalidObjectAddress;

	Assert(IsA(newConstraint, Constraint));

	/*
	 * Currently, we only expect to see CONSTR_CHECK and CONSTR_FOREIGN nodes
	 * arriving here (see the preprocessing done in parse_utilcmd.c). Use a
	 * switch anyway to make it easier to add more code later.
	 */
	switch (newConstraint->contype)
	{
		case CONSTR_CHECK:
			address =
				ATAddCheckConstraint(wqueue, tab, rel,
									 newConstraint, recurse, false, is_readd,
									 lockmode);
			break;

		case CONSTR_FOREIGN:

			/*
			 * Assign or validate constraint name
			 */
			if (newConstraint->conname)
			{
				if (ConstraintNameIsUsed(CONSTRAINT_RELATION,
										 RelationGetRelid(rel),
										 newConstraint->conname))
					ereport(ERROR,
							(errcode(ERRCODE_DUPLICATE_OBJECT),
							 errmsg("constraint \"%s\" for relation \"%s\" already exists",
									newConstraint->conname,
									RelationGetRelationName(rel))));
			}
			else
				newConstraint->conname =
					ChooseConstraintName(RelationGetRelationName(rel),
										 ChooseForeignKeyConstraintNameAddition(newConstraint->fk_attrs),
										 "fkey",
										 RelationGetNamespace(rel),
										 NIL);

			address = ATAddForeignKeyConstraint(wqueue, tab, rel,
												newConstraint,
												recurse, false,
												lockmode);
			break;

		default:
			elog(ERROR, "unrecognized constraint type: %d",
				 (int) newConstraint->contype);
	}

	return address;
}

/*
 * Generate the column-name portion of the
 * constraint name for a new foreign
 * key given the list of column names that reference the referenced
 * table. This will be passed to ChooseConstraintName along with the parent
 * table name and the "fkey" suffix.
 *
 * We know that less than NAMEDATALEN characters will actually be used, so we
 * can truncate the result once we've generated that many.
 *
 * XXX see also ChooseExtendedStatisticNameAddition and
 * ChooseIndexNameAddition.
 */
static char *
ChooseForeignKeyConstraintNameAddition(List *colnames)
{
	char		buf[NAMEDATALEN * 2];
	int			buflen = 0;
	ListCell   *lc;

	buf[0] = '\0';
	foreach(lc, colnames)
	{
		const char *name = strVal(lfirst(lc));

		if (buflen > 0)
			buf[buflen++] = '_';	/* insert _ between names */

		/*
		 * At this point we have buflen <= NAMEDATALEN. name should be less
		 * than NAMEDATALEN already, but use strlcpy for paranoia.
		 */
		strlcpy(buf + buflen, name, NAMEDATALEN);
		buflen += strlen(buf + buflen);
		if (buflen >= NAMEDATALEN)
			break;
	}
	return pstrdup(buf);
}

/*
 * Add a check constraint to a single table and its children. Returns the
 * address of the constraint added to the parent relation, if one gets added,
 * or InvalidObjectAddress otherwise.
 *
 * Subroutine for ATExecAddConstraint.
 *
 * We must recurse to child tables during execution, rather than using
 * ALTER TABLE's normal prep-time recursion. The reason is that all the
 * constraints *must* be given the same name, else they won't be seen as
 * related later. If the user didn't explicitly specify a name, then
 * AddRelationNewConstraints would normally assign different names to the
 * child constraints. To fix that, we must capture the name assigned at
 * the parent table and pass that down.
 */
static ObjectAddress
ATAddCheckConstraint(List **wqueue, AlteredTableInfo *tab, Relation rel,
					 Constraint *constr, bool recurse, bool recursing,
					 bool is_readd, LOCKMODE lockmode)
{
	List	   *newcons;
	ListCell   *lcon;
	List	   *children;
	ListCell   *child;
	ObjectAddress address = InvalidObjectAddress;

	/* At top level, permission check was done in ATPrepCmd, else do it */
	if (recursing)
		ATSimplePermissions(AT_AddConstraint, rel, ATT_TABLE | ATT_FOREIGN_TABLE);

	/*
	 * Call AddRelationNewConstraints to do the work, making sure it works on
	 * a copy of the Constraint so transformExpr can't modify the original. It
	 * returns a list of cooked constraints.
	 *
	 * If the constraint ends up getting merged with a pre-existing one, it's
	 * omitted from the returned list, which is what we want: we do not need
	 * to do any validation work. That can only happen at child tables,
	 * though, since we disallow merging at the top level.
	 *
	 * NOTE(review): "recursing | is_readd" is a bitwise OR of two bools,
	 * which acts as a logical OR here; merging is allowed either when
	 * recursing to a child or when re-adding during a rewrite.
	 */
	newcons = AddRelationNewConstraints(rel, NIL,
										list_make1(copyObject(constr)),
										recursing | is_readd,	/* allow_merge */
										!recursing, /* is_local */
										is_readd,	/* is_internal */
										NULL);	/* queryString not available
												 * here */

	/* we don't expect more than one constraint here */
	Assert(list_length(newcons) <= 1);

	/* Add each to-be-validated constraint to Phase 3's queue */
	foreach(lcon, newcons)
	{
		CookedConstraint *ccon = (CookedConstraint *) lfirst(lcon);

		if (!ccon->skip_validation)
		{
			NewConstraint *newcon;

			newcon = (NewConstraint *) palloc0(sizeof(NewConstraint));
			newcon->name = ccon->name;
			newcon->contype = ccon->contype;
			newcon->qual = ccon->expr;

			tab->constraints = lappend(tab->constraints, newcon);
		}

		/* Save the actually assigned name if it was defaulted */
		if (constr->conname == NULL)
			constr->conname = ccon->name;

		ObjectAddressSet(address, ConstraintRelationId, ccon->conoid);
	}

	/* At this point we must have a locked-down name to use */
	Assert(constr->conname != NULL);

	/* Advance command counter in case same table is visited multiple times */
	CommandCounterIncrement();

	/*
	 * If the constraint got merged with an existing constraint, we're done.
	 * We mustn't recurse to child tables in this case, because they've
	 * already got the constraint, and visiting them again would lead to an
	 * incorrect value for coninhcount.
	 */
	if (newcons == NIL)
		return address;

	/*
	 * If adding a NO INHERIT constraint, no need to find our children.
	 */
	if (constr->is_no_inherit)
		return address;

	/*
	 * Propagate to children as appropriate. Unlike most other ALTER
	 * routines, we have to do this one level of recursion at a time; we can't
	 * use find_all_inheritors to do it in one pass.
	 */
	children =
		find_inheritance_children(RelationGetRelid(rel), lockmode);

	/*
	 * Check if ONLY was specified with ALTER TABLE. If so, allow the
	 * constraint creation only if there are no children currently. Error out
	 * otherwise.
	 */
	if (!recurse && children != NIL)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
				 errmsg("constraint must be added to child tables too")));

	foreach(child, children)
	{
		Oid			childrelid = lfirst_oid(child);
		Relation	childrel;
		AlteredTableInfo *childtab;

		/* find_inheritance_children already got lock */
		childrel = table_open(childrelid, NoLock);
		CheckTableNotInUse(childrel, "ALTER TABLE");

		/* Find or create work queue entry for this table */
		childtab = ATGetQueueEntry(wqueue, childrel);

		/* Recurse to child */
		ATAddCheckConstraint(wqueue, childtab, childrel,
							 constr, recurse, true, is_readd, lockmode);

		table_close(childrel, NoLock);
	}

	return address;
}

/*
 * Add a foreign-key constraint to a single table; return the new constraint's
 * address.
 *
 * Subroutine for ATExecAddConstraint. Must already hold exclusive
 * lock on the rel, and have done appropriate validity checks for it.
 * We do permissions checks here, however.
 *
 * When the referenced or referencing tables (or both) are partitioned,
 * multiple pg_constraint rows are required -- one for each partitioned table
 * and each partition on each side (fortunately, not one for every combination
 * thereof). We also need action triggers on each leaf partition on the
 * referenced side, and check triggers on each leaf partition on the
 * referencing side.
 */
static ObjectAddress
ATAddForeignKeyConstraint(List **wqueue, AlteredTableInfo *tab, Relation rel,
						  Constraint *fkconstraint,
						  bool recurse, bool recursing, LOCKMODE lockmode)
{
	Relation	pkrel;
	int16		pkattnum[INDEX_MAX_KEYS];
	int16		fkattnum[INDEX_MAX_KEYS];
	Oid			pktypoid[INDEX_MAX_KEYS];
	Oid			fktypoid[INDEX_MAX_KEYS];
	Oid			opclasses[INDEX_MAX_KEYS];
	Oid			pfeqoperators[INDEX_MAX_KEYS];
	Oid			ppeqoperators[INDEX_MAX_KEYS];
	Oid			ffeqoperators[INDEX_MAX_KEYS];
	int16		fkdelsetcols[INDEX_MAX_KEYS];
	int			i;
	int			numfks,
				numpks,
				numfkdelsetcols;
	Oid			indexOid;
	bool		old_check_ok;
	ObjectAddress address;
	ListCell   *old_pfeqop_item = list_head(fkconstraint->old_conpfeqop);

	/*
	 * Grab ShareRowExclusiveLock on the pk table, so that someone doesn't
	 * delete rows out from under us.
	 */
	if (OidIsValid(fkconstraint->old_pktable_oid))
		pkrel = table_open(fkconstraint->old_pktable_oid, ShareRowExclusiveLock);
	else
		pkrel = table_openrv(fkconstraint->pktable, ShareRowExclusiveLock);

	/*
	 * Validity checks (permission checks wait till we have the column
	 * numbers)
	 */
	if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
	{
		if (!recurse)
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot use ONLY for foreign key on partitioned table \"%s\" referencing relation \"%s\"",
							RelationGetRelationName(rel),
							RelationGetRelationName(pkrel))));
		if (fkconstraint->skip_validation && !fkconstraint->initially_valid)
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot add NOT VALID foreign key on partitioned table \"%s\" referencing relation \"%s\"",
							RelationGetRelationName(rel),
							RelationGetRelationName(pkrel)),
					 errdetail("This feature is not yet supported on partitioned tables.")));
	}

	if (pkrel->rd_rel->relkind != RELKIND_RELATION &&
		pkrel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("referenced relation \"%s\" is not a table",
						RelationGetRelationName(pkrel))));

	if (!allowSystemTableMods && IsSystemRelation(pkrel))
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied: \"%s\" is a system catalog",
						RelationGetRelationName(pkrel))));

	/*
	 * References from permanent or unlogged tables to temp tables, and from
	 * permanent tables to unlogged tables, are disallowed because the
	 * referenced data can vanish out from under us. References from temp
	 * tables to any other table type are also disallowed, because other
	 * backends might need to run the RI triggers on the perm table, but they
	 * can't reliably see tuples in the local buffers of other backends.
	 */
	switch (rel->rd_rel->relpersistence)
	{
		case RELPERSISTENCE_PERMANENT:
			if (!RelationIsPermanent(pkrel))
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
						 errmsg("constraints on permanent tables may reference only permanent tables")));
			break;
		case RELPERSISTENCE_UNLOGGED:
			if (!RelationIsPermanent(pkrel)
				&& pkrel->rd_rel->relpersistence != RELPERSISTENCE_UNLOGGED)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
						 errmsg("constraints on unlogged tables may reference only permanent or unlogged tables")));
			break;
		case RELPERSISTENCE_TEMP:
			if (pkrel->rd_rel->relpersistence != RELPERSISTENCE_TEMP)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
						 errmsg("constraints on temporary tables may reference only temporary tables")));
			if (!pkrel->rd_islocaltemp || !rel->rd_islocaltemp)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
						 errmsg("constraints on temporary tables must involve temporary tables of this session")));
			break;
	}

	/*
	 * Look up the referencing attributes to make sure they exist, and record
	 * their attnums and type OIDs.
	 */
	MemSet(pkattnum, 0, sizeof(pkattnum));
	MemSet(fkattnum, 0, sizeof(fkattnum));
	MemSet(pktypoid, 0, sizeof(pktypoid));
	MemSet(fktypoid, 0, sizeof(fktypoid));
	MemSet(opclasses, 0, sizeof(opclasses));
	MemSet(pfeqoperators, 0, sizeof(pfeqoperators));
	MemSet(ppeqoperators, 0, sizeof(ppeqoperators));
	MemSet(ffeqoperators, 0, sizeof(ffeqoperators));
	MemSet(fkdelsetcols, 0, sizeof(fkdelsetcols));

	numfks = transformColumnNameList(RelationGetRelid(rel),
									 fkconstraint->fk_attrs,
									 fkattnum, fktypoid);

	/* type OIDs are not needed for the ON DELETE SET column list */
	numfkdelsetcols = transformColumnNameList(RelationGetRelid(rel),
											  fkconstraint->fk_del_set_cols,
											  fkdelsetcols, NULL);
	validateFkOnDeleteSetColumns(numfks, fkattnum,
								 numfkdelsetcols, fkdelsetcols,
								 fkconstraint->fk_del_set_cols);

	/*
	 * If the attribute list for the referenced table was omitted, lookup the
	 * definition of the primary key and use it. Otherwise, validate the
	 * supplied attribute list. In either case, discover the index OID and
	 * index opclasses, and the attnums and type OIDs of the attributes.
	 */
	if (fkconstraint->pk_attrs == NIL)
	{
		numpks = transformFkeyGetPrimaryKey(pkrel, &indexOid,
											&fkconstraint->pk_attrs,
											pkattnum, pktypoid,
											opclasses);
	}
	else
	{
		numpks = transformColumnNameList(RelationGetRelid(pkrel),
										 fkconstraint->pk_attrs,
										 pkattnum, pktypoid);
		/* Look for an index matching the column list */
		indexOid = transformFkeyCheckAttrs(pkrel, numpks, pkattnum,
										   opclasses);
	}

	/*
	 * Now we can check permissions.
	 */
	checkFkeyPermissions(pkrel, pkattnum, numpks);

	/*
	 * Check some things for generated columns.
	 */
	for (i = 0; i < numfks; i++)
	{
		char		attgenerated = TupleDescAttr(RelationGetDescr(rel), fkattnum[i] - 1)->attgenerated;

		if (attgenerated)
		{
			/*
			 * Check restrictions on UPDATE/DELETE actions, per SQL standard
			 */
			if (fkconstraint->fk_upd_action == FKCONSTR_ACTION_SETNULL ||
				fkconstraint->fk_upd_action == FKCONSTR_ACTION_SETDEFAULT ||
				fkconstraint->fk_upd_action == FKCONSTR_ACTION_CASCADE)
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
						 errmsg("invalid %s action for foreign key constraint containing generated column",
								"ON UPDATE")));
			if (fkconstraint->fk_del_action == FKCONSTR_ACTION_SETNULL ||
				fkconstraint->fk_del_action == FKCONSTR_ACTION_SETDEFAULT)
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
						 errmsg("invalid %s action for foreign key constraint containing generated column",
								"ON DELETE")));
		}
	}

	/*
	 * Look up the equality operators to use in the constraint.
	 *
	 * Note that we have to be careful about the difference between the actual
	 * PK column type and the opclass' declared input type, which might be
	 * only binary-compatible with it. The declared opcintype is the right
	 * thing to probe pg_amop with.
	 */
	if (numfks != numpks)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_FOREIGN_KEY),
				 errmsg("number of referencing and referenced columns for foreign key disagree")));

	/*
	 * On the strength of a previous constraint, we might avoid scanning
	 * tables to validate this one. See below.
	 */
	old_check_ok = (fkconstraint->old_conpfeqop != NIL);
	Assert(!old_check_ok || numfks == list_length(fkconstraint->old_conpfeqop));

	/* numfks == numpks was verified just above, so either index is fine */
	for (i = 0; i < numpks; i++)
	{
		Oid			pktype = pktypoid[i];
		Oid			fktype = fktypoid[i];
		Oid			fktyped;
		HeapTuple	cla_ht;
		Form_pg_opclass cla_tup;
		Oid			amid;
		Oid			opfamily;
		Oid			opcintype;
		Oid			pfeqop;
		Oid			ppeqop;
		Oid			ffeqop;
		int16		eqstrategy;
		Oid			pfeqop_right;

		/* We need several fields out of the pg_opclass entry */
		cla_ht = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclasses[i]));
		if (!HeapTupleIsValid(cla_ht))
			elog(ERROR, "cache lookup failed for opclass %u", opclasses[i]);
		cla_tup = (Form_pg_opclass) GETSTRUCT(cla_ht);
		amid = cla_tup->opcmethod;
		opfamily = cla_tup->opcfamily;
		opcintype = cla_tup->opcintype;
		ReleaseSysCache(cla_ht);

		/*
		 * Check it's a btree; currently this can never fail since no other
		 * index AMs support unique indexes. If we ever did have other types
		 * of unique indexes, we'd need a way to determine which operator
		 * strategy number is equality. (Is it reasonable to insist that
		 * every such index AM use btree's number for equality?)
		 */
		if (amid != BTREE_AM_OID)
			elog(ERROR, "only b-tree indexes are supported for foreign keys");
		eqstrategy = BTEqualStrategyNumber;

		/*
		 * There had better be a primary equality operator for the index.
		 * We'll use it for PK = PK comparisons.
		 */
		ppeqop = get_opfamily_member(opfamily, opcintype, opcintype,
									 eqstrategy);

		if (!OidIsValid(ppeqop))
			elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
				 eqstrategy, opcintype, opcintype, opfamily);

		/*
		 * Are there equality operators that take exactly the FK type? Assume
		 * we should look through any domain here.
		 */
		fktyped = getBaseType(fktype);

		pfeqop = get_opfamily_member(opfamily, opcintype, fktyped,
									 eqstrategy);
		if (OidIsValid(pfeqop))
		{
			pfeqop_right = fktyped;
			ffeqop = get_opfamily_member(opfamily, fktyped, fktyped,
										 eqstrategy);
		}
		else
		{
			/* keep compiler quiet */
			pfeqop_right = InvalidOid;
			ffeqop = InvalidOid;
		}

		if (!(OidIsValid(pfeqop) && OidIsValid(ffeqop)))
		{
			/*
			 * Otherwise, look for an implicit cast from the FK type to the
			 * opcintype, and if found, use the primary equality operator.
			 * This is a bit tricky because opcintype might be a polymorphic
			 * type such as ANYARRAY or ANYENUM; so what we have to test is
			 * whether the two actual column types can be concurrently cast to
			 * that type. (Otherwise, we'd fail to reject combinations such
			 * as int[] and point[].)
			 */
			Oid			input_typeids[2];
			Oid			target_typeids[2];

			input_typeids[0] = pktype;
			input_typeids[1] = fktype;
			target_typeids[0] = opcintype;
			target_typeids[1] = opcintype;
			if (can_coerce_type(2, input_typeids, target_typeids,
								COERCION_IMPLICIT))
			{
				pfeqop = ffeqop = ppeqop;
				pfeqop_right = opcintype;
			}
		}

		if (!(OidIsValid(pfeqop) && OidIsValid(ffeqop)))
			ereport(ERROR,
					(errcode(ERRCODE_DATATYPE_MISMATCH),
					 errmsg("foreign key constraint \"%s\" cannot be implemented",
							fkconstraint->conname),
					 errdetail("Key columns \"%s\" and \"%s\" "
							   "are of incompatible types: %s and %s.",
							   strVal(list_nth(fkconstraint->fk_attrs, i)),
							   strVal(list_nth(fkconstraint->pk_attrs, i)),
							   format_type_be(fktype),
							   format_type_be(pktype))));

		if (old_check_ok)
		{
			/*
			 * When a pfeqop changes, revalidate the constraint. We could
			 * permit intra-opfamily changes, but that adds subtle complexity
			 * without any concrete benefit for core types. We need not
			 * assess ppeqop or ffeqop, which RI_Initial_Check() does not use.
			 */
			old_check_ok = (pfeqop == lfirst_oid(old_pfeqop_item));
			old_pfeqop_item = lnext(fkconstraint->old_conpfeqop,
									old_pfeqop_item);
		}
		if (old_check_ok)
		{
			Oid			old_fktype;
			Oid			new_fktype;
			CoercionPathType old_pathtype;
			CoercionPathType new_pathtype;
			Oid			old_castfunc;
			Oid			new_castfunc;
			Form_pg_attribute attr = TupleDescAttr(tab->oldDesc,
												   fkattnum[i] - 1);

			/*
			 * Identify coercion pathways from each of the old and new FK-side
			 * column types to the right (foreign) operand type of the pfeqop.
			 * We may assume that pg_constraint.conkey is not changing.
			 */
			old_fktype = attr->atttypid;
			new_fktype = fktype;
			old_pathtype = findFkeyCast(pfeqop_right, old_fktype,
										&old_castfunc);
			new_pathtype = findFkeyCast(pfeqop_right, new_fktype,
										&new_castfunc);

			/*
			 * Upon a change to the cast from the FK column to its pfeqop
			 * operand, revalidate the constraint. For this evaluation, a
			 * binary coercion cast is equivalent to no cast at all. While
			 * type implementors should design implicit casts with an eye
			 * toward consistency of operations like equality, we cannot
			 * assume here that they have done so.
			 *
			 * A function with a polymorphic argument could change behavior
			 * arbitrarily in response to get_fn_expr_argtype(). Therefore,
			 * when the cast destination is polymorphic, we only avoid
			 * revalidation if the input type has not changed at all. Given
			 * just the core data types and operator classes, this requirement
			 * prevents no would-be optimizations.
			 *
			 * If the cast converts from a base type to a domain thereon, then
			 * that domain type must be the opcintype of the unique index.
			 * Necessarily, the primary key column must then be of the domain
			 * type. Since the constraint was previously valid, all values on
			 * the foreign side necessarily exist on the primary side and in
			 * turn conform to the domain. Consequently, we need not treat
			 * domains specially here.
			 *
			 * Since we require that all collations share the same notion of
			 * equality (which they do, because texteq reduces to bitwise
			 * equality), we don't compare collation here.
			 *
			 * We need not directly consider the PK type. It's necessarily
			 * binary coercible to the opcintype of the unique index column,
			 * and ri_triggers.c will only deal with PK datums in terms of
			 * that opcintype. Changing the opcintype also changes pfeqop.
			 */
			old_check_ok = (new_pathtype == old_pathtype &&
							new_castfunc == old_castfunc &&
							(!IsPolymorphicType(pfeqop_right) ||
							 new_fktype == old_fktype));
		}

		pfeqoperators[i] = pfeqop;
		ppeqoperators[i] = ppeqop;
		ffeqoperators[i] = ffeqop;
	}

	/*
	 * Create all the constraint and trigger objects, recursing to partitions
	 * as necessary. First handle the referenced side.
	 */
	address = addFkRecurseReferenced(wqueue, fkconstraint, rel, pkrel,
									 indexOid,
									 InvalidOid,	/* no parent constraint */
									 numfks,
									 pkattnum,
									 fkattnum,
									 pfeqoperators,
									 ppeqoperators,
									 ffeqoperators,
									 numfkdelsetcols,
									 fkdelsetcols,
									 old_check_ok,
									 InvalidOid, InvalidOid);

	/* Now handle the referencing side. */
	addFkRecurseReferencing(wqueue, fkconstraint, rel, pkrel,
							indexOid,
							address.objectId,
							numfks,
							pkattnum,
							fkattnum,
							pfeqoperators,
							ppeqoperators,
							ffeqoperators,
							numfkdelsetcols,
							fkdelsetcols,
							old_check_ok,
							lockmode,
							InvalidOid, InvalidOid);

	/*
	 * Done. Close pk table, but keep lock until we've committed.
	 */
	table_close(pkrel, NoLock);

	return address;
}

/*
 * validateFkOnDeleteSetColumns
 *		Verifies that columns used in ON DELETE SET NULL/DEFAULT (...)
 *		column lists are valid.
 */
void
validateFkOnDeleteSetColumns(int numfks, const int16 *fkattnums,
							 int numfksetcols, const int16 *fksetcolsattnums,
							 List *fksetcols)
{
	/* Each SET-column attnum must appear somewhere in the FK column list */
	for (int i = 0; i < numfksetcols; i++)
	{
		int16		setcol_attnum = fksetcolsattnums[i];
		bool		seen = false;

		for (int j = 0; j < numfks; j++)
		{
			if (fkattnums[j] == setcol_attnum)
			{
				seen = true;
				break;
			}
		}

		if (!seen)
		{
			char	   *col = strVal(list_nth(fksetcols, i));

			ereport(ERROR,
					(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
					 errmsg("column \"%s\" referenced in ON DELETE SET action must be part of foreign key", col)));
		}
	}
}

/*
 * addFkRecurseReferenced
 *		subroutine for ATAddForeignKeyConstraint; recurses on the referenced
 *		side of the constraint
 *
 * Create pg_constraint rows for the referenced side of the constraint,
 * referencing the parent of the referencing side; also create action triggers
 * on leaf partitions. If the table is partitioned, recurse to handle each
 * partition.
 *
 * wqueue is the ALTER TABLE work queue; can be NULL when not running as part
 * of an ALTER TABLE sequence.
 * fkconstraint is the constraint being added.
 * rel is the root referencing relation.
 * pkrel is the referenced relation; might be a partition, if recursing.
 * indexOid is the OID of the index (on pkrel) implementing this constraint.
 * parentConstr is the OID of a parent constraint; InvalidOid if this is a
 * top-level constraint.
 * numfks is the number of columns in the foreign key
 * pkattnum is the attnum array of referenced attributes.
 * fkattnum is the attnum array of referencing attributes.
 * numfkdelsetcols is the number of columns in the ON DELETE SET NULL/DEFAULT
 * (...) clause
 * fkdelsetcols is the attnum array of the columns in the ON DELETE SET
 * NULL/DEFAULT clause
 * pf/pp/ffeqoperators are OID array of operators between columns.
 * old_check_ok signals that this constraint replaces an existing one that
 * was already validated (thus this one doesn't need validation).
 * parentDelTrigger and parentUpdTrigger, when being recursively called on
 * a partition, are the OIDs of the parent action triggers for DELETE and
 * UPDATE respectively.
 */
static ObjectAddress
addFkRecurseReferenced(List **wqueue, Constraint *fkconstraint, Relation rel,
					   Relation pkrel, Oid indexOid, Oid parentConstr,
					   int numfks,
					   int16 *pkattnum, int16 *fkattnum, Oid *pfeqoperators,
					   Oid *ppeqoperators, Oid *ffeqoperators,
					   int numfkdelsetcols, int16 *fkdelsetcols,
					   bool old_check_ok,
					   Oid parentDelTrigger, Oid parentUpdTrigger)
{
	ObjectAddress address;
	Oid			constrOid;
	char	   *conname;
	bool		conislocal;
	int			coninhcount;
	bool		connoinherit;
	Oid			deleteTriggerOid,
				updateTriggerOid;

	/*
	 * Verify relkind for each referenced partition. At the top level, this
	 * is redundant with a previous check, but we need it when recursing.
	 */
	if (pkrel->rd_rel->relkind != RELKIND_RELATION &&
		pkrel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("referenced relation \"%s\" is not a table",
						RelationGetRelationName(pkrel))));

	/*
	 * Caller supplies us with a constraint name; however, it may be used in
	 * this partition, so come up with a different one in that case.
	 */
	if (ConstraintNameIsUsed(CONSTRAINT_RELATION,
							 RelationGetRelid(rel),
							 fkconstraint->conname))
		conname = ChooseConstraintName(RelationGetRelationName(rel),
									   ChooseForeignKeyConstraintNameAddition(fkconstraint->fk_attrs),
									   "fkey",
									   RelationGetNamespace(rel), NIL);
	else
		conname = fkconstraint->conname;

	if (OidIsValid(parentConstr))
	{
		conislocal = false;
		coninhcount = 1;
		connoinherit = false;
	}
	else
	{
		conislocal = true;
		coninhcount = 0;

		/*
		 * always inherit for partitioned tables, never for legacy inheritance
		 */
		connoinherit = rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE;
	}

	/*
	 * Record the FK constraint in pg_constraint.
	 */
	constrOid = CreateConstraintEntry(conname,
									  RelationGetNamespace(rel),
									  CONSTRAINT_FOREIGN,
									  fkconstraint->deferrable,
									  fkconstraint->initdeferred,
									  fkconstraint->initially_valid,
									  parentConstr,
									  RelationGetRelid(rel),
									  fkattnum,
									  numfks,
									  numfks,
									  InvalidOid,	/* not a domain constraint */
									  indexOid,
									  RelationGetRelid(pkrel),
									  pkattnum,
									  pfeqoperators,
									  ppeqoperators,
									  ffeqoperators,
									  numfks,
									  fkconstraint->fk_upd_action,
									  fkconstraint->fk_del_action,
									  fkdelsetcols,
									  numfkdelsetcols,
									  fkconstraint->fk_matchtype,
									  NULL, /* no exclusion constraint */
									  NULL, /* no check constraint */
									  NULL,
									  conislocal,	/* islocal */
									  coninhcount,	/* inhcount */
									  connoinherit, /* conNoInherit */
									  false);	/* is_internal */

	ObjectAddressSet(address, ConstraintRelationId, constrOid);

	/*
	 * Mark the child constraint as part of the parent constraint; it must not
	 * be dropped on its own. (This constraint is deleted when the partition
	 * is detached, but a special check needs to occur that the partition
	 * contains no referenced values.)
	 */
	if (OidIsValid(parentConstr))
	{
		ObjectAddress referenced;

		ObjectAddressSet(referenced, ConstraintRelationId, parentConstr);
		recordDependencyOn(&address, &referenced, DEPENDENCY_INTERNAL);
	}

	/* make new constraint visible, in case we add more */
	CommandCounterIncrement();

	/*
	 * Create the action triggers that enforce the constraint.
	 */
	createForeignKeyActionTriggers(rel, RelationGetRelid(pkrel),
								   fkconstraint,
								   constrOid, indexOid,
								   parentDelTrigger, parentUpdTrigger,
								   &deleteTriggerOid, &updateTriggerOid);

	/*
	 * If the referenced table is partitioned, recurse on ourselves to handle
	 * each partition. We need one pg_constraint row created for each
	 * partition in addition to the pg_constraint row for the parent table.
	 */
	if (pkrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
	{
		PartitionDesc pd = RelationGetPartitionDesc(pkrel, true);

		for (int i = 0; i < pd->nparts; i++)
		{
			Relation	partRel;
			AttrMap    *map;
			AttrNumber *mapped_pkattnum;
			Oid			partIndexId;

			partRel = table_open(pd->oids[i], ShareRowExclusiveLock);

			/*
			 * Map the attribute numbers in the referenced side of the FK
			 * definition to match the partition's column layout.
			 *
			 * When no mapping is needed, mapped_pkattnum simply aliases the
			 * caller's pkattnum array; only the palloc'd copy is freed below.
			 */
			map = build_attrmap_by_name_if_req(RelationGetDescr(partRel),
											   RelationGetDescr(pkrel));
			if (map)
			{
				mapped_pkattnum = palloc(sizeof(AttrNumber) * numfks);
				for (int j = 0; j < numfks; j++)
					mapped_pkattnum[j] = map->attnums[pkattnum[j] - 1];
			}
			else
				mapped_pkattnum = pkattnum;

			/* do the deed */
			partIndexId = index_get_partition(partRel, indexOid);
			if (!OidIsValid(partIndexId))
				elog(ERROR, "index for %u not found in partition %s",
					 indexOid, RelationGetRelationName(partRel));
			addFkRecurseReferenced(wqueue, fkconstraint, rel, partRel,
								   partIndexId, constrOid, numfks,
								   mapped_pkattnum, fkattnum,
								   pfeqoperators, ppeqoperators, ffeqoperators,
								   numfkdelsetcols, fkdelsetcols,
								   old_check_ok,
								   deleteTriggerOid, updateTriggerOid);

			/* Done -- clean up (but keep the lock) */
			table_close(partRel, NoLock);
			if (map)
			{
				pfree(mapped_pkattnum);
				free_attrmap(map);
			}
		}
	}

	return address;
}

/*
 * addFkRecurseReferencing
 *		subroutine for ATAddForeignKeyConstraint and CloneFkReferencing
 *
 * If the referencing relation is a plain relation, create the necessary check
 * triggers that implement the constraint, and set up for Phase 3 constraint
 * verification. If the referencing relation is a partitioned table, then
 * we create a pg_constraint row for it and recurse on this routine for each
 * partition.
 *
 * We assume that the referenced relation is locked against concurrent
 * deletions. If it's a partitioned relation, every partition must be so
 * locked.
 *
 * wqueue is the ALTER TABLE work queue; can be NULL when not running as part
 * of an ALTER TABLE sequence.
 * fkconstraint is the constraint being added.
 * rel is the referencing relation; might be a partition, if recursing.
 * pkrel is the root referenced relation.
 * indexOid is the OID of the index (on pkrel) implementing this constraint.
 * parentConstr is the OID of the parent constraint (there is always one).
+ * numfks is the number of columns in the foreign key + * pkattnum is the attnum array of referenced attributes. + * fkattnum is the attnum array of referencing attributes. + * pf/pp/ffeqoperators are OID array of operators between columns. + * numfkdelsetcols is the number of columns in the ON DELETE SET NULL/DEFAULT + * (...) clause + * fkdelsetcols is the attnum array of the columns in the ON DELETE SET + * NULL/DEFAULT clause + * old_check_ok signals that this constraint replaces an existing one that + * was already validated (thus this one doesn't need validation). + * lockmode is the lockmode to acquire on partitions when recursing. + * parentInsTrigger and parentUpdTrigger, when being recursively called on + * a partition, are the OIDs of the parent check triggers for INSERT and + * UPDATE respectively. + */ +static void +addFkRecurseReferencing(List **wqueue, Constraint *fkconstraint, Relation rel, + Relation pkrel, Oid indexOid, Oid parentConstr, + int numfks, int16 *pkattnum, int16 *fkattnum, + Oid *pfeqoperators, Oid *ppeqoperators, Oid *ffeqoperators, + int numfkdelsetcols, int16 *fkdelsetcols, + bool old_check_ok, LOCKMODE lockmode, + Oid parentInsTrigger, Oid parentUpdTrigger) +{ + Oid insertTriggerOid, + updateTriggerOid; + + AssertArg(OidIsValid(parentConstr)); + + if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("foreign key constraints are not supported on foreign tables"))); + + /* + * Add the check triggers to it and, if necessary, schedule it to be + * checked in Phase 3. + * + * If the relation is partitioned, drill down to do it to its partitions. 
+ */ + createForeignKeyCheckTriggers(RelationGetRelid(rel), + RelationGetRelid(pkrel), + fkconstraint, + parentConstr, + indexOid, + parentInsTrigger, parentUpdTrigger, + &insertTriggerOid, &updateTriggerOid); + + if (rel->rd_rel->relkind == RELKIND_RELATION) + { + /* + * Tell Phase 3 to check that the constraint is satisfied by existing + * rows. We can skip this during table creation, when requested + * explicitly by specifying NOT VALID in an ADD FOREIGN KEY command, + * and when we're recreating a constraint following a SET DATA TYPE + * operation that did not impugn its validity. + */ + if (wqueue && !old_check_ok && !fkconstraint->skip_validation) + { + NewConstraint *newcon; + AlteredTableInfo *tab; + + tab = ATGetQueueEntry(wqueue, rel); + + newcon = (NewConstraint *) palloc0(sizeof(NewConstraint)); + newcon->name = get_constraint_name(parentConstr); + newcon->contype = CONSTR_FOREIGN; + newcon->refrelid = RelationGetRelid(pkrel); + newcon->refindid = indexOid; + newcon->conid = parentConstr; + newcon->qual = (Node *) fkconstraint; + + tab->constraints = lappend(tab->constraints, newcon); + } + } + else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + PartitionDesc pd = RelationGetPartitionDesc(rel, true); + Relation trigrel; + + /* + * Triggers of the foreign keys will be manipulated a bunch of times + * in the loop below. To avoid repeatedly opening/closing the trigger + * catalog relation, we open it here and pass it to the subroutines + * called below. + */ + trigrel = table_open(TriggerRelationId, RowExclusiveLock); + + /* + * Recurse to take appropriate action on each partition; either we + * find an existing constraint to reparent to ours, or we create a new + * one. 
+ */ + for (int i = 0; i < pd->nparts; i++) + { + Oid partitionId = pd->oids[i]; + Relation partition = table_open(partitionId, lockmode); + List *partFKs; + AttrMap *attmap; + AttrNumber mapped_fkattnum[INDEX_MAX_KEYS]; + bool attached; + char *conname; + Oid constrOid; + ObjectAddress address, + referenced; + ListCell *cell; + + CheckTableNotInUse(partition, "ALTER TABLE"); + + attmap = build_attrmap_by_name(RelationGetDescr(partition), + RelationGetDescr(rel)); + for (int j = 0; j < numfks; j++) + mapped_fkattnum[j] = attmap->attnums[fkattnum[j] - 1]; + + /* Check whether an existing constraint can be repurposed */ + partFKs = copyObject(RelationGetFKeyList(partition)); + attached = false; + foreach(cell, partFKs) + { + ForeignKeyCacheInfo *fk; + + fk = lfirst_node(ForeignKeyCacheInfo, cell); + if (tryAttachPartitionForeignKey(fk, + partitionId, + parentConstr, + numfks, + mapped_fkattnum, + pkattnum, + pfeqoperators, + insertTriggerOid, + updateTriggerOid, + trigrel)) + { + attached = true; + break; + } + } + if (attached) + { + table_close(partition, NoLock); + continue; + } + + /* + * No luck finding a good constraint to reuse; create our own. 
+ */ + if (ConstraintNameIsUsed(CONSTRAINT_RELATION, + RelationGetRelid(partition), + fkconstraint->conname)) + conname = ChooseConstraintName(RelationGetRelationName(partition), + ChooseForeignKeyConstraintNameAddition(fkconstraint->fk_attrs), + "fkey", + RelationGetNamespace(partition), NIL); + else + conname = fkconstraint->conname; + constrOid = + CreateConstraintEntry(conname, + RelationGetNamespace(partition), + CONSTRAINT_FOREIGN, + fkconstraint->deferrable, + fkconstraint->initdeferred, + fkconstraint->initially_valid, + parentConstr, + partitionId, + mapped_fkattnum, + numfks, + numfks, + InvalidOid, + indexOid, + RelationGetRelid(pkrel), + pkattnum, + pfeqoperators, + ppeqoperators, + ffeqoperators, + numfks, + fkconstraint->fk_upd_action, + fkconstraint->fk_del_action, + fkdelsetcols, + numfkdelsetcols, + fkconstraint->fk_matchtype, + NULL, + NULL, + NULL, + false, + 1, + false, + false); + + /* + * Give this constraint partition-type dependencies on the parent + * constraint as well as the table. 
+ */ + ObjectAddressSet(address, ConstraintRelationId, constrOid); + ObjectAddressSet(referenced, ConstraintRelationId, parentConstr); + recordDependencyOn(&address, &referenced, DEPENDENCY_PARTITION_PRI); + ObjectAddressSet(referenced, RelationRelationId, partitionId); + recordDependencyOn(&address, &referenced, DEPENDENCY_PARTITION_SEC); + + /* Make all this visible before recursing */ + CommandCounterIncrement(); + + /* call ourselves to finalize the creation and we're done */ + addFkRecurseReferencing(wqueue, fkconstraint, partition, pkrel, + indexOid, + constrOid, + numfks, + pkattnum, + mapped_fkattnum, + pfeqoperators, + ppeqoperators, + ffeqoperators, + numfkdelsetcols, + fkdelsetcols, + old_check_ok, + lockmode, + insertTriggerOid, + updateTriggerOid); + + table_close(partition, NoLock); + } + + table_close(trigrel, RowExclusiveLock); + } +} + +/* + * CloneForeignKeyConstraints + * Clone foreign keys from a partitioned table to a newly acquired + * partition. + * + * partitionRel is a partition of parentRel, so we can be certain that it has + * the same columns with the same datatypes. The columns may be in different + * order, though. + * + * wqueue must be passed to set up phase 3 constraint checking, unless the + * referencing-side partition is known to be empty (such as in CREATE TABLE / + * PARTITION OF). + */ +static void +CloneForeignKeyConstraints(List **wqueue, Relation parentRel, + Relation partitionRel) +{ + /* This only works for declarative partitioning */ + Assert(parentRel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE); + + /* + * Clone constraints for which the parent is on the referenced side. + */ + CloneFkReferenced(parentRel, partitionRel); + + /* + * Now clone constraints where the parent is on the referencing side. 
+ */ + CloneFkReferencing(wqueue, parentRel, partitionRel); +} + +/* + * CloneFkReferenced + * Subroutine for CloneForeignKeyConstraints + * + * Find all the FKs that have the parent relation on the referenced side; + * clone those constraints to the given partition. This is to be called + * when the partition is being created or attached. + * + * This ignores self-referencing FKs; those are handled by CloneFkReferencing. + * + * This recurses to partitions, if the relation being attached is partitioned. + * Recursion is done by calling addFkRecurseReferenced. + */ +static void +CloneFkReferenced(Relation parentRel, Relation partitionRel) +{ + Relation pg_constraint; + AttrMap *attmap; + ListCell *cell; + SysScanDesc scan; + ScanKeyData key[2]; + HeapTuple tuple; + List *clone = NIL; + Relation trigrel; + + /* + * Search for any constraints where this partition's parent is in the + * referenced side. However, we must not clone any constraint whose + * parent constraint is also going to be cloned, to avoid duplicates. So + * do it in two steps: first construct the list of constraints to clone, + * then go over that list cloning those whose parents are not in the list. + * (We must not rely on the parent being seen first, since the catalog + * scan could return children first.) + */ + pg_constraint = table_open(ConstraintRelationId, RowShareLock); + ScanKeyInit(&key[0], + Anum_pg_constraint_confrelid, BTEqualStrategyNumber, + F_OIDEQ, ObjectIdGetDatum(RelationGetRelid(parentRel))); + ScanKeyInit(&key[1], + Anum_pg_constraint_contype, BTEqualStrategyNumber, + F_CHAREQ, CharGetDatum(CONSTRAINT_FOREIGN)); + /* This is a seqscan, as we don't have a usable index ... 
*/ + scan = systable_beginscan(pg_constraint, InvalidOid, true, + NULL, 2, key); + while ((tuple = systable_getnext(scan)) != NULL) + { + Form_pg_constraint constrForm = (Form_pg_constraint) GETSTRUCT(tuple); + + clone = lappend_oid(clone, constrForm->oid); + } + systable_endscan(scan); + table_close(pg_constraint, RowShareLock); + + /* + * Triggers of the foreign keys will be manipulated a bunch of times in + * the loop below. To avoid repeatedly opening/closing the trigger + * catalog relation, we open it here and pass it to the subroutines called + * below. + */ + trigrel = table_open(TriggerRelationId, RowExclusiveLock); + + attmap = build_attrmap_by_name(RelationGetDescr(partitionRel), + RelationGetDescr(parentRel)); + foreach(cell, clone) + { + Oid constrOid = lfirst_oid(cell); + Form_pg_constraint constrForm; + Relation fkRel; + Oid indexOid; + Oid partIndexId; + int numfks; + AttrNumber conkey[INDEX_MAX_KEYS]; + AttrNumber mapped_confkey[INDEX_MAX_KEYS]; + AttrNumber confkey[INDEX_MAX_KEYS]; + Oid conpfeqop[INDEX_MAX_KEYS]; + Oid conppeqop[INDEX_MAX_KEYS]; + Oid conffeqop[INDEX_MAX_KEYS]; + int numfkdelsetcols; + AttrNumber confdelsetcols[INDEX_MAX_KEYS]; + Constraint *fkconstraint; + Oid deleteTriggerOid, + updateTriggerOid; + + tuple = SearchSysCache1(CONSTROID, constrOid); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for constraint %u", constrOid); + constrForm = (Form_pg_constraint) GETSTRUCT(tuple); + + /* + * As explained above: don't try to clone a constraint for which we're + * going to clone the parent. + */ + if (list_member_oid(clone, constrForm->conparentid)) + { + ReleaseSysCache(tuple); + continue; + } + + /* + * Don't clone self-referencing foreign keys, which can be in the + * partitioned table or in the partition-to-be. 
+ */ + if (constrForm->conrelid == RelationGetRelid(parentRel) || + constrForm->conrelid == RelationGetRelid(partitionRel)) + { + ReleaseSysCache(tuple); + continue; + } + + /* + * Because we're only expanding the key space at the referenced side, + * we don't need to prevent any operation in the referencing table, so + * AccessShareLock suffices (assumes that dropping the constraint + * acquires AEL). + */ + fkRel = table_open(constrForm->conrelid, AccessShareLock); + + indexOid = constrForm->conindid; + DeconstructFkConstraintRow(tuple, + &numfks, + conkey, + confkey, + conpfeqop, + conppeqop, + conffeqop, + &numfkdelsetcols, + confdelsetcols); + + for (int i = 0; i < numfks; i++) + mapped_confkey[i] = attmap->attnums[confkey[i] - 1]; + + fkconstraint = makeNode(Constraint); + fkconstraint->contype = CONSTRAINT_FOREIGN; + fkconstraint->conname = NameStr(constrForm->conname); + fkconstraint->deferrable = constrForm->condeferrable; + fkconstraint->initdeferred = constrForm->condeferred; + fkconstraint->location = -1; + fkconstraint->pktable = NULL; + /* ->fk_attrs determined below */ + fkconstraint->pk_attrs = NIL; + fkconstraint->fk_matchtype = constrForm->confmatchtype; + fkconstraint->fk_upd_action = constrForm->confupdtype; + fkconstraint->fk_del_action = constrForm->confdeltype; + fkconstraint->fk_del_set_cols = NIL; + fkconstraint->old_conpfeqop = NIL; + fkconstraint->old_pktable_oid = InvalidOid; + fkconstraint->skip_validation = false; + fkconstraint->initially_valid = true; + + /* set up colnames that are used to generate the constraint name */ + for (int i = 0; i < numfks; i++) + { + Form_pg_attribute att; + + att = TupleDescAttr(RelationGetDescr(fkRel), + conkey[i] - 1); + fkconstraint->fk_attrs = lappend(fkconstraint->fk_attrs, + makeString(NameStr(att->attname))); + } + + /* + * Add the new foreign key constraint pointing to the new partition. 
+ * Because this new partition appears in the referenced side of the + * constraint, we don't need to set up for Phase 3 check. + */ + partIndexId = index_get_partition(partitionRel, indexOid); + if (!OidIsValid(partIndexId)) + elog(ERROR, "index for %u not found in partition %s", + indexOid, RelationGetRelationName(partitionRel)); + + /* + * Get the "action" triggers belonging to the constraint to pass as + * parent OIDs for similar triggers that will be created on the + * partition in addFkRecurseReferenced(). + */ + GetForeignKeyActionTriggers(trigrel, constrOid, + constrForm->confrelid, constrForm->conrelid, + &deleteTriggerOid, &updateTriggerOid); + + addFkRecurseReferenced(NULL, + fkconstraint, + fkRel, + partitionRel, + partIndexId, + constrOid, + numfks, + mapped_confkey, + conkey, + conpfeqop, + conppeqop, + conffeqop, + numfkdelsetcols, + confdelsetcols, + true, + deleteTriggerOid, + updateTriggerOid); + + table_close(fkRel, NoLock); + ReleaseSysCache(tuple); + } + + table_close(trigrel, RowExclusiveLock); +} + +/* + * CloneFkReferencing + * Subroutine for CloneForeignKeyConstraints + * + * For each FK constraint of the parent relation in the given list, find an + * equivalent constraint in its partition relation that can be reparented; + * if one cannot be found, create a new constraint in the partition as its + * child. + * + * If wqueue is given, it is used to set up phase-3 verification for each + * cloned constraint; if omitted, we assume that such verification is not + * needed (example: the partition is being created anew). 
+ */ +static void +CloneFkReferencing(List **wqueue, Relation parentRel, Relation partRel) +{ + AttrMap *attmap; + List *partFKs; + List *clone = NIL; + ListCell *cell; + Relation trigrel; + + /* obtain a list of constraints that we need to clone */ + foreach(cell, RelationGetFKeyList(parentRel)) + { + ForeignKeyCacheInfo *fk = lfirst(cell); + + clone = lappend_oid(clone, fk->conoid); + } + + /* + * Silently do nothing if there's nothing to do. In particular, this + * avoids throwing a spurious error for foreign tables. + */ + if (clone == NIL) + return; + + if (partRel->rd_rel->relkind == RELKIND_FOREIGN_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("foreign key constraints are not supported on foreign tables"))); + + /* + * Triggers of the foreign keys will be manipulated a bunch of times in + * the loop below. To avoid repeatedly opening/closing the trigger + * catalog relation, we open it here and pass it to the subroutines called + * below. + */ + trigrel = table_open(TriggerRelationId, RowExclusiveLock); + + /* + * The constraint key may differ, if the columns in the partition are + * different. This map is used to convert them. 
+ */ + attmap = build_attrmap_by_name(RelationGetDescr(partRel), + RelationGetDescr(parentRel)); + + partFKs = copyObject(RelationGetFKeyList(partRel)); + + foreach(cell, clone) + { + Oid parentConstrOid = lfirst_oid(cell); + Form_pg_constraint constrForm; + Relation pkrel; + HeapTuple tuple; + int numfks; + AttrNumber conkey[INDEX_MAX_KEYS]; + AttrNumber mapped_conkey[INDEX_MAX_KEYS]; + AttrNumber confkey[INDEX_MAX_KEYS]; + Oid conpfeqop[INDEX_MAX_KEYS]; + Oid conppeqop[INDEX_MAX_KEYS]; + Oid conffeqop[INDEX_MAX_KEYS]; + int numfkdelsetcols; + AttrNumber confdelsetcols[INDEX_MAX_KEYS]; + Constraint *fkconstraint; + bool attached; + Oid indexOid; + Oid constrOid; + ObjectAddress address, + referenced; + ListCell *cell; + Oid insertTriggerOid, + updateTriggerOid; + + tuple = SearchSysCache1(CONSTROID, parentConstrOid); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for constraint %u", + parentConstrOid); + constrForm = (Form_pg_constraint) GETSTRUCT(tuple); + + /* Don't clone constraints whose parents are being cloned */ + if (list_member_oid(clone, constrForm->conparentid)) + { + ReleaseSysCache(tuple); + continue; + } + + /* + * Need to prevent concurrent deletions. If pkrel is a partitioned + * relation, that means to lock all partitions. + */ + pkrel = table_open(constrForm->confrelid, ShareRowExclusiveLock); + if (pkrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + (void) find_all_inheritors(RelationGetRelid(pkrel), + ShareRowExclusiveLock, NULL); + + DeconstructFkConstraintRow(tuple, &numfks, conkey, confkey, + conpfeqop, conppeqop, conffeqop, + &numfkdelsetcols, confdelsetcols); + for (int i = 0; i < numfks; i++) + mapped_conkey[i] = attmap->attnums[conkey[i] - 1]; + + /* + * Get the "check" triggers belonging to the constraint to pass as + * parent OIDs for similar triggers that will be created on the + * partition in addFkRecurseReferencing(). 
They are also passed to + * tryAttachPartitionForeignKey() below to simply assign as parents to + * the partition's existing "check" triggers, that is, if the + * corresponding constraints is deemed attachable to the parent + * constraint. + */ + GetForeignKeyCheckTriggers(trigrel, constrForm->oid, + constrForm->confrelid, constrForm->conrelid, + &insertTriggerOid, &updateTriggerOid); + + /* + * Before creating a new constraint, see whether any existing FKs are + * fit for the purpose. If one is, attach the parent constraint to + * it, and don't clone anything. This way we avoid the expensive + * verification step and don't end up with a duplicate FK, and we + * don't need to recurse to partitions for this constraint. + */ + attached = false; + foreach(cell, partFKs) + { + ForeignKeyCacheInfo *fk = lfirst_node(ForeignKeyCacheInfo, cell); + + if (tryAttachPartitionForeignKey(fk, + RelationGetRelid(partRel), + parentConstrOid, + numfks, + mapped_conkey, + confkey, + conpfeqop, + insertTriggerOid, + updateTriggerOid, + trigrel)) + { + attached = true; + table_close(pkrel, NoLock); + break; + } + } + if (attached) + { + ReleaseSysCache(tuple); + continue; + } + + /* No dice. 
Set up to create our own constraint */ + fkconstraint = makeNode(Constraint); + fkconstraint->contype = CONSTRAINT_FOREIGN; + /* ->conname determined below */ + fkconstraint->deferrable = constrForm->condeferrable; + fkconstraint->initdeferred = constrForm->condeferred; + fkconstraint->location = -1; + fkconstraint->pktable = NULL; + /* ->fk_attrs determined below */ + fkconstraint->pk_attrs = NIL; + fkconstraint->fk_matchtype = constrForm->confmatchtype; + fkconstraint->fk_upd_action = constrForm->confupdtype; + fkconstraint->fk_del_action = constrForm->confdeltype; + fkconstraint->fk_del_set_cols = NIL; + fkconstraint->old_conpfeqop = NIL; + fkconstraint->old_pktable_oid = InvalidOid; + fkconstraint->skip_validation = false; + fkconstraint->initially_valid = true; + for (int i = 0; i < numfks; i++) + { + Form_pg_attribute att; + + att = TupleDescAttr(RelationGetDescr(partRel), + mapped_conkey[i] - 1); + fkconstraint->fk_attrs = lappend(fkconstraint->fk_attrs, + makeString(NameStr(att->attname))); + } + if (ConstraintNameIsUsed(CONSTRAINT_RELATION, + RelationGetRelid(partRel), + NameStr(constrForm->conname))) + fkconstraint->conname = + ChooseConstraintName(RelationGetRelationName(partRel), + ChooseForeignKeyConstraintNameAddition(fkconstraint->fk_attrs), + "fkey", + RelationGetNamespace(partRel), NIL); + else + fkconstraint->conname = pstrdup(NameStr(constrForm->conname)); + + indexOid = constrForm->conindid; + constrOid = + CreateConstraintEntry(fkconstraint->conname, + constrForm->connamespace, + CONSTRAINT_FOREIGN, + fkconstraint->deferrable, + fkconstraint->initdeferred, + constrForm->convalidated, + parentConstrOid, + RelationGetRelid(partRel), + mapped_conkey, + numfks, + numfks, + InvalidOid, /* not a domain constraint */ + indexOid, + constrForm->confrelid, /* same foreign rel */ + confkey, + conpfeqop, + conppeqop, + conffeqop, + numfks, + fkconstraint->fk_upd_action, + fkconstraint->fk_del_action, + confdelsetcols, + numfkdelsetcols, + 
fkconstraint->fk_matchtype, + NULL, + NULL, + NULL, + false, /* islocal */ + 1, /* inhcount */ + false, /* conNoInherit */ + true); + + /* Set up partition dependencies for the new constraint */ + ObjectAddressSet(address, ConstraintRelationId, constrOid); + ObjectAddressSet(referenced, ConstraintRelationId, parentConstrOid); + recordDependencyOn(&address, &referenced, DEPENDENCY_PARTITION_PRI); + ObjectAddressSet(referenced, RelationRelationId, + RelationGetRelid(partRel)); + recordDependencyOn(&address, &referenced, DEPENDENCY_PARTITION_SEC); + + /* Done with the cloned constraint's tuple */ + ReleaseSysCache(tuple); + + /* Make all this visible before recursing */ + CommandCounterIncrement(); + + addFkRecurseReferencing(wqueue, + fkconstraint, + partRel, + pkrel, + indexOid, + constrOid, + numfks, + confkey, + mapped_conkey, + conpfeqop, + conppeqop, + conffeqop, + numfkdelsetcols, + confdelsetcols, + false, /* no old check exists */ + AccessExclusiveLock, + insertTriggerOid, + updateTriggerOid); + table_close(pkrel, NoLock); + } + + table_close(trigrel, RowExclusiveLock); +} + +/* + * When the parent of a partition receives [the referencing side of] a foreign + * key, we must propagate that foreign key to the partition. However, the + * partition might already have an equivalent foreign key; this routine + * compares the given ForeignKeyCacheInfo (in the partition) to the FK defined + * by the other parameters. If they are equivalent, create the link between + * the two constraints and return true. + * + * If the given FK does not match the one defined by rest of the params, + * return false. 
+ */ +static bool +tryAttachPartitionForeignKey(ForeignKeyCacheInfo *fk, + Oid partRelid, + Oid parentConstrOid, + int numfks, + AttrNumber *mapped_conkey, + AttrNumber *confkey, + Oid *conpfeqop, + Oid parentInsTrigger, + Oid parentUpdTrigger, + Relation trigrel) +{ + HeapTuple parentConstrTup; + Form_pg_constraint parentConstr; + HeapTuple partcontup; + Form_pg_constraint partConstr; + ScanKeyData key; + SysScanDesc scan; + HeapTuple trigtup; + Oid insertTriggerOid, + updateTriggerOid; + + parentConstrTup = SearchSysCache1(CONSTROID, + ObjectIdGetDatum(parentConstrOid)); + if (!HeapTupleIsValid(parentConstrTup)) + elog(ERROR, "cache lookup failed for constraint %u", parentConstrOid); + parentConstr = (Form_pg_constraint) GETSTRUCT(parentConstrTup); + + /* + * Do some quick & easy initial checks. If any of these fail, we cannot + * use this constraint. + */ + if (fk->confrelid != parentConstr->confrelid || fk->nkeys != numfks) + { + ReleaseSysCache(parentConstrTup); + return false; + } + for (int i = 0; i < numfks; i++) + { + if (fk->conkey[i] != mapped_conkey[i] || + fk->confkey[i] != confkey[i] || + fk->conpfeqop[i] != conpfeqop[i]) + { + ReleaseSysCache(parentConstrTup); + return false; + } + } + + /* + * Looks good so far; do some more extensive checks. Presumably the check + * for 'convalidated' could be dropped, since we don't really care about + * that, but let's be careful for now. 
+ */ + partcontup = SearchSysCache1(CONSTROID, + ObjectIdGetDatum(fk->conoid)); + if (!HeapTupleIsValid(partcontup)) + elog(ERROR, "cache lookup failed for constraint %u", fk->conoid); + partConstr = (Form_pg_constraint) GETSTRUCT(partcontup); + if (OidIsValid(partConstr->conparentid) || + !partConstr->convalidated || + partConstr->condeferrable != parentConstr->condeferrable || + partConstr->condeferred != parentConstr->condeferred || + partConstr->confupdtype != parentConstr->confupdtype || + partConstr->confdeltype != parentConstr->confdeltype || + partConstr->confmatchtype != parentConstr->confmatchtype) + { + ReleaseSysCache(parentConstrTup); + ReleaseSysCache(partcontup); + return false; + } + + ReleaseSysCache(partcontup); + ReleaseSysCache(parentConstrTup); + + /* + * Looks good! Attach this constraint. The action triggers in the new + * partition become redundant -- the parent table already has equivalent + * ones, and those will be able to reach the partition. Remove the ones + * in the partition. We identify them because they have our constraint + * OID, as well as being on the referenced rel. + */ + ScanKeyInit(&key, + Anum_pg_trigger_tgconstraint, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(fk->conoid)); + scan = systable_beginscan(trigrel, TriggerConstraintIndexId, true, + NULL, 1, &key); + while ((trigtup = systable_getnext(scan)) != NULL) + { + Form_pg_trigger trgform = (Form_pg_trigger) GETSTRUCT(trigtup); + ObjectAddress trigger; + + if (trgform->tgconstrrelid != fk->conrelid) + continue; + if (trgform->tgrelid != fk->confrelid) + continue; + + /* + * The constraint is originally set up to contain this trigger as an + * implementation object, so there's a dependency record that links + * the two; however, since the trigger is no longer needed, we remove + * the dependency link in order to be able to drop the trigger while + * keeping the constraint intact. 
+ */ + deleteDependencyRecordsFor(TriggerRelationId, + trgform->oid, + false); + /* make dependency deletion visible to performDeletion */ + CommandCounterIncrement(); + ObjectAddressSet(trigger, TriggerRelationId, + trgform->oid); + performDeletion(&trigger, DROP_RESTRICT, 0); + /* make trigger drop visible, in case the loop iterates */ + CommandCounterIncrement(); + } + + systable_endscan(scan); + + ConstraintSetParentConstraint(fk->conoid, parentConstrOid, partRelid); + + /* + * Like the constraint, attach partition's "check" triggers to the + * corresponding parent triggers. + */ + GetForeignKeyCheckTriggers(trigrel, + fk->conoid, fk->confrelid, fk->conrelid, + &insertTriggerOid, &updateTriggerOid); + Assert(OidIsValid(insertTriggerOid) && OidIsValid(parentInsTrigger)); + TriggerSetParentTrigger(trigrel, insertTriggerOid, parentInsTrigger, + partRelid); + Assert(OidIsValid(updateTriggerOid) && OidIsValid(parentUpdTrigger)); + TriggerSetParentTrigger(trigrel, updateTriggerOid, parentUpdTrigger, + partRelid); + + CommandCounterIncrement(); + return true; +} + +/* + * GetForeignKeyActionTriggers + * Returns delete and update "action" triggers of the given relation + * belonging to the given constraint + */ +static void +GetForeignKeyActionTriggers(Relation trigrel, + Oid conoid, Oid confrelid, Oid conrelid, + Oid *deleteTriggerOid, + Oid *updateTriggerOid) +{ + ScanKeyData key; + SysScanDesc scan; + HeapTuple trigtup; + + *deleteTriggerOid = *updateTriggerOid = InvalidOid; + ScanKeyInit(&key, + Anum_pg_trigger_tgconstraint, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(conoid)); + + scan = systable_beginscan(trigrel, TriggerConstraintIndexId, true, + NULL, 1, &key); + while ((trigtup = systable_getnext(scan)) != NULL) + { + Form_pg_trigger trgform = (Form_pg_trigger) GETSTRUCT(trigtup); + + if (trgform->tgconstrrelid != conrelid) + continue; + if (trgform->tgrelid != confrelid) + continue; + /* Only ever look at "action" triggers on the PK side. 
*/ + if (RI_FKey_trigger_type(trgform->tgfoid) != RI_TRIGGER_PK) + continue; + if (TRIGGER_FOR_DELETE(trgform->tgtype)) + { + Assert(*deleteTriggerOid == InvalidOid); + *deleteTriggerOid = trgform->oid; + } + else if (TRIGGER_FOR_UPDATE(trgform->tgtype)) + { + Assert(*updateTriggerOid == InvalidOid); + *updateTriggerOid = trgform->oid; + } +#ifndef USE_ASSERT_CHECKING + /* In an assert-enabled build, continue looking to find duplicates */ + if (OidIsValid(*deleteTriggerOid) && OidIsValid(*updateTriggerOid)) + break; +#endif + } + + if (!OidIsValid(*deleteTriggerOid)) + elog(ERROR, "could not find ON DELETE action trigger of foreign key constraint %u", + conoid); + if (!OidIsValid(*updateTriggerOid)) + elog(ERROR, "could not find ON UPDATE action trigger of foreign key constraint %u", + conoid); + + systable_endscan(scan); +} + +/* + * GetForeignKeyCheckTriggers + * Returns insert and update "check" triggers of the given relation + * belonging to the given constraint + */ +static void +GetForeignKeyCheckTriggers(Relation trigrel, + Oid conoid, Oid confrelid, Oid conrelid, + Oid *insertTriggerOid, + Oid *updateTriggerOid) +{ + ScanKeyData key; + SysScanDesc scan; + HeapTuple trigtup; + + *insertTriggerOid = *updateTriggerOid = InvalidOid; + ScanKeyInit(&key, + Anum_pg_trigger_tgconstraint, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(conoid)); + + scan = systable_beginscan(trigrel, TriggerConstraintIndexId, true, + NULL, 1, &key); + while ((trigtup = systable_getnext(scan)) != NULL) + { + Form_pg_trigger trgform = (Form_pg_trigger) GETSTRUCT(trigtup); + + if (trgform->tgconstrrelid != confrelid) + continue; + if (trgform->tgrelid != conrelid) + continue; + /* Only ever look at "check" triggers on the FK side. 
*/ + if (RI_FKey_trigger_type(trgform->tgfoid) != RI_TRIGGER_FK) + continue; + if (TRIGGER_FOR_INSERT(trgform->tgtype)) + { + Assert(*insertTriggerOid == InvalidOid); + *insertTriggerOid = trgform->oid; + } + else if (TRIGGER_FOR_UPDATE(trgform->tgtype)) + { + Assert(*updateTriggerOid == InvalidOid); + *updateTriggerOid = trgform->oid; + } +#ifndef USE_ASSERT_CHECKING + /* In an assert-enabled build, continue looking to find duplicates. */ + if (OidIsValid(*insertTriggerOid) && OidIsValid(*updateTriggerOid)) + break; +#endif + } + + if (!OidIsValid(*insertTriggerOid)) + elog(ERROR, "could not find ON INSERT check triggers of foreign key constraint %u", + conoid); + if (!OidIsValid(*updateTriggerOid)) + elog(ERROR, "could not find ON UPDATE check triggers of foreign key constraint %u", + conoid); + + systable_endscan(scan); +} + +/* + * ALTER TABLE ALTER CONSTRAINT + * + * Update the attributes of a constraint. + * + * Currently only works for Foreign Key constraints. + * + * If the constraint is modified, returns its address; otherwise, return + * InvalidObjectAddress. 
+ */ +static ObjectAddress +ATExecAlterConstraint(Relation rel, AlterTableCmd *cmd, bool recurse, + bool recursing, LOCKMODE lockmode) +{ + Constraint *cmdcon; + Relation conrel; + Relation tgrel; + SysScanDesc scan; + ScanKeyData skey[3]; + HeapTuple contuple; + Form_pg_constraint currcon; + ObjectAddress address; + List *otherrelids = NIL; + ListCell *lc; + + cmdcon = castNode(Constraint, cmd->def); + + conrel = table_open(ConstraintRelationId, RowExclusiveLock); + tgrel = table_open(TriggerRelationId, RowExclusiveLock); + + /* + * Find and check the target constraint + */ + ScanKeyInit(&skey[0], + Anum_pg_constraint_conrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(rel))); + ScanKeyInit(&skey[1], + Anum_pg_constraint_contypid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(InvalidOid)); + ScanKeyInit(&skey[2], + Anum_pg_constraint_conname, + BTEqualStrategyNumber, F_NAMEEQ, + CStringGetDatum(cmdcon->conname)); + scan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId, + true, NULL, 3, skey); + + /* There can be at most one matching row */ + if (!HeapTupleIsValid(contuple = systable_getnext(scan))) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("constraint \"%s\" of relation \"%s\" does not exist", + cmdcon->conname, RelationGetRelationName(rel)))); + + currcon = (Form_pg_constraint) GETSTRUCT(contuple); + if (currcon->contype != CONSTRAINT_FOREIGN) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("constraint \"%s\" of relation \"%s\" is not a foreign key constraint", + cmdcon->conname, RelationGetRelationName(rel)))); + + /* + * If it's not the topmost constraint, raise an error. + * + * Altering a non-topmost constraint leaves some triggers untouched, since + * they are not directly connected to this constraint; also, pg_dump would + * ignore the deferrability status of the individual constraint, since it + * only dumps topmost constraints. 
Avoid these problems by refusing this + * operation and telling the user to alter the parent constraint instead. + */ + if (OidIsValid(currcon->conparentid)) + { + HeapTuple tp; + Oid parent = currcon->conparentid; + char *ancestorname = NULL; + char *ancestortable = NULL; + + /* Loop to find the topmost constraint */ + while (HeapTupleIsValid(tp = SearchSysCache1(CONSTROID, ObjectIdGetDatum(parent)))) + { + Form_pg_constraint contup = (Form_pg_constraint) GETSTRUCT(tp); + + /* If no parent, this is the constraint we want */ + if (!OidIsValid(contup->conparentid)) + { + ancestorname = pstrdup(NameStr(contup->conname)); + ancestortable = get_rel_name(contup->conrelid); + ReleaseSysCache(tp); + break; + } + + parent = contup->conparentid; + ReleaseSysCache(tp); + } + + ereport(ERROR, + (errmsg("cannot alter constraint \"%s\" on relation \"%s\"", + cmdcon->conname, RelationGetRelationName(rel)), + ancestorname && ancestortable ? + errdetail("Constraint \"%s\" is derived from constraint \"%s\" of relation \"%s\".", + cmdcon->conname, ancestorname, ancestortable) : 0, + errhint("You may alter the constraint it derives from, instead."))); + } + + /* + * Do the actual catalog work. We can skip changing if already in the + * desired state, but not if a partitioned table: partitions need to be + * processed regardless, in case they had the constraint locally changed. + */ + address = InvalidObjectAddress; + if (currcon->condeferrable != cmdcon->deferrable || + currcon->condeferred != cmdcon->initdeferred || + rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + if (ATExecAlterConstrRecurse(cmdcon, conrel, tgrel, rel, contuple, + &otherrelids, lockmode)) + ObjectAddressSet(address, ConstraintRelationId, currcon->oid); + } + + /* + * ATExecConstrRecurse already invalidated relcache for the relations + * having the constraint itself; here we also invalidate for relations + * that have any triggers that are part of the constraint. 
+ */ + foreach(lc, otherrelids) + CacheInvalidateRelcacheByRelid(lfirst_oid(lc)); + + systable_endscan(scan); + + table_close(tgrel, RowExclusiveLock); + table_close(conrel, RowExclusiveLock); + + return address; +} + +/* + * Recursive subroutine of ATExecAlterConstraint. Returns true if the + * constraint is altered. + * + * *otherrelids is appended OIDs of relations containing affected triggers. + * + * Note that we must recurse even when the values are correct, in case + * indirect descendants have had their constraints altered locally. + * (This could be avoided if we forbade altering constraints in partitions + * but existing releases don't do that.) + */ +static bool +ATExecAlterConstrRecurse(Constraint *cmdcon, Relation conrel, Relation tgrel, + Relation rel, HeapTuple contuple, List **otherrelids, + LOCKMODE lockmode) +{ + Form_pg_constraint currcon; + Oid conoid; + Oid refrelid; + bool changed = false; + + currcon = (Form_pg_constraint) GETSTRUCT(contuple); + conoid = currcon->oid; + refrelid = currcon->confrelid; + + /* + * Update pg_constraint with the flags from cmdcon. + * + * If called to modify a constraint that's already in the desired state, + * silently do nothing. + */ + if (currcon->condeferrable != cmdcon->deferrable || + currcon->condeferred != cmdcon->initdeferred) + { + HeapTuple copyTuple; + Form_pg_constraint copy_con; + HeapTuple tgtuple; + ScanKeyData tgkey; + SysScanDesc tgscan; + + copyTuple = heap_copytuple(contuple); + copy_con = (Form_pg_constraint) GETSTRUCT(copyTuple); + copy_con->condeferrable = cmdcon->deferrable; + copy_con->condeferred = cmdcon->initdeferred; + CatalogTupleUpdate(conrel, ©Tuple->t_self, copyTuple); + + InvokeObjectPostAlterHook(ConstraintRelationId, + conoid, 0); + + heap_freetuple(copyTuple); + changed = true; + + /* Make new constraint flags visible to others */ + CacheInvalidateRelcache(rel); + + /* + * Now we need to update the multiple entries in pg_trigger that + * implement the constraint. 
+ */ + ScanKeyInit(&tgkey, + Anum_pg_trigger_tgconstraint, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(conoid)); + tgscan = systable_beginscan(tgrel, TriggerConstraintIndexId, true, + NULL, 1, &tgkey); + while (HeapTupleIsValid(tgtuple = systable_getnext(tgscan))) + { + Form_pg_trigger tgform = (Form_pg_trigger) GETSTRUCT(tgtuple); + Form_pg_trigger copy_tg; + HeapTuple copyTuple; + + /* + * Remember OIDs of other relation(s) involved in FK constraint. + * (Note: it's likely that we could skip forcing a relcache inval + * for other rels that don't have a trigger whose properties + * change, but let's be conservative.) + */ + if (tgform->tgrelid != RelationGetRelid(rel)) + *otherrelids = list_append_unique_oid(*otherrelids, + tgform->tgrelid); + + /* + * Update deferrability of RI_FKey_noaction_del, + * RI_FKey_noaction_upd, RI_FKey_check_ins and RI_FKey_check_upd + * triggers, but not others; see createForeignKeyActionTriggers + * and CreateFKCheckTrigger. + */ + if (tgform->tgfoid != F_RI_FKEY_NOACTION_DEL && + tgform->tgfoid != F_RI_FKEY_NOACTION_UPD && + tgform->tgfoid != F_RI_FKEY_CHECK_INS && + tgform->tgfoid != F_RI_FKEY_CHECK_UPD) + continue; + + copyTuple = heap_copytuple(tgtuple); + copy_tg = (Form_pg_trigger) GETSTRUCT(copyTuple); + + copy_tg->tgdeferrable = cmdcon->deferrable; + copy_tg->tginitdeferred = cmdcon->initdeferred; + CatalogTupleUpdate(tgrel, ©Tuple->t_self, copyTuple); + + InvokeObjectPostAlterHook(TriggerRelationId, tgform->oid, 0); + + heap_freetuple(copyTuple); + } + + systable_endscan(tgscan); + } + + /* + * If the table at either end of the constraint is partitioned, we need to + * recurse and handle every constraint that is a child of this one. + * + * (This assumes that the recurse flag is forcibly set for partitioned + * tables, and not set for legacy inheritance, though we don't check for + * that here.) 
+ */ + if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE || + get_rel_relkind(refrelid) == RELKIND_PARTITIONED_TABLE) + { + ScanKeyData pkey; + SysScanDesc pscan; + HeapTuple childtup; + + ScanKeyInit(&pkey, + Anum_pg_constraint_conparentid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(conoid)); + + pscan = systable_beginscan(conrel, ConstraintParentIndexId, + true, NULL, 1, &pkey); + + while (HeapTupleIsValid(childtup = systable_getnext(pscan))) + { + Form_pg_constraint childcon = (Form_pg_constraint) GETSTRUCT(childtup); + Relation childrel; + + childrel = table_open(childcon->conrelid, lockmode); + ATExecAlterConstrRecurse(cmdcon, conrel, tgrel, childrel, childtup, + otherrelids, lockmode); + table_close(childrel, NoLock); + } + + systable_endscan(pscan); + } + + return changed; +} + +/* + * ALTER TABLE VALIDATE CONSTRAINT + * + * XXX The reason we handle recursion here rather than at Phase 1 is because + * there's no good way to skip recursing when handling foreign keys: there is + * no need to lock children in that case, yet we wouldn't be able to avoid + * doing so at that level. + * + * Return value is the address of the validated constraint. If the constraint + * was already validated, InvalidObjectAddress is returned. 
+ */ +static ObjectAddress +ATExecValidateConstraint(List **wqueue, Relation rel, char *constrName, + bool recurse, bool recursing, LOCKMODE lockmode) +{ + Relation conrel; + SysScanDesc scan; + ScanKeyData skey[3]; + HeapTuple tuple; + Form_pg_constraint con; + ObjectAddress address; + + conrel = table_open(ConstraintRelationId, RowExclusiveLock); + + /* + * Find and check the target constraint + */ + ScanKeyInit(&skey[0], + Anum_pg_constraint_conrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(rel))); + ScanKeyInit(&skey[1], + Anum_pg_constraint_contypid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(InvalidOid)); + ScanKeyInit(&skey[2], + Anum_pg_constraint_conname, + BTEqualStrategyNumber, F_NAMEEQ, + CStringGetDatum(constrName)); + scan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId, + true, NULL, 3, skey); + + /* There can be at most one matching row */ + if (!HeapTupleIsValid(tuple = systable_getnext(scan))) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("constraint \"%s\" of relation \"%s\" does not exist", + constrName, RelationGetRelationName(rel)))); + + con = (Form_pg_constraint) GETSTRUCT(tuple); + if (con->contype != CONSTRAINT_FOREIGN && + con->contype != CONSTRAINT_CHECK) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("constraint \"%s\" of relation \"%s\" is not a foreign key or check constraint", + constrName, RelationGetRelationName(rel)))); + + if (!con->convalidated) + { + AlteredTableInfo *tab; + HeapTuple copyTuple; + Form_pg_constraint copy_con; + + if (con->contype == CONSTRAINT_FOREIGN) + { + NewConstraint *newcon; + Constraint *fkconstraint; + + /* Queue validation for phase 3 */ + fkconstraint = makeNode(Constraint); + /* for now this is all we need */ + fkconstraint->conname = constrName; + + newcon = (NewConstraint *) palloc0(sizeof(NewConstraint)); + newcon->name = constrName; + newcon->contype = CONSTR_FOREIGN; + newcon->refrelid = con->confrelid; + 
newcon->refindid = con->conindid; + newcon->conid = con->oid; + newcon->qual = (Node *) fkconstraint; + + /* Find or create work queue entry for this table */ + tab = ATGetQueueEntry(wqueue, rel); + tab->constraints = lappend(tab->constraints, newcon); + + /* + * We disallow creating invalid foreign keys to or from + * partitioned tables, so ignoring the recursion bit is okay. + */ + } + else if (con->contype == CONSTRAINT_CHECK) + { + List *children = NIL; + ListCell *child; + NewConstraint *newcon; + bool isnull; + Datum val; + char *conbin; + + /* + * If we're recursing, the parent has already done this, so skip + * it. Also, if the constraint is a NO INHERIT constraint, we + * shouldn't try to look for it in the children. + */ + if (!recursing && !con->connoinherit) + children = find_all_inheritors(RelationGetRelid(rel), + lockmode, NULL); + + /* + * For CHECK constraints, we must ensure that we only mark the + * constraint as validated on the parent if it's already validated + * on the children. + * + * We recurse before validating on the parent, to reduce risk of + * deadlocks. + */ + foreach(child, children) + { + Oid childoid = lfirst_oid(child); + Relation childrel; + + if (childoid == RelationGetRelid(rel)) + continue; + + /* + * If we are told not to recurse, there had better not be any + * child tables, because we can't mark the constraint on the + * parent valid unless it is valid for all child tables. 
+ */ + if (!recurse) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("constraint must be validated on child tables too"))); + + /* find_all_inheritors already got lock */ + childrel = table_open(childoid, NoLock); + + ATExecValidateConstraint(wqueue, childrel, constrName, false, + true, lockmode); + table_close(childrel, NoLock); + } + + /* Queue validation for phase 3 */ + newcon = (NewConstraint *) palloc0(sizeof(NewConstraint)); + newcon->name = constrName; + newcon->contype = CONSTR_CHECK; + newcon->refrelid = InvalidOid; + newcon->refindid = InvalidOid; + newcon->conid = con->oid; + + val = SysCacheGetAttr(CONSTROID, tuple, + Anum_pg_constraint_conbin, &isnull); + if (isnull) + elog(ERROR, "null conbin for constraint %u", con->oid); + + conbin = TextDatumGetCString(val); + newcon->qual = (Node *) stringToNode(conbin); + + /* Find or create work queue entry for this table */ + tab = ATGetQueueEntry(wqueue, rel); + tab->constraints = lappend(tab->constraints, newcon); + + /* + * Invalidate relcache so that others see the new validated + * constraint. + */ + CacheInvalidateRelcache(rel); + } + + /* + * Now update the catalog, while we have the door open. 
+ */ + copyTuple = heap_copytuple(tuple); + copy_con = (Form_pg_constraint) GETSTRUCT(copyTuple); + copy_con->convalidated = true; + CatalogTupleUpdate(conrel, ©Tuple->t_self, copyTuple); + + InvokeObjectPostAlterHook(ConstraintRelationId, con->oid, 0); + + heap_freetuple(copyTuple); + + ObjectAddressSet(address, ConstraintRelationId, con->oid); + } + else + address = InvalidObjectAddress; /* already validated */ + + systable_endscan(scan); + + table_close(conrel, RowExclusiveLock); + + return address; +} + + +/* + * transformColumnNameList - transform list of column names + * + * Lookup each name and return its attnum and, optionally, type OID + * + * Note: the name of this function suggests that it's general-purpose, + * but actually it's only used to look up names appearing in foreign-key + * clauses. The error messages would need work to use it in other cases, + * and perhaps the validity checks as well. + */ +static int +transformColumnNameList(Oid relId, List *colList, + int16 *attnums, Oid *atttypids) +{ + ListCell *l; + int attnum; + + attnum = 0; + foreach(l, colList) + { + char *attname = strVal(lfirst(l)); + HeapTuple atttuple; + Form_pg_attribute attform; + + atttuple = SearchSysCacheAttName(relId, attname); + if (!HeapTupleIsValid(atttuple)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" referenced in foreign key constraint does not exist", + attname))); + attform = (Form_pg_attribute) GETSTRUCT(atttuple); + if (attform->attnum < 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("system columns cannot be used in foreign keys"))); + if (attnum >= INDEX_MAX_KEYS) + ereport(ERROR, + (errcode(ERRCODE_TOO_MANY_COLUMNS), + errmsg("cannot have more than %d keys in a foreign key", + INDEX_MAX_KEYS))); + attnums[attnum] = attform->attnum; + if (atttypids != NULL) + atttypids[attnum] = attform->atttypid; + ReleaseSysCache(atttuple); + attnum++; + } + + return attnum; +} + +/* + * transformFkeyGetPrimaryKey - 
 *
 * Look up the names, attnums, and types of the primary key attributes
 * for the pkrel.  Also return the index OID and index opclasses of the
 * index supporting the primary key.
 *
 * All parameters except pkrel are output parameters.  Also, the function
 * return value is the number of attributes in the primary key.
 *
 * Used when the column list in the REFERENCES specification is omitted.
 */
static int
transformFkeyGetPrimaryKey(Relation pkrel, Oid *indexOid,
						   List **attnamelist,
						   int16 *attnums, Oid *atttypids,
						   Oid *opclasses)
{
	List	   *indexoidlist;
	ListCell   *indexoidscan;
	HeapTuple	indexTuple = NULL;
	Form_pg_index indexStruct = NULL;
	Datum		indclassDatum;
	bool		isnull;
	oidvector  *indclass;
	int			i;

	/*
	 * Get the list of index OIDs for the table from the relcache, and look up
	 * each one in the pg_index syscache until we find one marked primary key
	 * (hopefully there isn't more than one such).  Insist it's valid, too.
	 */
	*indexOid = InvalidOid;

	indexoidlist = RelationGetIndexList(pkrel);

	foreach(indexoidscan, indexoidlist)
	{
		Oid			indexoid = lfirst_oid(indexoidscan);

		indexTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexoid));
		if (!HeapTupleIsValid(indexTuple))
			elog(ERROR, "cache lookup failed for index %u", indexoid);
		indexStruct = (Form_pg_index) GETSTRUCT(indexTuple);
		if (indexStruct->indisprimary && indexStruct->indisvalid)
		{
			/*
			 * Refuse to use a deferrable primary key.  This is per SQL spec,
			 * and there would be a lot of interesting semantic problems if we
			 * tried to allow it.
			 */
			if (!indexStruct->indimmediate)
				ereport(ERROR,
						(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
						 errmsg("cannot use a deferrable primary key for referenced table \"%s\"",
								RelationGetRelationName(pkrel))));

			/* Found it; keep indexTuple/indexStruct for use below */
			*indexOid = indexoid;
			break;
		}
		ReleaseSysCache(indexTuple);
	}

	list_free(indexoidlist);

	/*
	 * Check that we found it
	 */
	if (!OidIsValid(*indexOid))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("there is no primary key for referenced table \"%s\"",
						RelationGetRelationName(pkrel))));

	/* Must get indclass the hard way */
	indclassDatum = SysCacheGetAttr(INDEXRELID, indexTuple,
									Anum_pg_index_indclass, &isnull);
	Assert(!isnull);
	indclass = (oidvector *) DatumGetPointer(indclassDatum);

	/*
	 * Now build the list of PK attributes from the indkey definition (we
	 * assume a primary key cannot have expressional elements)
	 */
	*attnamelist = NIL;
	for (i = 0; i < indexStruct->indnkeyatts; i++)
	{
		int			pkattno = indexStruct->indkey.values[i];

		attnums[i] = pkattno;
		atttypids[i] = attnumTypeId(pkrel, pkattno);
		opclasses[i] = indclass->values[i];
		*attnamelist = lappend(*attnamelist,
							   makeString(pstrdup(NameStr(*attnumAttName(pkrel, pkattno)))));
	}

	ReleaseSysCache(indexTuple);

	/* i is now the number of key attributes in the PK index */
	return i;
}

/*
 * transformFkeyCheckAttrs -
 *
 * Make sure that the attributes of a referenced table belong to a unique
 * (or primary key) constraint.  Return the OID of the index supporting
 * the constraint, as well as the opclasses associated with the index
 * columns.
 */
static Oid
transformFkeyCheckAttrs(Relation pkrel,
						int numattrs, int16 *attnums,
						Oid *opclasses) /* output parameter */
{
	Oid			indexoid = InvalidOid;
	bool		found = false;
	bool		found_deferrable = false;
	List	   *indexoidlist;
	ListCell   *indexoidscan;
	int			i,
				j;

	/*
	 * Reject duplicate appearances of columns in the referenced-columns list.
	 * Such a case is forbidden by the SQL standard, and even if we thought it
	 * useful to allow it, there would be ambiguity about how to match the
	 * list to unique indexes (in particular, it'd be unclear which index
	 * opclass goes with which FK column).
	 */
	for (i = 0; i < numattrs; i++)
	{
		for (j = i + 1; j < numattrs; j++)
		{
			if (attnums[i] == attnums[j])
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_FOREIGN_KEY),
						 errmsg("foreign key referenced-columns list must not contain duplicates")));
		}
	}

	/*
	 * Get the list of index OIDs for the table from the relcache, and look up
	 * each one in the pg_index syscache, and match unique indexes to the list
	 * of attnums we are given.
	 */
	indexoidlist = RelationGetIndexList(pkrel);

	foreach(indexoidscan, indexoidlist)
	{
		HeapTuple	indexTuple;
		Form_pg_index indexStruct;

		indexoid = lfirst_oid(indexoidscan);
		indexTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexoid));
		if (!HeapTupleIsValid(indexTuple))
			elog(ERROR, "cache lookup failed for index %u", indexoid);
		indexStruct = (Form_pg_index) GETSTRUCT(indexTuple);

		/*
		 * Must have the right number of columns; must be unique and not a
		 * partial index; forget it if there are any expressions, too. Invalid
		 * indexes are out as well.
		 */
		if (indexStruct->indnkeyatts == numattrs &&
			indexStruct->indisunique &&
			indexStruct->indisvalid &&
			heap_attisnull(indexTuple, Anum_pg_index_indpred, NULL) &&
			heap_attisnull(indexTuple, Anum_pg_index_indexprs, NULL))
		{
			Datum		indclassDatum;
			bool		isnull;
			oidvector  *indclass;

			/* Must get indclass the hard way */
			indclassDatum = SysCacheGetAttr(INDEXRELID, indexTuple,
											Anum_pg_index_indclass, &isnull);
			Assert(!isnull);
			indclass = (oidvector *) DatumGetPointer(indclassDatum);

			/*
			 * The given attnum list may match the index columns in any order.
			 * Check for a match, and extract the appropriate opclasses while
			 * we're at it.
			 *
			 * We know that attnums[] is duplicate-free per the test at the
			 * start of this function, and we checked above that the number of
			 * index columns agrees, so if we find a match for each attnums[]
			 * entry then we must have a one-to-one match in some order.
			 */
			for (i = 0; i < numattrs; i++)
			{
				found = false;
				for (j = 0; j < numattrs; j++)
				{
					if (attnums[i] == indexStruct->indkey.values[j])
					{
						opclasses[i] = indclass->values[j];
						found = true;
						break;
					}
				}
				if (!found)
					break;
			}

			/*
			 * Refuse to use a deferrable unique/primary key.  This is per SQL
			 * spec, and there would be a lot of interesting semantic problems
			 * if we tried to allow it.
			 */
			if (found && !indexStruct->indimmediate)
			{
				/*
				 * Remember that we found an otherwise matching index, so that
				 * we can generate a more appropriate error message.
				 */
				found_deferrable = true;
				found = false;
			}
		}
		ReleaseSysCache(indexTuple);
		if (found)
			break;
	}

	if (!found)
	{
		if (found_deferrable)
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("cannot use a deferrable unique constraint for referenced table \"%s\"",
							RelationGetRelationName(pkrel))));
		else
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_FOREIGN_KEY),
					 errmsg("there is no unique constraint matching given keys for referenced table \"%s\"",
							RelationGetRelationName(pkrel))));
	}

	list_free(indexoidlist);

	return indexoid;
}

/*
 * findFkeyCast -
 *
 * Wrapper around find_coercion_pathway() for ATAddForeignKeyConstraint().
 * Caller has equal regard for binary coercibility and for an exact match.
+*/ +static CoercionPathType +findFkeyCast(Oid targetTypeId, Oid sourceTypeId, Oid *funcid) +{ + CoercionPathType ret; + + if (targetTypeId == sourceTypeId) + { + ret = COERCION_PATH_RELABELTYPE; + *funcid = InvalidOid; + } + else + { + ret = find_coercion_pathway(targetTypeId, sourceTypeId, + COERCION_IMPLICIT, funcid); + if (ret == COERCION_PATH_NONE) + /* A previously-relied-upon cast is now gone. */ + elog(ERROR, "could not find cast from %u to %u", + sourceTypeId, targetTypeId); + } + + return ret; +} + +/* + * Permissions checks on the referenced table for ADD FOREIGN KEY + * + * Note: we have already checked that the user owns the referencing table, + * else we'd have failed much earlier; no additional checks are needed for it. + */ +static void +checkFkeyPermissions(Relation rel, int16 *attnums, int natts) +{ + Oid roleid = GetUserId(); + AclResult aclresult; + int i; + + /* Okay if we have relation-level REFERENCES permission */ + aclresult = pg_class_aclcheck(RelationGetRelid(rel), roleid, + ACL_REFERENCES); + if (aclresult == ACLCHECK_OK) + return; + /* Else we must have REFERENCES on each column */ + for (i = 0; i < natts; i++) + { + aclresult = pg_attribute_aclcheck(RelationGetRelid(rel), attnums[i], + roleid, ACL_REFERENCES); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, get_relkind_objtype(rel->rd_rel->relkind), + RelationGetRelationName(rel)); + } +} + +/* + * Scan the existing rows in a table to verify they meet a proposed FK + * constraint. + * + * Caller must have opened and locked both relations appropriately. 
 */
static void
validateForeignKeyConstraint(char *conname,
							 Relation rel,
							 Relation pkrel,
							 Oid pkindOid,
							 Oid constraintOid)
{
	TupleTableSlot *slot;
	TableScanDesc scan;
	Trigger		trig;
	Snapshot	snapshot;
	MemoryContext oldcxt;
	MemoryContext perTupCxt;

	ereport(DEBUG1,
			(errmsg_internal("validating foreign key constraint \"%s\"", conname)));

	/*
	 * Build a trigger call structure; we'll need it either way.
	 */
	MemSet(&trig, 0, sizeof(trig));
	trig.tgoid = InvalidOid;
	trig.tgname = conname;
	trig.tgenabled = TRIGGER_FIRES_ON_ORIGIN;
	trig.tgisinternal = true;
	trig.tgconstrrelid = RelationGetRelid(pkrel);
	trig.tgconstrindid = pkindOid;
	trig.tgconstraint = constraintOid;
	trig.tgdeferrable = false;
	trig.tginitdeferred = false;
	/* we needn't fill in remaining fields */

	/*
	 * See if we can do it with a single LEFT JOIN query.  A false result
	 * indicates we must proceed with the fire-the-trigger method.
	 */
	if (RI_Initial_Check(&trig, rel, pkrel))
		return;

	/*
	 * Scan through each tuple, calling RI_FKey_check_ins (insert trigger) as
	 * if that tuple had just been inserted.  If any of those fail, it should
	 * ereport(ERROR) and that's that.
	 */
	snapshot = RegisterSnapshot(GetLatestSnapshot());
	slot = table_slot_create(rel, NULL);
	scan = table_beginscan(rel, snapshot, 0, NULL);

	/* Per-tuple context, reset each iteration, keeps trigger leakage bounded */
	perTupCxt = AllocSetContextCreate(CurrentMemoryContext,
									  "validateForeignKeyConstraint",
									  ALLOCSET_SMALL_SIZES);
	oldcxt = MemoryContextSwitchTo(perTupCxt);

	while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
	{
		LOCAL_FCINFO(fcinfo, 0);
		TriggerData trigdata = {0};

		CHECK_FOR_INTERRUPTS();

		/*
		 * Make a call to the trigger function
		 *
		 * No parameters are passed, but we do set a context
		 */
		MemSet(fcinfo, 0, SizeForFunctionCallInfo(0));

		/*
		 * We assume RI_FKey_check_ins won't look at flinfo...
		 */
		trigdata.type = T_TriggerData;
		trigdata.tg_event = TRIGGER_EVENT_INSERT | TRIGGER_EVENT_ROW;
		trigdata.tg_relation = rel;
		trigdata.tg_trigtuple = ExecFetchSlotHeapTuple(slot, false, NULL);
		trigdata.tg_trigslot = slot;
		trigdata.tg_trigger = &trig;

		fcinfo->context = (Node *) &trigdata;

		/* Raises an error on any row violating the FK constraint */
		RI_FKey_check_ins(fcinfo);

		MemoryContextReset(perTupCxt);
	}

	MemoryContextSwitchTo(oldcxt);
	MemoryContextDelete(perTupCxt);
	table_endscan(scan);
	UnregisterSnapshot(snapshot);
	ExecDropSingleTupleTableSlot(slot);
}

/*
 * CreateFKCheckTrigger
 *		Creates the insert (on_insert=true) or update "check" trigger that
 *		implements a given foreign key
 *
 * Returns the OID of the so created trigger.
 */
static Oid
CreateFKCheckTrigger(Oid myRelOid, Oid refRelOid, Constraint *fkconstraint,
					 Oid constraintOid, Oid indexOid, Oid parentTrigOid,
					 bool on_insert)
{
	ObjectAddress trigAddress;
	CreateTrigStmt *fk_trigger;

	/*
	 * Note: for a self-referential FK (referencing and referenced tables are
	 * the same), it is important that the ON UPDATE action fires before the
	 * CHECK action, since both triggers will fire on the same row during an
	 * UPDATE event; otherwise the CHECK trigger will be checking a non-final
	 * state of the row.  Triggers fire in name order, so we ensure this by
	 * using names like "RI_ConstraintTrigger_a_NNNN" for the action triggers
	 * and "RI_ConstraintTrigger_c_NNNN" for the check triggers.
	 */
	fk_trigger = makeNode(CreateTrigStmt);
	fk_trigger->replace = false;
	fk_trigger->isconstraint = true;
	fk_trigger->trigname = "RI_ConstraintTrigger_c";
	fk_trigger->relation = NULL;

	/* Either ON INSERT or ON UPDATE */
	if (on_insert)
	{
		fk_trigger->funcname = SystemFuncName("RI_FKey_check_ins");
		fk_trigger->events = TRIGGER_TYPE_INSERT;
	}
	else
	{
		fk_trigger->funcname = SystemFuncName("RI_FKey_check_upd");
		fk_trigger->events = TRIGGER_TYPE_UPDATE;
	}

	fk_trigger->args = NIL;
	fk_trigger->row = true;
	fk_trigger->timing = TRIGGER_TYPE_AFTER;
	fk_trigger->columns = NIL;
	fk_trigger->whenClause = NULL;
	fk_trigger->transitionRels = NIL;
	fk_trigger->deferrable = fkconstraint->deferrable;
	fk_trigger->initdeferred = fkconstraint->initdeferred;
	fk_trigger->constrrel = NULL;

	trigAddress = CreateTrigger(fk_trigger, NULL, myRelOid, refRelOid,
								constraintOid, indexOid, InvalidOid,
								parentTrigOid, NULL, true, false);

	/* Make changes-so-far visible */
	CommandCounterIncrement();

	return trigAddress.objectId;
}

/*
 * createForeignKeyActionTriggers
 *		Create the referenced-side "action" triggers that implement a foreign
 *		key.
 *
 * Returns the OIDs of the so created triggers in *deleteTrigOid and
 * *updateTrigOid.
 */
static void
createForeignKeyActionTriggers(Relation rel, Oid refRelOid, Constraint *fkconstraint,
							   Oid constraintOid, Oid indexOid,
							   Oid parentDelTrigger, Oid parentUpdTrigger,
							   Oid *deleteTrigOid, Oid *updateTrigOid)
{
	CreateTrigStmt *fk_trigger;
	ObjectAddress trigAddress;

	/*
	 * Build and execute a CREATE CONSTRAINT TRIGGER statement for the ON
	 * DELETE action on the referenced table.
	 */
	fk_trigger = makeNode(CreateTrigStmt);
	fk_trigger->replace = false;
	fk_trigger->isconstraint = true;
	fk_trigger->trigname = "RI_ConstraintTrigger_a";
	fk_trigger->relation = NULL;
	fk_trigger->args = NIL;
	fk_trigger->row = true;
	fk_trigger->timing = TRIGGER_TYPE_AFTER;
	fk_trigger->events = TRIGGER_TYPE_DELETE;
	fk_trigger->columns = NIL;
	fk_trigger->whenClause = NULL;
	fk_trigger->transitionRels = NIL;
	fk_trigger->constrrel = NULL;
	/* Only NO ACTION triggers may be deferred; see also the ON UPDATE case */
	switch (fkconstraint->fk_del_action)
	{
		case FKCONSTR_ACTION_NOACTION:
			fk_trigger->deferrable = fkconstraint->deferrable;
			fk_trigger->initdeferred = fkconstraint->initdeferred;
			fk_trigger->funcname = SystemFuncName("RI_FKey_noaction_del");
			break;
		case FKCONSTR_ACTION_RESTRICT:
			fk_trigger->deferrable = false;
			fk_trigger->initdeferred = false;
			fk_trigger->funcname = SystemFuncName("RI_FKey_restrict_del");
			break;
		case FKCONSTR_ACTION_CASCADE:
			fk_trigger->deferrable = false;
			fk_trigger->initdeferred = false;
			fk_trigger->funcname = SystemFuncName("RI_FKey_cascade_del");
			break;
		case FKCONSTR_ACTION_SETNULL:
			fk_trigger->deferrable = false;
			fk_trigger->initdeferred = false;
			fk_trigger->funcname = SystemFuncName("RI_FKey_setnull_del");
			break;
		case FKCONSTR_ACTION_SETDEFAULT:
			fk_trigger->deferrable = false;
			fk_trigger->initdeferred = false;
			fk_trigger->funcname = SystemFuncName("RI_FKey_setdefault_del");
			break;
		default:
			elog(ERROR, "unrecognized FK action type: %d",
				 (int) fkconstraint->fk_del_action);
			break;
	}

	trigAddress = CreateTrigger(fk_trigger, NULL, refRelOid,
								RelationGetRelid(rel),
								constraintOid, indexOid, InvalidOid,
								parentDelTrigger, NULL, true, false);
	if (deleteTrigOid)
		*deleteTrigOid = trigAddress.objectId;

	/* Make changes-so-far visible */
	CommandCounterIncrement();

	/*
	 * Build and execute a CREATE CONSTRAINT TRIGGER statement for the ON
	 * UPDATE action on the referenced table.
	 */
	fk_trigger = makeNode(CreateTrigStmt);
	fk_trigger->replace = false;
	fk_trigger->isconstraint = true;
	fk_trigger->trigname = "RI_ConstraintTrigger_a";
	fk_trigger->relation = NULL;
	fk_trigger->args = NIL;
	fk_trigger->row = true;
	fk_trigger->timing = TRIGGER_TYPE_AFTER;
	fk_trigger->events = TRIGGER_TYPE_UPDATE;
	fk_trigger->columns = NIL;
	fk_trigger->whenClause = NULL;
	fk_trigger->transitionRels = NIL;
	fk_trigger->constrrel = NULL;
	switch (fkconstraint->fk_upd_action)
	{
		case FKCONSTR_ACTION_NOACTION:
			fk_trigger->deferrable = fkconstraint->deferrable;
			fk_trigger->initdeferred = fkconstraint->initdeferred;
			fk_trigger->funcname = SystemFuncName("RI_FKey_noaction_upd");
			break;
		case FKCONSTR_ACTION_RESTRICT:
			fk_trigger->deferrable = false;
			fk_trigger->initdeferred = false;
			fk_trigger->funcname = SystemFuncName("RI_FKey_restrict_upd");
			break;
		case FKCONSTR_ACTION_CASCADE:
			fk_trigger->deferrable = false;
			fk_trigger->initdeferred = false;
			fk_trigger->funcname = SystemFuncName("RI_FKey_cascade_upd");
			break;
		case FKCONSTR_ACTION_SETNULL:
			fk_trigger->deferrable = false;
			fk_trigger->initdeferred = false;
			fk_trigger->funcname = SystemFuncName("RI_FKey_setnull_upd");
			break;
		case FKCONSTR_ACTION_SETDEFAULT:
			fk_trigger->deferrable = false;
			fk_trigger->initdeferred = false;
			fk_trigger->funcname = SystemFuncName("RI_FKey_setdefault_upd");
			break;
		default:
			elog(ERROR, "unrecognized FK action type: %d",
				 (int) fkconstraint->fk_upd_action);
			break;
	}

	trigAddress = CreateTrigger(fk_trigger, NULL, refRelOid,
								RelationGetRelid(rel),
								constraintOid, indexOid, InvalidOid,
								parentUpdTrigger, NULL, true, false);
	if (updateTrigOid)
		*updateTrigOid = trigAddress.objectId;
}

/*
 * createForeignKeyCheckTriggers
 *		Create the referencing-side "check" triggers that implement a foreign
 *		key.
 *
 * Returns the OIDs of the so created triggers in *insertTrigOid and
 * *updateTrigOid.
 */
static void
createForeignKeyCheckTriggers(Oid myRelOid, Oid refRelOid,
							  Constraint *fkconstraint, Oid constraintOid,
							  Oid indexOid,
							  Oid parentInsTrigger, Oid parentUpdTrigger,
							  Oid *insertTrigOid, Oid *updateTrigOid)
{
	/* One AFTER INSERT check trigger and one AFTER UPDATE check trigger */
	*insertTrigOid = CreateFKCheckTrigger(myRelOid, refRelOid, fkconstraint,
										  constraintOid, indexOid,
										  parentInsTrigger, true);
	*updateTrigOid = CreateFKCheckTrigger(myRelOid, refRelOid, fkconstraint,
										  constraintOid, indexOid,
										  parentUpdTrigger, false);
}

/*
 * ALTER TABLE DROP CONSTRAINT
 *
 * Like DROP COLUMN, we can't use the normal ALTER TABLE recursion mechanism.
 */
static void
ATExecDropConstraint(Relation rel, const char *constrName,
					 DropBehavior behavior,
					 bool recurse, bool recursing,
					 bool missing_ok, LOCKMODE lockmode)
{
	List	   *children;
	ListCell   *child;
	Relation	conrel;
	Form_pg_constraint con;
	SysScanDesc scan;
	ScanKeyData skey[3];
	HeapTuple	tuple;
	bool		found = false;
	bool		is_no_inherit_constraint = false;
	char		contype;

	/* At top level, permission check was done in ATPrepCmd, else do it */
	if (recursing)
		ATSimplePermissions(AT_DropConstraint, rel, ATT_TABLE | ATT_FOREIGN_TABLE);

	conrel = table_open(ConstraintRelationId, RowExclusiveLock);

	/*
	 * Find and drop the target constraint
	 */
	ScanKeyInit(&skey[0],
				Anum_pg_constraint_conrelid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(RelationGetRelid(rel)));
	ScanKeyInit(&skey[1],
				Anum_pg_constraint_contypid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(InvalidOid));
	ScanKeyInit(&skey[2],
				Anum_pg_constraint_conname,
				BTEqualStrategyNumber, F_NAMEEQ,
				CStringGetDatum(constrName));
	scan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId,
							  true, NULL, 3, skey);

	/* There can be at most one matching row */
	if (HeapTupleIsValid(tuple = systable_getnext(scan)))
	{
		ObjectAddress conobj;

		con = (Form_pg_constraint) GETSTRUCT(tuple);

		/* Don't drop inherited constraints */
		if (con->coninhcount > 0 && !recursing)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
					 errmsg("cannot drop inherited constraint \"%s\" of relation \"%s\"",
							constrName, RelationGetRelationName(rel))));

		is_no_inherit_constraint = con->connoinherit;
		contype = con->contype;

		/*
		 * If it's a foreign-key constraint, we'd better lock the referenced
		 * table and check that that's not in use, just as we've already done
		 * for the constrained table (else we might, eg, be dropping a trigger
		 * that has unfired events).  But we can/must skip that in the
		 * self-referential case.
		 */
		if (contype == CONSTRAINT_FOREIGN &&
			con->confrelid != RelationGetRelid(rel))
		{
			Relation	frel;

			/* Must match lock taken by RemoveTriggerById: */
			frel = table_open(con->confrelid, AccessExclusiveLock);
			CheckTableNotInUse(frel, "ALTER TABLE");
			table_close(frel, NoLock);
		}

		/*
		 * Perform the actual constraint deletion
		 */
		conobj.classId = ConstraintRelationId;
		conobj.objectId = con->oid;
		conobj.objectSubId = 0;

		performDeletion(&conobj, behavior, 0);

		found = true;
	}

	systable_endscan(scan);

	if (!found)
	{
		if (!missing_ok)
		{
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_OBJECT),
					 errmsg("constraint \"%s\" of relation \"%s\" does not exist",
							constrName, RelationGetRelationName(rel))));
		}
		else
		{
			ereport(NOTICE,
					(errmsg("constraint \"%s\" of relation \"%s\" does not exist, skipping",
							constrName, RelationGetRelationName(rel))));
			table_close(conrel, RowExclusiveLock);
			return;
		}
	}

	/*
	 * For partitioned tables, non-CHECK inherited constraints are dropped via
	 * the dependency mechanism, so we're done here.
	 */
	if (contype != CONSTRAINT_CHECK &&
		rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
	{
		table_close(conrel, RowExclusiveLock);
		return;
	}

	/*
	 * Propagate to children as appropriate.  Unlike most other ALTER
	 * routines, we have to do this one level of recursion at a time; we can't
	 * use find_all_inheritors to do it in one pass.
	 */
	if (!is_no_inherit_constraint)
		children = find_inheritance_children(RelationGetRelid(rel), lockmode);
	else
		children = NIL;

	/*
	 * For a partitioned table, if partitions exist and we are told not to
	 * recurse, it's a user error.  It doesn't make sense to have a constraint
	 * be defined only on the parent, especially if it's a partitioned table.
	 */
	if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
		children != NIL && !recurse)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
				 errmsg("cannot remove constraint from only the partitioned table when partitions exist"),
				 errhint("Do not specify the ONLY keyword.")));

	foreach(child, children)
	{
		Oid			childrelid = lfirst_oid(child);
		Relation	childrel;
		HeapTuple	copy_tuple;

		/* find_inheritance_children already got lock */
		childrel = table_open(childrelid, NoLock);
		CheckTableNotInUse(childrel, "ALTER TABLE");

		ScanKeyInit(&skey[0],
					Anum_pg_constraint_conrelid,
					BTEqualStrategyNumber, F_OIDEQ,
					ObjectIdGetDatum(childrelid));
		ScanKeyInit(&skey[1],
					Anum_pg_constraint_contypid,
					BTEqualStrategyNumber, F_OIDEQ,
					ObjectIdGetDatum(InvalidOid));
		ScanKeyInit(&skey[2],
					Anum_pg_constraint_conname,
					BTEqualStrategyNumber, F_NAMEEQ,
					CStringGetDatum(constrName));
		scan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId,
								  true, NULL, 3, skey);

		/* There can be at most one matching row */
		if (!HeapTupleIsValid(tuple = systable_getnext(scan)))
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_OBJECT),
					 errmsg("constraint \"%s\" of relation \"%s\" does not exist",
							constrName,
							RelationGetRelationName(childrel))));

		copy_tuple = heap_copytuple(tuple);

		systable_endscan(scan);

		con = (Form_pg_constraint) GETSTRUCT(copy_tuple);

		/* Right now only CHECK constraints can be inherited */
		if
(con->contype != CONSTRAINT_CHECK) + elog(ERROR, "inherited constraint is not a CHECK constraint"); + + if (con->coninhcount <= 0) /* shouldn't happen */ + elog(ERROR, "relation %u has non-inherited constraint \"%s\"", + childrelid, constrName); + + if (recurse) + { + /* + * If the child constraint has other definition sources, just + * decrement its inheritance count; if not, recurse to delete it. + */ + if (con->coninhcount == 1 && !con->conislocal) + { + /* Time to delete this child constraint, too */ + ATExecDropConstraint(childrel, constrName, behavior, + true, true, + false, lockmode); + } + else + { + /* Child constraint must survive my deletion */ + con->coninhcount--; + CatalogTupleUpdate(conrel, ©_tuple->t_self, copy_tuple); + + /* Make update visible */ + CommandCounterIncrement(); + } + } + else + { + /* + * If we were told to drop ONLY in this table (no recursion), we + * need to mark the inheritors' constraints as locally defined + * rather than inherited. + */ + con->coninhcount--; + con->conislocal = true; + + CatalogTupleUpdate(conrel, ©_tuple->t_self, copy_tuple); + + /* Make update visible */ + CommandCounterIncrement(); + } + + heap_freetuple(copy_tuple); + + table_close(childrel, NoLock); + } + + table_close(conrel, RowExclusiveLock); +} + +/* + * ALTER COLUMN TYPE + * + * Unlike other subcommand types, we do parse transformation for ALTER COLUMN + * TYPE during phase 1 --- the AlterTableCmd passed in here is already + * transformed (and must be, because we rely on some transformed fields). + * + * The point of this is that the execution of all ALTER COLUMN TYPEs for a + * table will be done "in parallel" during phase 3, so all the USING + * expressions should be parsed assuming the original column types. Also, + * this allows a USING expression to refer to a field that will be dropped. 
 *
 * To make this work safely, AT_PASS_DROP then AT_PASS_ALTER_TYPE must be
 * the first two execution steps in phase 2; they must not see the effects
 * of any other subcommand types, since the USING expressions are parsed
 * against the unmodified table's state.
 */
static void
ATPrepAlterColumnType(List **wqueue,
                      AlteredTableInfo *tab, Relation rel,
                      bool recurse, bool recursing,
                      AlterTableCmd *cmd, LOCKMODE lockmode,
                      AlterTableUtilityContext *context)
{
    char       *colName = cmd->name;
    ColumnDef  *def = (ColumnDef *) cmd->def;
    TypeName   *typeName = def->typeName;
    Node       *transform = def->cooked_default;   /* USING expr, if any */
    HeapTuple   tuple;
    Form_pg_attribute attTup;
    AttrNumber  attnum;
    Oid         targettype;
    int32       targettypmod;
    Oid         targetcollid;
    NewColumnValue *newval;
    ParseState *pstate = make_parsestate(NULL);
    AclResult   aclresult;
    bool        is_expr;

    /* Typed tables take their row shape from a type; can't alter it here */
    if (rel->rd_rel->reloftype && !recursing)
        ereport(ERROR,
                (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                 errmsg("cannot alter column type of typed table")));

    /* lookup the attribute so we can check inheritance status */
    tuple = SearchSysCacheAttName(RelationGetRelid(rel), colName);
    if (!HeapTupleIsValid(tuple))
        ereport(ERROR,
                (errcode(ERRCODE_UNDEFINED_COLUMN),
                 errmsg("column \"%s\" of relation \"%s\" does not exist",
                        colName, RelationGetRelationName(rel))));
    attTup = (Form_pg_attribute) GETSTRUCT(tuple);
    attnum = attTup->attnum;

    /* Can't alter a system attribute */
    if (attnum <= 0)
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("cannot alter system column \"%s\"",
                        colName)));

    /*
     * Don't alter inherited columns.  At outer level, there had better not be
     * any inherited definition; when recursing, we assume this was checked at
     * the parent level (see below).
     */
    if (attTup->attinhcount > 0 && !recursing)
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
                 errmsg("cannot alter inherited column \"%s\"",
                        colName)));

    /* Don't alter columns used in the partition key */
    if (has_partition_attrs(rel,
                            bms_make_singleton(attnum - FirstLowInvalidHeapAttributeNumber),
                            &is_expr))
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
                 errmsg("cannot alter column \"%s\" because it is part of the partition key of relation \"%s\"",
                        colName, RelationGetRelationName(rel))));

    /* Look up the target type */
    typenameTypeIdAndMod(NULL, typeName, &targettype, &targettypmod);

    /* Caller must have USAGE privilege on the target type */
    aclresult = pg_type_aclcheck(targettype, GetUserId(), ACL_USAGE);
    if (aclresult != ACLCHECK_OK)
        aclcheck_error_type(aclresult, targettype);

    /* And the collation */
    targetcollid = GetColumnDefCollation(NULL, def, targettype);

    /* make sure datatype is legal for a column */
    CheckAttributeType(colName, targettype, targetcollid,
                       list_make1_oid(rel->rd_rel->reltype),
                       0);

    if (tab->relkind == RELKIND_RELATION ||
        tab->relkind == RELKIND_PARTITIONED_TABLE)
    {
        /*
         * Set up an expression to transform the old data value to the new
         * type.  If a USING option was given, use the expression as
         * transformed by transformAlterTableStmt, else just take the old
         * value and try to coerce it.  We do this first so that type
         * incompatibility can be detected before we waste effort, and because
         * we need the expression to be parsed against the original table row
         * type.
         */
        if (!transform)
        {
            transform = (Node *) makeVar(1, attnum,
                                         attTup->atttypid, attTup->atttypmod,
                                         attTup->attcollation,
                                         0);
        }

        transform = coerce_to_target_type(pstate,
                                          transform, exprType(transform),
                                          targettype, targettypmod,
                                          COERCION_ASSIGNMENT,
                                          COERCE_IMPLICIT_CAST,
                                          -1);
        if (transform == NULL)
        {
            /* error text depends on whether USING was specified or not */
            if (def->cooked_default != NULL)
                ereport(ERROR,
                        (errcode(ERRCODE_DATATYPE_MISMATCH),
                         errmsg("result of USING clause for column \"%s\""
                                " cannot be cast automatically to type %s",
                                colName, format_type_be(targettype)),
                         errhint("You might need to add an explicit cast.")));
            else
                ereport(ERROR,
                        (errcode(ERRCODE_DATATYPE_MISMATCH),
                         errmsg("column \"%s\" cannot be cast automatically to type %s",
                                colName, format_type_be(targettype)),
                /* translator: USING is SQL, don't translate it */
                         errhint("You might need to specify \"USING %s::%s\".",
                                 quote_identifier(colName),
                                 format_type_with_typemod(targettype,
                                                          targettypmod))));
        }

        /* Fix collations after all else */
        assign_expr_collations(pstate, transform);

        /* Plan the expr now so we can accurately assess the need to rewrite. */
        transform = (Node *) expression_planner((Expr *) transform);

        /*
         * Add a work queue item to make ATRewriteTable update the column
         * contents.
         */
        newval = (NewColumnValue *) palloc0(sizeof(NewColumnValue));
        newval->attnum = attnum;
        newval->expr = (Expr *) transform;
        newval->is_generated = false;

        tab->newvals = lappend(tab->newvals, newval);
        if (ATColumnChangeRequiresRewrite(transform, attnum))
            tab->rewrite |= AT_REWRITE_COLUMN_REWRITE;
    }
    else if (transform)
        ereport(ERROR,
                (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                 errmsg("\"%s\" is not a table",
                        RelationGetRelationName(rel))));

    if (!RELKIND_HAS_STORAGE(tab->relkind))
    {
        /*
         * For relations without storage, do this check now.  Regular tables
         * will check it later when the table is being rewritten.
         */
        find_composite_type_dependencies(rel->rd_rel->reltype, rel, NULL);
    }

    ReleaseSysCache(tuple);

    /*
     * Recurse manually by queueing a new command for each child, if
     * necessary.  We cannot apply ATSimpleRecursion here because we need to
     * remap attribute numbers in the USING expression, if any.
     *
     * If we are told not to recurse, there had better not be any child
     * tables; else the alter would put them out of step.
     */
    if (recurse)
    {
        Oid         relid = RelationGetRelid(rel);
        List       *child_oids,
                   *child_numparents;
        ListCell   *lo,
                   *li;

        child_oids = find_all_inheritors(relid, lockmode,
                                         &child_numparents);

        /*
         * find_all_inheritors does the recursive search of the inheritance
         * hierarchy, so all we have to do is process all of the relids in the
         * list that it returns.
         */
        forboth(lo, child_oids, li, child_numparents)
        {
            Oid         childrelid = lfirst_oid(lo);
            int         numparents = lfirst_int(li);
            Relation    childrel;
            HeapTuple   childtuple;
            Form_pg_attribute childattTup;

            /* the parent itself is in the list too; skip it */
            if (childrelid == relid)
                continue;

            /* find_all_inheritors already got lock */
            childrel = relation_open(childrelid, NoLock);
            CheckTableNotInUse(childrel, "ALTER TABLE");

            /*
             * Verify that the child doesn't have any inherited definitions of
             * this column that came from outside this inheritance hierarchy.
             * (renameatt makes a similar test, though in a different way
             * because of its different recursion mechanism.)
             */
            childtuple = SearchSysCacheAttName(RelationGetRelid(childrel),
                                               colName);
            if (!HeapTupleIsValid(childtuple))
                ereport(ERROR,
                        (errcode(ERRCODE_UNDEFINED_COLUMN),
                         errmsg("column \"%s\" of relation \"%s\" does not exist",
                                colName, RelationGetRelationName(childrel))));
            childattTup = (Form_pg_attribute) GETSTRUCT(childtuple);

            if (childattTup->attinhcount > numparents)
                ereport(ERROR,
                        (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
                         errmsg("cannot alter inherited column \"%s\" of relation \"%s\"",
                                colName, RelationGetRelationName(childrel))));

            ReleaseSysCache(childtuple);

            /*
             * Remap the attribute numbers.  If no USING expression was
             * specified, there is no need for this step.
             */
            if (def->cooked_default)
            {
                AttrMap    *attmap;
                bool        found_whole_row;

                /* create a copy to scribble on */
                cmd = copyObject(cmd);

                attmap = build_attrmap_by_name(RelationGetDescr(childrel),
                                               RelationGetDescr(rel));
                ((ColumnDef *) cmd->def)->cooked_default =
                    map_variable_attnos(def->cooked_default,
                                        1, 0,
                                        attmap,
                                        InvalidOid, &found_whole_row);
                if (found_whole_row)
                    ereport(ERROR,
                            (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                             errmsg("cannot convert whole-row table reference"),
                             errdetail("USING expression contains a whole-row table reference.")));
                pfree(attmap);
            }
            ATPrepCmd(wqueue, childrel, cmd, false, true, lockmode, context);
            relation_close(childrel, NoLock);
        }
    }
    else if (!recursing &&
             find_inheritance_children(RelationGetRelid(rel), NoLock) != NIL)
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
                 errmsg("type of inherited column \"%s\" must be changed in child tables too",
                        colName)));

    /* Composite types propagate the change to dependent typed tables */
    if (tab->relkind == RELKIND_COMPOSITE_TYPE)
        ATTypedTableRecursion(wqueue, rel, cmd, lockmode, context);
}

/*
 * When the data type of a column is changed, a rewrite might not be required
 * if the new type is sufficiently identical to the old one, and the USING
 * clause isn't trying to insert some other
value. It's safe to skip the + * rewrite in these cases: + * + * - the old type is binary coercible to the new type + * - the new type is an unconstrained domain over the old type + * - {NEW,OLD} or {OLD,NEW} is {timestamptz,timestamp} and the timezone is UTC + * + * In the case of a constrained domain, we could get by with scanning the + * table and checking the constraint rather than actually rewriting it, but we + * don't currently try to do that. + */ +static bool +ATColumnChangeRequiresRewrite(Node *expr, AttrNumber varattno) +{ + Assert(expr != NULL); + + for (;;) + { + /* only one varno, so no need to check that */ + if (IsA(expr, Var) && ((Var *) expr)->varattno == varattno) + return false; + else if (IsA(expr, RelabelType)) + expr = (Node *) ((RelabelType *) expr)->arg; + else if (IsA(expr, CoerceToDomain)) + { + CoerceToDomain *d = (CoerceToDomain *) expr; + + if (DomainHasConstraints(d->resulttype)) + return true; + expr = (Node *) d->arg; + } + else if (IsA(expr, FuncExpr)) + { + FuncExpr *f = (FuncExpr *) expr; + + switch (f->funcid) + { + case F_TIMESTAMPTZ_TIMESTAMP: + case F_TIMESTAMP_TIMESTAMPTZ: + if (TimestampTimestampTzRequiresRewrite()) + return true; + else + expr = linitial(f->args); + break; + default: + return true; + } + } + else + return true; + } +} + +/* + * ALTER COLUMN .. SET DATA TYPE + * + * Return the address of the modified column. 
 */
static ObjectAddress
ATExecAlterColumnType(AlteredTableInfo *tab, Relation rel,
                      AlterTableCmd *cmd, LOCKMODE lockmode)
{
    char       *colName = cmd->name;
    ColumnDef  *def = (ColumnDef *) cmd->def;
    TypeName   *typeName = def->typeName;
    HeapTuple   heapTup;
    Form_pg_attribute attTup,
                attOldTup;
    AttrNumber  attnum;
    HeapTuple   typeTuple;
    Form_pg_type tform;
    Oid         targettype;
    int32       targettypmod;
    Oid         targetcollid;
    Node       *defaultexpr;
    Relation    attrelation;
    Relation    depRel;
    ScanKeyData key[3];
    SysScanDesc scan;
    HeapTuple   depTup;
    ObjectAddress address;

    /*
     * Clear all the missing values if we're rewriting the table, since this
     * renders them pointless.
     */
    if (tab->rewrite)
    {
        Relation    newrel;

        newrel = table_open(RelationGetRelid(rel), NoLock);
        RelationClearMissing(newrel);
        relation_close(newrel, NoLock);
        /* make sure we don't conflict with later attribute modifications */
        CommandCounterIncrement();
    }

    attrelation = table_open(AttributeRelationId, RowExclusiveLock);

    /* Look up the target column */
    heapTup = SearchSysCacheCopyAttName(RelationGetRelid(rel), colName);
    if (!HeapTupleIsValid(heapTup)) /* shouldn't happen */
        ereport(ERROR,
                (errcode(ERRCODE_UNDEFINED_COLUMN),
                 errmsg("column \"%s\" of relation \"%s\" does not exist",
                        colName, RelationGetRelationName(rel))));
    attTup = (Form_pg_attribute) GETSTRUCT(heapTup);
    attnum = attTup->attnum;
    attOldTup = TupleDescAttr(tab->oldDesc, attnum - 1);

    /* Check for multiple ALTER TYPE on same column --- can't cope */
    if (attTup->atttypid != attOldTup->atttypid ||
        attTup->atttypmod != attOldTup->atttypmod)
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("cannot alter type of column \"%s\" twice",
                        colName)));

    /* Look up the target type (should not fail, since prep found it) */
    typeTuple = typenameType(NULL, typeName, &targettypmod);
    tform = (Form_pg_type) GETSTRUCT(typeTuple);
    targettype = tform->oid;
    /* And the collation */
    targetcollid = GetColumnDefCollation(NULL, def, targettype);

    /*
     * If there is a default expression for the column, get it and ensure we
     * can coerce it to the new datatype.  (We must do this before changing
     * the column type, because build_column_default itself will try to
     * coerce, and will not issue the error message we want if it fails.)
     *
     * We remove any implicit coercion steps at the top level of the old
     * default expression; this has been agreed to satisfy the principle of
     * least surprise.  (The conversion to the new column type should act like
     * it started from what the user sees as the stored expression, and the
     * implicit coercions aren't going to be shown.)
     */
    if (attTup->atthasdef)
    {
        defaultexpr = build_column_default(rel, attnum);
        Assert(defaultexpr);
        defaultexpr = strip_implicit_coercions(defaultexpr);
        defaultexpr = coerce_to_target_type(NULL,   /* no UNKNOWN params */
                                            defaultexpr, exprType(defaultexpr),
                                            targettype, targettypmod,
                                            COERCION_ASSIGNMENT,
                                            COERCE_IMPLICIT_CAST,
                                            -1);
        if (defaultexpr == NULL)
        {
            if (attTup->attgenerated)
                ereport(ERROR,
                        (errcode(ERRCODE_DATATYPE_MISMATCH),
                         errmsg("generation expression for column \"%s\" cannot be cast automatically to type %s",
                                colName, format_type_be(targettype))));
            else
                ereport(ERROR,
                        (errcode(ERRCODE_DATATYPE_MISMATCH),
                         errmsg("default for column \"%s\" cannot be cast automatically to type %s",
                                colName, format_type_be(targettype))));
        }
    }
    else
        defaultexpr = NULL;

    /*
     * Find everything that depends on the column (constraints, indexes, etc),
     * and record enough information to let us recreate the objects.
     *
     * The actual recreation does not happen here, but only after we have
     * performed all the individual ALTER TYPE operations.  We have to save
     * the info before executing ALTER TYPE, though, else the deparser will
     * get confused.
     */
    depRel = table_open(DependRelationId, RowExclusiveLock);

    /* scan pg_depend for entries that reference this column */
    ScanKeyInit(&key[0],
                Anum_pg_depend_refclassid,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(RelationRelationId));
    ScanKeyInit(&key[1],
                Anum_pg_depend_refobjid,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(RelationGetRelid(rel)));
    ScanKeyInit(&key[2],
                Anum_pg_depend_refobjsubid,
                BTEqualStrategyNumber, F_INT4EQ,
                Int32GetDatum((int32) attnum));

    scan = systable_beginscan(depRel, DependReferenceIndexId, true,
                              NULL, 3, key);

    while (HeapTupleIsValid(depTup = systable_getnext(scan)))
    {
        Form_pg_depend foundDep = (Form_pg_depend) GETSTRUCT(depTup);
        ObjectAddress foundObject;

        foundObject.classId = foundDep->classid;
        foundObject.objectId = foundDep->objid;
        foundObject.objectSubId = foundDep->objsubid;

        switch (getObjectClass(&foundObject))
        {
            case OCLASS_CLASS:
                {
                    char        relKind = get_rel_relkind(foundObject.objectId);

                    if (relKind == RELKIND_INDEX ||
                        relKind == RELKIND_PARTITIONED_INDEX)
                    {
                        Assert(foundObject.objectSubId == 0);
                        RememberIndexForRebuilding(foundObject.objectId, tab);
                    }
                    else if (relKind == RELKIND_SEQUENCE)
                    {
                        /*
                         * This must be a SERIAL column's sequence.  We need
                         * not do anything to it.
                         */
                        Assert(foundObject.objectSubId == 0);
                    }
                    else
                    {
                        /* Not expecting any other direct dependencies... */
                        elog(ERROR, "unexpected object depending on column: %s",
                             getObjectDescription(&foundObject, false));
                    }
                    break;
                }

            case OCLASS_CONSTRAINT:
                Assert(foundObject.objectSubId == 0);
                RememberConstraintForRebuilding(foundObject.objectId, tab);
                break;

            case OCLASS_REWRITE:
                /* XXX someday see if we can cope with revising views */
                ereport(ERROR,
                        (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                         errmsg("cannot alter type of a column used by a view or rule"),
                         errdetail("%s depends on column \"%s\"",
                                   getObjectDescription(&foundObject, false),
                                   colName)));
                break;

            case OCLASS_TRIGGER:

                /*
                 * A trigger can depend on a column because the column is
                 * specified as an update target, or because the column is
                 * used in the trigger's WHEN condition.  The first case would
                 * not require any extra work, but the second case would
                 * require updating the WHEN expression, which will take a
                 * significant amount of new code.  Since we can't easily tell
                 * which case applies, we punt for both.  FIXME someday.
                 */
                ereport(ERROR,
                        (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                         errmsg("cannot alter type of a column used in a trigger definition"),
                         errdetail("%s depends on column \"%s\"",
                                   getObjectDescription(&foundObject, false),
                                   colName)));
                break;

            case OCLASS_POLICY:

                /*
                 * A policy can depend on a column because the column is
                 * specified in the policy's USING or WITH CHECK qual
                 * expressions.  It might be possible to rewrite and recheck
                 * the policy expression, but punt for now.  It's certainly
                 * easy enough to remove and recreate the policy; still, FIXME
                 * someday.
                 */
                ereport(ERROR,
                        (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                         errmsg("cannot alter type of a column used in a policy definition"),
                         errdetail("%s depends on column \"%s\"",
                                   getObjectDescription(&foundObject, false),
                                   colName)));
                break;

            case OCLASS_DEFAULT:
                {
                    ObjectAddress col = GetAttrDefaultColumnAddress(foundObject.objectId);

                    if (col.objectId == RelationGetRelid(rel) &&
                        col.objectSubId == attnum)
                    {
                        /*
                         * Ignore the column's own default expression, which
                         * we will deal with below.
                         */
                        Assert(defaultexpr);
                    }
                    else
                    {
                        /*
                         * This must be a reference from the expression of a
                         * generated column elsewhere in the same table.
                         * Changing the type of a column that is used by a
                         * generated column is not allowed by SQL standard, so
                         * just punt for now.  It might be doable with some
                         * thinking and effort.
                         */
                        ereport(ERROR,
                                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                                 errmsg("cannot alter type of a column used by a generated column"),
                                 errdetail("Column \"%s\" is used by generated column \"%s\".",
                                           colName,
                                           get_attname(col.objectId,
                                                       col.objectSubId,
                                                       false))));
                    }
                    break;
                }

            case OCLASS_STATISTIC_EXT:

                /*
                 * Give the extended-stats machinery a chance to fix anything
                 * that this column type change would break.
                 */
                RememberStatisticsForRebuilding(foundObject.objectId, tab);
                break;

            case OCLASS_PROC:
            case OCLASS_TYPE:
            case OCLASS_CAST:
            case OCLASS_COLLATION:
            case OCLASS_CONVERSION:
            case OCLASS_LANGUAGE:
            case OCLASS_LARGEOBJECT:
            case OCLASS_OPERATOR:
            case OCLASS_OPCLASS:
            case OCLASS_OPFAMILY:
            case OCLASS_AM:
            case OCLASS_AMOP:
            case OCLASS_AMPROC:
            case OCLASS_SCHEMA:
            case OCLASS_TSPARSER:
            case OCLASS_TSDICT:
            case OCLASS_TSTEMPLATE:
            case OCLASS_TSCONFIG:
            case OCLASS_ROLE:
            case OCLASS_DATABASE:
            case OCLASS_TBLSPACE:
            case OCLASS_FDW:
            case OCLASS_FOREIGN_SERVER:
            case OCLASS_USER_MAPPING:
            case OCLASS_DEFACL:
            case OCLASS_EXTENSION:
            case OCLASS_EVENT_TRIGGER:
            case OCLASS_PARAMETER_ACL:
            case OCLASS_PUBLICATION:
            case OCLASS_PUBLICATION_NAMESPACE:
            case OCLASS_PUBLICATION_REL:
            case OCLASS_SUBSCRIPTION:
            case OCLASS_TRANSFORM:

                /*
                 * We don't expect any of these sorts of objects to depend on
                 * a column.
                 */
                elog(ERROR, "unexpected object depending on column: %s",
                     getObjectDescription(&foundObject, false));
                break;

                /*
                 * There's intentionally no default: case here; we want the
                 * compiler to warn if a new OCLASS hasn't been handled above.
                 */
        }
    }

    systable_endscan(scan);

    /*
     * Now scan for dependencies of this column on other things.  The only
     * things we should find are the dependency on the column datatype and
     * possibly a collation dependency.  Those can be removed.
     */
    ScanKeyInit(&key[0],
                Anum_pg_depend_classid,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(RelationRelationId));
    ScanKeyInit(&key[1],
                Anum_pg_depend_objid,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(RelationGetRelid(rel)));
    ScanKeyInit(&key[2],
                Anum_pg_depend_objsubid,
                BTEqualStrategyNumber, F_INT4EQ,
                Int32GetDatum((int32) attnum));

    scan = systable_beginscan(depRel, DependDependerIndexId, true,
                              NULL, 3, key);

    while (HeapTupleIsValid(depTup = systable_getnext(scan)))
    {
        Form_pg_depend foundDep = (Form_pg_depend) GETSTRUCT(depTup);
        ObjectAddress foundObject;

        foundObject.classId = foundDep->refclassid;
        foundObject.objectId = foundDep->refobjid;
        foundObject.objectSubId = foundDep->refobjsubid;

        if (foundDep->deptype != DEPENDENCY_NORMAL)
            elog(ERROR, "found unexpected dependency type '%c'",
                 foundDep->deptype);
        if (!(foundDep->refclassid == TypeRelationId &&
              foundDep->refobjid == attTup->atttypid) &&
            !(foundDep->refclassid == CollationRelationId &&
              foundDep->refobjid == attTup->attcollation))
            elog(ERROR, "found unexpected dependency for column: %s",
                 getObjectDescription(&foundObject, false));

        CatalogTupleDelete(depRel, &depTup->t_self);
    }

    systable_endscan(scan);

    table_close(depRel, RowExclusiveLock);

    /*
     * Here we go --- change the recorded column type and collation.  (Note
     * heapTup is a copy of the syscache entry, so okay to scribble on.)  First
     * fix up the missing value if any.
     */
    if (attTup->atthasmissing)
    {
        Datum       missingval;
        bool        missingNull;

        /* if rewrite is true the missing value should already be cleared */
        Assert(tab->rewrite == 0);

        /* Get the missing value datum */
        missingval = heap_getattr(heapTup,
                                  Anum_pg_attribute_attmissingval,
                                  attrelation->rd_att,
                                  &missingNull);

        /* if it's a null array there is nothing to do */

        if (!missingNull)
        {
            /*
             * Get the datum out of the array and repack it in a new array
             * built with the new type data.  We assume that since the table
             * doesn't need rewriting, the actual Datum doesn't need to be
             * changed, only the array metadata.
             */

            int         one = 1;
            bool        isNull;
            Datum       valuesAtt[Natts_pg_attribute];
            bool        nullsAtt[Natts_pg_attribute];
            bool        replacesAtt[Natts_pg_attribute];
            HeapTuple   newTup;

            MemSet(valuesAtt, 0, sizeof(valuesAtt));
            MemSet(nullsAtt, false, sizeof(nullsAtt));
            MemSet(replacesAtt, false, sizeof(replacesAtt));

            missingval = array_get_element(missingval,
                                           1,
                                           &one,
                                           0,
                                           attTup->attlen,
                                           attTup->attbyval,
                                           attTup->attalign,
                                           &isNull);
            missingval = PointerGetDatum(construct_array(&missingval,
                                                         1,
                                                         targettype,
                                                         tform->typlen,
                                                         tform->typbyval,
                                                         tform->typalign));

            valuesAtt[Anum_pg_attribute_attmissingval - 1] = missingval;
            replacesAtt[Anum_pg_attribute_attmissingval - 1] = true;
            nullsAtt[Anum_pg_attribute_attmissingval - 1] = false;

            newTup = heap_modify_tuple(heapTup, RelationGetDescr(attrelation),
                                       valuesAtt, nullsAtt, replacesAtt);
            heap_freetuple(heapTup);
            heapTup = newTup;
            attTup = (Form_pg_attribute) GETSTRUCT(heapTup);
        }
    }

    /* Apply the new type's physical properties to the pg_attribute row */
    attTup->atttypid = targettype;
    attTup->atttypmod = targettypmod;
    attTup->attcollation = targetcollid;
    attTup->attndims = list_length(typeName->arrayBounds);
    attTup->attlen = tform->typlen;
    attTup->attbyval = tform->typbyval;
    attTup->attalign = tform->typalign;
    attTup->attstorage = tform->typstorage;
    attTup->attcompression = InvalidCompressionMethod;

    ReleaseSysCache(typeTuple);

    CatalogTupleUpdate(attrelation, &heapTup->t_self, heapTup);

    table_close(attrelation, RowExclusiveLock);

    /* Install dependencies on new datatype and collation */
    add_column_datatype_dependency(RelationGetRelid(rel), attnum, targettype);
    add_column_collation_dependency(RelationGetRelid(rel), attnum, targetcollid);

    /*
     * Drop any pg_statistic entry for the column, since it's now wrong type
     */
    RemoveStatistics(RelationGetRelid(rel), attnum);

    InvokeObjectPostAlterHook(RelationRelationId,
                              RelationGetRelid(rel), attnum);

    /*
     * Update the default, if present, by brute force --- remove and re-add
     * the default.  Probably unsafe to take shortcuts, since the new version
     * may well have additional dependencies.  (It's okay to do this now,
     * rather than after other ALTER TYPE commands, since the default won't
     * depend on other column types.)
     */
    if (defaultexpr)
    {
        /*
         * If it's a GENERATED default, drop its dependency records, in
         * particular its INTERNAL dependency on the column, which would
         * otherwise cause dependency.c to refuse to perform the deletion.
         */
        if (attTup->attgenerated)
        {
            Oid         attrdefoid = GetAttrDefaultOid(RelationGetRelid(rel), attnum);

            if (!OidIsValid(attrdefoid))
                elog(ERROR, "could not find attrdef tuple for relation %u attnum %d",
                     RelationGetRelid(rel), attnum);
            (void) deleteDependencyRecordsFor(AttrDefaultRelationId, attrdefoid, false);
        }

        /*
         * Make updates-so-far visible, particularly the new pg_attribute row
         * which will be updated again.
         */
        CommandCounterIncrement();

        /*
         * We use RESTRICT here for safety, but at present we do not expect
         * anything to depend on the default.
         */
        RemoveAttrDefault(RelationGetRelid(rel), attnum, DROP_RESTRICT, true,
                          true);

        StoreAttrDefault(rel, attnum, defaultexpr, true, false);
    }

    ObjectAddressSubSet(address, RelationRelationId,
                        RelationGetRelid(rel), attnum);

    /* Cleanup */
    heap_freetuple(heapTup);

    return address;
}

/*
 * Subroutine for ATExecAlterColumnType: remember that a replica identity
 * needs to be reset.
 */
static void
RememberReplicaIdentityForRebuilding(Oid indoid, AlteredTableInfo *tab)
{
    /* nothing to do unless this index is the table's replica identity */
    if (!get_index_isreplident(indoid))
        return;

    if (tab->replicaIdentityIndex)
        elog(ERROR, "relation %u has multiple indexes marked as replica identity", tab->relid);

    tab->replicaIdentityIndex = get_rel_name(indoid);
}

/*
 * Subroutine for ATExecAlterColumnType: remember any clustered index.
 */
static void
RememberClusterOnForRebuilding(Oid indoid, AlteredTableInfo *tab)
{
    /* nothing to do unless this index is the table's clustered index */
    if (!get_index_isclustered(indoid))
        return;

    if (tab->clusterOnIndex)
        elog(ERROR, "relation %u has multiple clustered indexes", tab->relid);

    tab->clusterOnIndex = get_rel_name(indoid);
}

/*
 * Subroutine for ATExecAlterColumnType: remember that a constraint needs
 * to be rebuilt (which we might already know).
 */
static void
RememberConstraintForRebuilding(Oid conoid, AlteredTableInfo *tab)
{
    /*
     * This de-duplication check is critical for two independent reasons: we
     * mustn't try to recreate the same constraint twice, and if a constraint
     * depends on more than one column whose type is to be altered, we must
     * capture its definition string before applying any of the column type
     * changes.  ruleutils.c will get confused if we ask again later.
     */
    if (!list_member_oid(tab->changedConstraintOids, conoid))
    {
        /* OK, capture the constraint's existing definition string */
        char       *defstring = pg_get_constraintdef_command(conoid);
        Oid         indoid;

        tab->changedConstraintOids = lappend_oid(tab->changedConstraintOids,
                                                 conoid);
        tab->changedConstraintDefs = lappend(tab->changedConstraintDefs,
                                             defstring);

        /*
         * For the index of a constraint, if any, remember if it is used for
         * the table's replica identity or if it is a clustered index, so that
         * ATPostAlterTypeCleanup() can queue up commands necessary to restore
         * those properties.
         */
        indoid = get_constraint_index(conoid);
        if (OidIsValid(indoid))
        {
            RememberReplicaIdentityForRebuilding(indoid, tab);
            RememberClusterOnForRebuilding(indoid, tab);
        }
    }
}

/*
 * Subroutine for ATExecAlterColumnType: remember that an index needs
 * to be rebuilt (which we might already know).
 */
static void
RememberIndexForRebuilding(Oid indoid, AlteredTableInfo *tab)
{
    /*
     * This de-duplication check is critical for two independent reasons: we
     * mustn't try to recreate the same index twice, and if an index depends
     * on more than one column whose type is to be altered, we must capture
     * its definition string before applying any of the column type changes.
     * ruleutils.c will get confused if we ask again later.
     */
    if (!list_member_oid(tab->changedIndexOids, indoid))
    {
        /*
         * Before adding it as an index-to-rebuild, we'd better see if it
         * belongs to a constraint, and if so rebuild the constraint instead.
         * Typically this check fails, because constraint indexes normally
         * have only dependencies on their constraint.  But it's possible for
         * such an index to also have direct dependencies on table columns,
         * for example with a partial exclusion constraint.
         */
        Oid         conoid = get_index_constraint(indoid);

        if (OidIsValid(conoid))
        {
            RememberConstraintForRebuilding(conoid, tab);
        }
        else
        {
            /* OK, capture the index's existing definition string */
            char       *defstring = pg_get_indexdef_string(indoid);

            tab->changedIndexOids = lappend_oid(tab->changedIndexOids,
                                                indoid);
            tab->changedIndexDefs = lappend(tab->changedIndexDefs,
                                            defstring);

            /*
             * Remember if this index is used for the table's replica identity
             * or if it is a clustered index, so that ATPostAlterTypeCleanup()
             * can queue up commands necessary to restore those properties.
             */
            RememberReplicaIdentityForRebuilding(indoid, tab);
            RememberClusterOnForRebuilding(indoid, tab);
        }
    }
}

/*
 * Subroutine for ATExecAlterColumnType: remember that a statistics object
 * needs to be rebuilt (which we might already know).
 */
static void
RememberStatisticsForRebuilding(Oid stxoid, AlteredTableInfo *tab)
{
    /*
     * This de-duplication check is critical for two independent reasons: we
     * mustn't try to recreate the same statistics object twice, and if the
     * statistics object depends on more than one column whose type is to be
     * altered, we must capture its definition string before applying any of
     * the type changes.  ruleutils.c will get confused if we ask again later.
     */
    if (!list_member_oid(tab->changedStatisticsOids, stxoid))
    {
        /* OK, capture the statistics object's existing definition string */
        char       *defstring = pg_get_statisticsobjdef_string(stxoid);

        tab->changedStatisticsOids = lappend_oid(tab->changedStatisticsOids,
                                                 stxoid);
        tab->changedStatisticsDefs = lappend(tab->changedStatisticsDefs,
                                             defstring);
    }
}

/*
 * Cleanup after we've finished all the ALTER TYPE operations for a
 * particular relation.  We have to drop and recreate all the indexes
 * and constraints that depend on the altered columns.
We do the + * actual dropping here, but re-creation is managed by adding work + * queue entries to do those steps later. + */ +static void +ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode) +{ + ObjectAddress obj; + ObjectAddresses *objects; + ListCell *def_item; + ListCell *oid_item; + + /* + * Collect all the constraints and indexes to drop so we can process them + * in a single call. That way we don't have to worry about dependencies + * among them. + */ + objects = new_object_addresses(); + + /* + * Re-parse the index and constraint definitions, and attach them to the + * appropriate work queue entries. We do this before dropping because in + * the case of a FOREIGN KEY constraint, we might not yet have exclusive + * lock on the table the constraint is attached to, and we need to get + * that before reparsing/dropping. + * + * We can't rely on the output of deparsing to tell us which relation to + * operate on, because concurrent activity might have made the name + * resolve differently. Instead, we've got to use the OID of the + * constraint or index we're processing to figure out which relation to + * operate on. 
+ */ + forboth(oid_item, tab->changedConstraintOids, + def_item, tab->changedConstraintDefs) + { + Oid oldId = lfirst_oid(oid_item); + HeapTuple tup; + Form_pg_constraint con; + Oid relid; + Oid confrelid; + char contype; + bool conislocal; + + tup = SearchSysCache1(CONSTROID, ObjectIdGetDatum(oldId)); + if (!HeapTupleIsValid(tup)) /* should not happen */ + elog(ERROR, "cache lookup failed for constraint %u", oldId); + con = (Form_pg_constraint) GETSTRUCT(tup); + if (OidIsValid(con->conrelid)) + relid = con->conrelid; + else + { + /* must be a domain constraint */ + relid = get_typ_typrelid(getBaseType(con->contypid)); + if (!OidIsValid(relid)) + elog(ERROR, "could not identify relation associated with constraint %u", oldId); + } + confrelid = con->confrelid; + contype = con->contype; + conislocal = con->conislocal; + ReleaseSysCache(tup); + + ObjectAddressSet(obj, ConstraintRelationId, oldId); + add_exact_object_address(&obj, objects); + + /* + * If the constraint is inherited (only), we don't want to inject a + * new definition here; it'll get recreated when ATAddCheckConstraint + * recurses from adding the parent table's constraint. But we had to + * carry the info this far so that we can drop the constraint below. + */ + if (!conislocal) + continue; + + /* + * When rebuilding an FK constraint that references the table we're + * modifying, we might not yet have any lock on the FK's table, so get + * one now. We'll need AccessExclusiveLock for the DROP CONSTRAINT + * step, so there's no value in asking for anything weaker. 
+ */ + if (relid != tab->relid && contype == CONSTRAINT_FOREIGN) + LockRelationOid(relid, AccessExclusiveLock); + + ATPostAlterTypeParse(oldId, relid, confrelid, + (char *) lfirst(def_item), + wqueue, lockmode, tab->rewrite); + } + forboth(oid_item, tab->changedIndexOids, + def_item, tab->changedIndexDefs) + { + Oid oldId = lfirst_oid(oid_item); + Oid relid; + + relid = IndexGetRelation(oldId, false); + ATPostAlterTypeParse(oldId, relid, InvalidOid, + (char *) lfirst(def_item), + wqueue, lockmode, tab->rewrite); + + ObjectAddressSet(obj, RelationRelationId, oldId); + add_exact_object_address(&obj, objects); + } + + /* add dependencies for new statistics */ + forboth(oid_item, tab->changedStatisticsOids, + def_item, tab->changedStatisticsDefs) + { + Oid oldId = lfirst_oid(oid_item); + Oid relid; + + relid = StatisticsGetRelation(oldId, false); + ATPostAlterTypeParse(oldId, relid, InvalidOid, + (char *) lfirst(def_item), + wqueue, lockmode, tab->rewrite); + + ObjectAddressSet(obj, StatisticExtRelationId, oldId); + add_exact_object_address(&obj, objects); + } + + /* + * Queue up command to restore replica identity index marking + */ + if (tab->replicaIdentityIndex) + { + AlterTableCmd *cmd = makeNode(AlterTableCmd); + ReplicaIdentityStmt *subcmd = makeNode(ReplicaIdentityStmt); + + subcmd->identity_type = REPLICA_IDENTITY_INDEX; + subcmd->name = tab->replicaIdentityIndex; + cmd->subtype = AT_ReplicaIdentity; + cmd->def = (Node *) subcmd; + + /* do it after indexes and constraints */ + tab->subcmds[AT_PASS_OLD_CONSTR] = + lappend(tab->subcmds[AT_PASS_OLD_CONSTR], cmd); + } + + /* + * Queue up command to restore marking of index used for cluster. 
+ */ + if (tab->clusterOnIndex) + { + AlterTableCmd *cmd = makeNode(AlterTableCmd); + + cmd->subtype = AT_ClusterOn; + cmd->name = tab->clusterOnIndex; + + /* do it after indexes and constraints */ + tab->subcmds[AT_PASS_OLD_CONSTR] = + lappend(tab->subcmds[AT_PASS_OLD_CONSTR], cmd); + } + + /* + * It should be okay to use DROP_RESTRICT here, since nothing else should + * be depending on these objects. + */ + performMultipleDeletions(objects, DROP_RESTRICT, PERFORM_DELETION_INTERNAL); + + free_object_addresses(objects); + + /* + * The objects will get recreated during subsequent passes over the work + * queue. + */ +} + +/* + * Parse the previously-saved definition string for a constraint, index or + * statistics object against the newly-established column data type(s), and + * queue up the resulting command parsetrees for execution. + * + * This might fail if, for example, you have a WHERE clause that uses an + * operator that's not available for the new column type. + */ +static void +ATPostAlterTypeParse(Oid oldId, Oid oldRelId, Oid refRelId, char *cmd, + List **wqueue, LOCKMODE lockmode, bool rewrite) +{ + List *raw_parsetree_list; + List *querytree_list; + ListCell *list_item; + Relation rel; + + /* + * We expect that we will get only ALTER TABLE and CREATE INDEX + * statements. Hence, there is no need to pass them through + * parse_analyze_*() or the rewriter, but instead we need to pass them + * through parse_utilcmd.c to make them ready for execution. 
	 */
	raw_parsetree_list = raw_parser(cmd, RAW_PARSE_DEFAULT);
	querytree_list = NIL;
	foreach(list_item, raw_parsetree_list)
	{
		RawStmt    *rs = lfirst_node(RawStmt, list_item);
		Node	   *stmt = rs->stmt;

		if (IsA(stmt, IndexStmt))
			querytree_list = lappend(querytree_list,
									 transformIndexStmt(oldRelId,
														(IndexStmt *) stmt,
														cmd));
		else if (IsA(stmt, AlterTableStmt))
		{
			List	   *beforeStmts;
			List	   *afterStmts;

			stmt = (Node *) transformAlterTableStmt(oldRelId,
													(AlterTableStmt *) stmt,
													cmd,
													&beforeStmts,
													&afterStmts);
			/* preserve the ordering transformAlterTableStmt established */
			querytree_list = list_concat(querytree_list, beforeStmts);
			querytree_list = lappend(querytree_list, stmt);
			querytree_list = list_concat(querytree_list, afterStmts);
		}
		else if (IsA(stmt, CreateStatsStmt))
			querytree_list = lappend(querytree_list,
									 transformStatsStmt(oldRelId,
														(CreateStatsStmt *) stmt,
														cmd));
		else
			querytree_list = lappend(querytree_list, stmt);
	}

	/* Caller should already have acquired whatever lock we need. */
	rel = relation_open(oldRelId, NoLock);

	/*
	 * Attach each generated command to the proper place in the work queue.
	 * Note this could result in creation of entirely new work-queue entries.
	 *
	 * Also note that we have to tweak the command subtypes, because it turns
	 * out that re-creation of indexes and constraints has to act a bit
	 * differently from initial creation.
	 */
	foreach(list_item, querytree_list)
	{
		Node	   *stm = (Node *) lfirst(list_item);
		AlteredTableInfo *tab;

		tab = ATGetQueueEntry(wqueue, rel);

		if (IsA(stm, IndexStmt))
		{
			IndexStmt  *stmt = (IndexStmt *) stm;
			AlterTableCmd *newcmd;

			/* only reuse the old storage if no table rewrite is happening */
			if (!rewrite)
				TryReuseIndex(oldId, stmt);
			stmt->reset_default_tblspc = true;
			/* keep the index's comment */
			stmt->idxcomment = GetComment(oldId, RelationRelationId, 0);

			newcmd = makeNode(AlterTableCmd);
			newcmd->subtype = AT_ReAddIndex;
			newcmd->def = (Node *) stmt;
			tab->subcmds[AT_PASS_OLD_INDEX] =
				lappend(tab->subcmds[AT_PASS_OLD_INDEX], newcmd);
		}
		else if (IsA(stm, AlterTableStmt))
		{
			AlterTableStmt *stmt = (AlterTableStmt *) stm;
			ListCell   *lcmd;

			foreach(lcmd, stmt->cmds)
			{
				AlterTableCmd *cmd = lfirst_node(AlterTableCmd, lcmd);

				if (cmd->subtype == AT_AddIndex)
				{
					IndexStmt  *indstmt;
					Oid			indoid;

					indstmt = castNode(IndexStmt, cmd->def);
					indoid = get_constraint_index(oldId);

					if (!rewrite)
						TryReuseIndex(indoid, indstmt);
					/* keep any comment on the index */
					indstmt->idxcomment = GetComment(indoid,
													 RelationRelationId, 0);
					indstmt->reset_default_tblspc = true;

					cmd->subtype = AT_ReAddIndex;
					tab->subcmds[AT_PASS_OLD_INDEX] =
						lappend(tab->subcmds[AT_PASS_OLD_INDEX], cmd);

					/* recreate any comment on the constraint */
					RebuildConstraintComment(tab,
											 AT_PASS_OLD_INDEX,
											 oldId,
											 rel,
											 NIL,
											 indstmt->idxname);
				}
				else if (cmd->subtype == AT_AddConstraint)
				{
					Constraint *con = castNode(Constraint, cmd->def);

					con->old_pktable_oid = refRelId;
					/* rewriting neither side of a FK */
					if (con->contype == CONSTR_FOREIGN &&
						!rewrite && tab->rewrite == 0)
						TryReuseForeignKey(oldId, con);
					con->reset_default_tblspc = true;
					cmd->subtype = AT_ReAddConstraint;
					tab->subcmds[AT_PASS_OLD_CONSTR] =
						lappend(tab->subcmds[AT_PASS_OLD_CONSTR], cmd);

					/* recreate any comment on the constraint */
					RebuildConstraintComment(tab,
											 AT_PASS_OLD_CONSTR,
											 oldId,
											 rel,
											 NIL,
											 con->conname);
				}
				else if (cmd->subtype == AT_SetNotNull)
				{
					/*
					 * The parser will create AT_SetNotNull subcommands for
					 * columns of PRIMARY KEY indexes/constraints, but we need
					 * not do anything with them here, because the columns'
					 * NOT NULL marks will already have been propagated into
					 * the new table definition.
					 */
				}
				else
					elog(ERROR, "unexpected statement subtype: %d",
						 (int) cmd->subtype);
			}
		}
		else if (IsA(stm, AlterDomainStmt))
		{
			AlterDomainStmt *stmt = (AlterDomainStmt *) stm;

			if (stmt->subtype == 'C')	/* ADD CONSTRAINT */
			{
				Constraint *con = castNode(Constraint, stmt->def);
				AlterTableCmd *cmd = makeNode(AlterTableCmd);

				cmd->subtype = AT_ReAddDomainConstraint;
				cmd->def = (Node *) stmt;
				tab->subcmds[AT_PASS_OLD_CONSTR] =
					lappend(tab->subcmds[AT_PASS_OLD_CONSTR], cmd);

				/* recreate any comment on the constraint */
				RebuildConstraintComment(tab,
										 AT_PASS_OLD_CONSTR,
										 oldId,
										 NULL,
										 stmt->typeName,
										 con->conname);
			}
			else
				elog(ERROR, "unexpected statement subtype: %d",
					 (int) stmt->subtype);
		}
		else if (IsA(stm, CreateStatsStmt))
		{
			CreateStatsStmt *stmt = (CreateStatsStmt *) stm;
			AlterTableCmd *newcmd;

			/* keep the statistics object's comment */
			stmt->stxcomment = GetComment(oldId, StatisticExtRelationId, 0);

			newcmd = makeNode(AlterTableCmd);
			newcmd->subtype = AT_ReAddStatistics;
			newcmd->def = (Node *) stmt;
			tab->subcmds[AT_PASS_MISC] =
				lappend(tab->subcmds[AT_PASS_MISC], newcmd);
		}
		else
			elog(ERROR, "unexpected statement type: %d",
				 (int) nodeTag(stm));
	}

	relation_close(rel, NoLock);
}

/*
 * Subroutine for ATPostAlterTypeParse() to recreate any existing comment
 * for a table or domain constraint that is being rebuilt.
 *
 * objid is the OID of the constraint.
 * Pass "rel" for a table constraint, or "domname" (domain's qualified name
 * as a string list) for a domain constraint.
 * (We could dig that info, as well as the conname, out of the pg_constraint
 * entry; but callers already have them so might as well pass them.)
 */
static void
RebuildConstraintComment(AlteredTableInfo *tab, int pass, Oid objid,
						 Relation rel, List *domname,
						 const char *conname)
{
	CommentStmt *cmd;
	char	   *comment_str;
	AlterTableCmd *newcmd;

	/* Look for comment for object wanted, and leave if none */
	comment_str = GetComment(objid, ConstraintRelationId, 0);
	if (comment_str == NULL)
		return;

	/* Build CommentStmt node, copying all input data for safety */
	cmd = makeNode(CommentStmt);
	if (rel)
	{
		/* table constraint: identified by schema, table name, conname */
		cmd->objtype = OBJECT_TABCONSTRAINT;
		cmd->object = (Node *)
			list_make3(makeString(get_namespace_name(RelationGetNamespace(rel))),
					   makeString(pstrdup(RelationGetRelationName(rel))),
					   makeString(pstrdup(conname)));
	}
	else
	{
		/* domain constraint: identified by domain type name, conname */
		cmd->objtype = OBJECT_DOMCONSTRAINT;
		cmd->object = (Node *)
			list_make2(makeTypeNameFromNameList(copyObject(domname)),
					   makeString(pstrdup(conname)));
	}
	cmd->comment = comment_str;

	/* Append it to list of commands */
	newcmd = makeNode(AlterTableCmd);
	newcmd->subtype = AT_ReAddComment;
	newcmd->def = (Node *) cmd;
	tab->subcmds[pass] = lappend(tab->subcmds[pass], newcmd);
}

/*
 * Subroutine for ATPostAlterTypeParse().  Calls out to CheckIndexCompatible()
 * for the real analysis, then mutates the IndexStmt based on that verdict.
 */
static void
TryReuseIndex(Oid oldId, IndexStmt *stmt)
{
	if (CheckIndexCompatible(oldId,
							 stmt->accessMethod,
							 stmt->indexParams,
							 stmt->excludeOpNames))
	{
		Relation	irel = index_open(oldId, NoLock);

		/* If it's a partitioned index, there is no storage to share. */
		if (irel->rd_rel->relkind != RELKIND_PARTITIONED_INDEX)
		{
			/* record the old relfilenode so the new index can adopt it */
			stmt->oldNode = irel->rd_node.relNode;
			stmt->oldCreateSubid = irel->rd_createSubid;
			stmt->oldFirstRelfilenodeSubid = irel->rd_firstRelfilenodeSubid;
		}
		index_close(irel, NoLock);
	}
}

/*
 * Subroutine for ATPostAlterTypeParse().
 *
 * Stash the old P-F equality operator into the Constraint node, for possible
 * use by ATAddForeignKeyConstraint() in determining whether revalidation of
 * this constraint can be skipped.
 */
static void
TryReuseForeignKey(Oid oldId, Constraint *con)
{
	HeapTuple	tup;
	Datum		adatum;
	bool		isNull;
	ArrayType  *arr;
	Oid		   *rawarr;
	int			numkeys;
	int			i;

	Assert(con->contype == CONSTR_FOREIGN);
	Assert(con->old_conpfeqop == NIL);	/* already prepared this node */

	tup = SearchSysCache1(CONSTROID, ObjectIdGetDatum(oldId));
	if (!HeapTupleIsValid(tup)) /* should not happen */
		elog(ERROR, "cache lookup failed for constraint %u", oldId);

	adatum = SysCacheGetAttr(CONSTROID, tup,
							 Anum_pg_constraint_conpfeqop, &isNull);
	if (isNull)
		elog(ERROR, "null conpfeqop for constraint %u", oldId);
	arr = DatumGetArrayTypeP(adatum);	/* ensure not toasted */
	numkeys = ARR_DIMS(arr)[0];
	/* test follows the one in ri_FetchConstraintInfo() */
	if (ARR_NDIM(arr) != 1 ||
		ARR_HASNULL(arr) ||
		ARR_ELEMTYPE(arr) != OIDOID)
		elog(ERROR, "conpfeqop is not a 1-D Oid array");
	rawarr = (Oid *) ARR_DATA_PTR(arr);

	/* stash a List of the operator Oids in our Constraint node */
	for (i = 0; i < numkeys; i++)
		con->old_conpfeqop = lappend_oid(con->old_conpfeqop, rawarr[i]);

	ReleaseSysCache(tup);
}

/*
 * ALTER COLUMN .. OPTIONS ( ...
 )
 *
 * Returns the address of the modified column
 */
static ObjectAddress
ATExecAlterColumnGenericOptions(Relation rel,
								const char *colName,
								List *options,
								LOCKMODE lockmode)
{
	Relation	ftrel;
	Relation	attrel;
	ForeignServer *server;
	ForeignDataWrapper *fdw;
	HeapTuple	tuple;
	HeapTuple	newtuple;
	bool		isnull;
	Datum		repl_val[Natts_pg_attribute];
	bool		repl_null[Natts_pg_attribute];
	bool		repl_repl[Natts_pg_attribute];
	Datum		datum;
	Form_pg_foreign_table fttableform;
	Form_pg_attribute atttableform;
	AttrNumber	attnum;
	ObjectAddress address;

	/* No options given: nothing to do */
	if (options == NIL)
		return InvalidObjectAddress;

	/* First, determine FDW validator associated to the foreign table. */
	ftrel = table_open(ForeignTableRelationId, AccessShareLock);
	tuple = SearchSysCache1(FOREIGNTABLEREL, rel->rd_id);
	if (!HeapTupleIsValid(tuple))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("foreign table \"%s\" does not exist",
						RelationGetRelationName(rel))));
	fttableform = (Form_pg_foreign_table) GETSTRUCT(tuple);
	server = GetForeignServer(fttableform->ftserver);
	fdw = GetForeignDataWrapper(server->fdwid);

	table_close(ftrel, AccessShareLock);
	ReleaseSysCache(tuple);

	attrel = table_open(AttributeRelationId, RowExclusiveLock);
	tuple = SearchSysCacheAttName(RelationGetRelid(rel), colName);
	if (!HeapTupleIsValid(tuple))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_COLUMN),
				 errmsg("column \"%s\" of relation \"%s\" does not exist",
						colName, RelationGetRelationName(rel))));

	/* Prevent them from altering a system attribute */
	atttableform = (Form_pg_attribute) GETSTRUCT(tuple);
	attnum = atttableform->attnum;
	if (attnum <= 0)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot alter system column \"%s\"", colName)));


	/* Initialize buffers for new tuple values */
	memset(repl_val, 0, sizeof(repl_val));
	memset(repl_null, false, sizeof(repl_null));
	memset(repl_repl, false, sizeof(repl_repl));

	/* Extract the current options */
	datum = SysCacheGetAttr(ATTNAME,
							tuple,
							Anum_pg_attribute_attfdwoptions,
							&isnull);
	if (isnull)
		datum = PointerGetDatum(NULL);

	/* Transform the options (validated by the FDW's validator function) */
	datum = transformGenericOptions(AttributeRelationId,
									datum,
									options,
									fdw->fdwvalidator);

	if (PointerIsValid(DatumGetPointer(datum)))
		repl_val[Anum_pg_attribute_attfdwoptions - 1] = datum;
	else
		repl_null[Anum_pg_attribute_attfdwoptions - 1] = true;

	repl_repl[Anum_pg_attribute_attfdwoptions - 1] = true;

	/* Everything looks good - update the tuple */

	newtuple = heap_modify_tuple(tuple, RelationGetDescr(attrel),
								 repl_val, repl_null, repl_repl);

	CatalogTupleUpdate(attrel, &newtuple->t_self, newtuple);

	InvokeObjectPostAlterHook(RelationRelationId,
							  RelationGetRelid(rel),
							  atttableform->attnum);
	ObjectAddressSubSet(address, RelationRelationId,
						RelationGetRelid(rel), attnum);

	ReleaseSysCache(tuple);

	table_close(attrel, RowExclusiveLock);

	heap_freetuple(newtuple);

	return address;
}

/*
 * ALTER TABLE OWNER
 *
 * recursing is true if we are recursing from a table to its indexes,
 * sequences, or toast table.  We don't allow the ownership of those things to
 * be changed separately from the parent table.  Also, we can skip permission
 * checks (this is necessary not just an optimization, else we'd fail to
 * handle toast tables properly).
 *
 * recursing is also true if ALTER TYPE OWNER is calling us to fix up a
 * free-standing composite type.
 */
void
ATExecChangeOwner(Oid relationOid, Oid newOwnerId, bool recursing, LOCKMODE lockmode)
{
	Relation	target_rel;
	Relation	class_rel;
	HeapTuple	tuple;
	Form_pg_class tuple_class;

	/*
	 * Get exclusive lock till end of transaction on the target table. Use
	 * relation_open so that we can work on indexes and sequences.
	 */
	target_rel = relation_open(relationOid, lockmode);

	/* Get its pg_class tuple, too */
	class_rel = table_open(RelationRelationId, RowExclusiveLock);

	tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relationOid));
	if (!HeapTupleIsValid(tuple))
		elog(ERROR, "cache lookup failed for relation %u", relationOid);
	tuple_class = (Form_pg_class) GETSTRUCT(tuple);

	/* Can we change the ownership of this tuple? */
	switch (tuple_class->relkind)
	{
		case RELKIND_RELATION:
		case RELKIND_VIEW:
		case RELKIND_MATVIEW:
		case RELKIND_FOREIGN_TABLE:
		case RELKIND_PARTITIONED_TABLE:
			/* ok to change owner */
			break;
		case RELKIND_INDEX:
			if (!recursing)
			{
				/*
				 * Because ALTER INDEX OWNER used to be allowed, and in fact
				 * is generated by old versions of pg_dump, we give a warning
				 * and do nothing rather than erroring out.  Also, to avoid
				 * unnecessary chatter while restoring those old dumps, say
				 * nothing at all if the command would be a no-op anyway.
				 */
				if (tuple_class->relowner != newOwnerId)
					ereport(WARNING,
							(errcode(ERRCODE_WRONG_OBJECT_TYPE),
							 errmsg("cannot change owner of index \"%s\"",
									NameStr(tuple_class->relname)),
							 errhint("Change the ownership of the index's table, instead.")));
				/* quick hack to exit via the no-op path */
				newOwnerId = tuple_class->relowner;
			}
			break;
		case RELKIND_PARTITIONED_INDEX:
			if (recursing)
				break;
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change owner of index \"%s\"",
							NameStr(tuple_class->relname)),
					 errhint("Change the ownership of the index's table, instead.")));
			break;
		case RELKIND_SEQUENCE:
			if (!recursing &&
				tuple_class->relowner != newOwnerId)
			{
				/* if it's an owned sequence, disallow changing it by itself */
				Oid			tableId;
				int32		colId;

				if (sequenceIsOwned(relationOid, DEPENDENCY_AUTO, &tableId, &colId) ||
					sequenceIsOwned(relationOid, DEPENDENCY_INTERNAL, &tableId, &colId))
					ereport(ERROR,
							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
							 errmsg("cannot change owner of sequence \"%s\"",
									NameStr(tuple_class->relname)),
							 errdetail("Sequence \"%s\" is linked to table \"%s\".",
									   NameStr(tuple_class->relname),
									   get_rel_name(tableId))));
			}
			break;
		case RELKIND_COMPOSITE_TYPE:
			if (recursing)
				break;
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("\"%s\" is a composite type",
							NameStr(tuple_class->relname)),
					 errhint("Use ALTER TYPE instead.")));
			break;
		case RELKIND_TOASTVALUE:
			if (recursing)
				break;
			/* FALL THRU */
		default:
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change owner of relation \"%s\"",
							NameStr(tuple_class->relname)),
					 errdetail_relkind_not_supported(tuple_class->relkind)));
	}

	/*
	 * If the new owner is the same as the existing owner, consider the
	 * command to have succeeded.  This is for dump restoration purposes.
	 */
	if (tuple_class->relowner != newOwnerId)
	{
		Datum		repl_val[Natts_pg_class];
		bool		repl_null[Natts_pg_class];
		bool		repl_repl[Natts_pg_class];
		Acl		   *newAcl;
		Datum		aclDatum;
		bool		isNull;
		HeapTuple	newtuple;

		/* skip permission checks when recursing to index or toast table */
		if (!recursing)
		{
			/* Superusers can always do it */
			if (!superuser())
			{
				Oid			namespaceOid = tuple_class->relnamespace;
				AclResult	aclresult;

				/* Otherwise, must be owner of the existing object */
				if (!pg_class_ownercheck(relationOid, GetUserId()))
					aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(get_rel_relkind(relationOid)),
								   RelationGetRelationName(target_rel));

				/* Must be able to become new owner */
				check_is_member_of_role(GetUserId(), newOwnerId);

				/* New owner must have CREATE privilege on namespace */
				aclresult = pg_namespace_aclcheck(namespaceOid, newOwnerId,
												  ACL_CREATE);
				if (aclresult != ACLCHECK_OK)
					aclcheck_error(aclresult, OBJECT_SCHEMA,
								   get_namespace_name(namespaceOid));
			}
		}

		memset(repl_null, false, sizeof(repl_null));
		memset(repl_repl, false, sizeof(repl_repl));

		repl_repl[Anum_pg_class_relowner - 1] = true;
		repl_val[Anum_pg_class_relowner - 1] = ObjectIdGetDatum(newOwnerId);

		/*
		 * Determine the modified ACL for the new owner.  This is only
		 * necessary when the ACL is non-null.
		 */
		aclDatum = SysCacheGetAttr(RELOID, tuple,
								   Anum_pg_class_relacl,
								   &isNull);
		if (!isNull)
		{
			newAcl = aclnewowner(DatumGetAclP(aclDatum),
								 tuple_class->relowner, newOwnerId);
			repl_repl[Anum_pg_class_relacl - 1] = true;
			repl_val[Anum_pg_class_relacl - 1] = PointerGetDatum(newAcl);
		}

		newtuple = heap_modify_tuple(tuple, RelationGetDescr(class_rel), repl_val, repl_null, repl_repl);

		CatalogTupleUpdate(class_rel, &newtuple->t_self, newtuple);

		heap_freetuple(newtuple);

		/*
		 * We must similarly update any per-column ACLs to reflect the new
		 * owner; for neatness reasons that's split out as a subroutine.
		 */
		change_owner_fix_column_acls(relationOid,
									 tuple_class->relowner,
									 newOwnerId);

		/*
		 * Update owner dependency reference, if any.  A composite type has
		 * none, because it's tracked for the pg_type entry instead of here;
		 * indexes and TOAST tables don't have their own entries either.
		 */
		if (tuple_class->relkind != RELKIND_COMPOSITE_TYPE &&
			tuple_class->relkind != RELKIND_INDEX &&
			tuple_class->relkind != RELKIND_PARTITIONED_INDEX &&
			tuple_class->relkind != RELKIND_TOASTVALUE)
			changeDependencyOnOwner(RelationRelationId, relationOid,
									newOwnerId);

		/*
		 * Also change the ownership of the table's row type, if it has one
		 */
		if (OidIsValid(tuple_class->reltype))
			AlterTypeOwnerInternal(tuple_class->reltype, newOwnerId);

		/*
		 * If we are operating on a table or materialized view, also change
		 * the ownership of any indexes and sequences that belong to the
		 * relation, as well as its toast table (if it has one).
		 */
		if (tuple_class->relkind == RELKIND_RELATION ||
			tuple_class->relkind == RELKIND_PARTITIONED_TABLE ||
			tuple_class->relkind == RELKIND_MATVIEW ||
			tuple_class->relkind == RELKIND_TOASTVALUE)
		{
			List	   *index_oid_list;
			ListCell   *i;

			/* Find all the indexes belonging to this relation */
			index_oid_list = RelationGetIndexList(target_rel);

			/* For each index, recursively change its ownership */
			foreach(i, index_oid_list)
				ATExecChangeOwner(lfirst_oid(i), newOwnerId, true, lockmode);

			list_free(index_oid_list);
		}

		/* If it has a toast table, recurse to change its ownership */
		if (tuple_class->reltoastrelid != InvalidOid)
			ATExecChangeOwner(tuple_class->reltoastrelid, newOwnerId,
							  true, lockmode);

		/* If it has dependent sequences, recurse to change them too */
		change_owner_recurse_to_sequences(relationOid, newOwnerId, lockmode);
	}

	InvokeObjectPostAlterHook(RelationRelationId, relationOid, 0);

	ReleaseSysCache(tuple);
	table_close(class_rel, RowExclusiveLock);
	/* keep the lock on the target relation until end of transaction */
	relation_close(target_rel, NoLock);
}

/*
 * change_owner_fix_column_acls
 *
 * Helper function for ATExecChangeOwner.  Scan the columns of the table
 * and fix any non-null column ACLs to reflect the new owner.
 */
static void
change_owner_fix_column_acls(Oid relationOid, Oid oldOwnerId, Oid newOwnerId)
{
	Relation	attRelation;
	SysScanDesc scan;
	ScanKeyData key[1];
	HeapTuple	attributeTuple;

	attRelation = table_open(AttributeRelationId, RowExclusiveLock);
	ScanKeyInit(&key[0],
				Anum_pg_attribute_attrelid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(relationOid));
	scan = systable_beginscan(attRelation, AttributeRelidNumIndexId,
							  true, NULL, 1, key);
	while (HeapTupleIsValid(attributeTuple = systable_getnext(scan)))
	{
		Form_pg_attribute att = (Form_pg_attribute) GETSTRUCT(attributeTuple);
		Datum		repl_val[Natts_pg_attribute];
		bool		repl_null[Natts_pg_attribute];
		bool		repl_repl[Natts_pg_attribute];
		Acl		   *newAcl;
		Datum		aclDatum;
		bool		isNull;
		HeapTuple	newtuple;

		/* Ignore dropped columns */
		if (att->attisdropped)
			continue;

		aclDatum = heap_getattr(attributeTuple,
								Anum_pg_attribute_attacl,
								RelationGetDescr(attRelation),
								&isNull);
		/* Null ACLs do not require changes */
		if (isNull)
			continue;

		memset(repl_null, false, sizeof(repl_null));
		memset(repl_repl, false, sizeof(repl_repl));

		/* Rewrite the ACL with the new owner substituted for the old */
		newAcl = aclnewowner(DatumGetAclP(aclDatum),
							 oldOwnerId, newOwnerId);
		repl_repl[Anum_pg_attribute_attacl - 1] = true;
		repl_val[Anum_pg_attribute_attacl - 1] = PointerGetDatum(newAcl);

		newtuple = heap_modify_tuple(attributeTuple,
									 RelationGetDescr(attRelation),
									 repl_val, repl_null, repl_repl);

		CatalogTupleUpdate(attRelation, &newtuple->t_self, newtuple);

		heap_freetuple(newtuple);
	}
	systable_endscan(scan);
	table_close(attRelation, RowExclusiveLock);
}

/*
 * change_owner_recurse_to_sequences
 *
 * Helper function for ATExecChangeOwner.  Examines pg_depend searching
 * for sequences that are dependent on serial columns, and changes their
 * ownership.
 */
static void
change_owner_recurse_to_sequences(Oid relationOid, Oid newOwnerId, LOCKMODE lockmode)
{
	Relation	depRel;
	SysScanDesc scan;
	ScanKeyData key[2];
	HeapTuple	tup;

	/*
	 * SERIAL sequences are those having an auto dependency on one of the
	 * table's columns (we don't care *which* column, exactly).
	 */
	depRel = table_open(DependRelationId, AccessShareLock);

	ScanKeyInit(&key[0],
				Anum_pg_depend_refclassid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(RelationRelationId));
	ScanKeyInit(&key[1],
				Anum_pg_depend_refobjid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(relationOid));
	/* we leave refobjsubid unspecified */

	scan = systable_beginscan(depRel, DependReferenceIndexId, true,
							  NULL, 2, key);

	while (HeapTupleIsValid(tup = systable_getnext(scan)))
	{
		Form_pg_depend depForm = (Form_pg_depend) GETSTRUCT(tup);
		Relation	seqRel;

		/* skip dependencies other than auto dependencies on columns */
		if (depForm->refobjsubid == 0 ||
			depForm->classid != RelationRelationId ||
			depForm->objsubid != 0 ||
			!(depForm->deptype == DEPENDENCY_AUTO || depForm->deptype == DEPENDENCY_INTERNAL))
			continue;

		/* Use relation_open just in case it's an index */
		seqRel = relation_open(depForm->objid, lockmode);

		/* skip non-sequence relations */
		if (RelationGetForm(seqRel)->relkind != RELKIND_SEQUENCE)
		{
			/* No need to keep the lock */
			relation_close(seqRel, lockmode);
			continue;
		}

		/* We don't need to close the sequence while we alter it. */
		ATExecChangeOwner(depForm->objid, newOwnerId, true, lockmode);

		/* Now we can close it.  Keep the lock till end of transaction. */
		relation_close(seqRel, NoLock);
	}

	systable_endscan(scan);

	relation_close(depRel, AccessShareLock);
}

/*
 * ALTER TABLE CLUSTER ON
 *
 * The only thing we have to do is to change the indisclustered bits.
 *
 * Return the address of the new clustering index.
 */
static ObjectAddress
ATExecClusterOn(Relation rel, const char *indexName, LOCKMODE lockmode)
{
	Oid			indexOid;
	ObjectAddress address;

	/* Resolve the index name within the table's own namespace */
	indexOid = get_relname_relid(indexName, rel->rd_rel->relnamespace);

	if (!OidIsValid(indexOid))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("index \"%s\" for table \"%s\" does not exist",
						indexName, RelationGetRelationName(rel))));

	/* Check index is valid to cluster on */
	check_index_is_clusterable(rel, indexOid, lockmode);

	/* And do the work */
	mark_index_clustered(rel, indexOid, false);

	ObjectAddressSet(address,
					 RelationRelationId, indexOid);

	return address;
}

/*
 * ALTER TABLE SET WITHOUT CLUSTER
 *
 * We have to find any indexes on the table that have indisclustered bit
 * set and turn it off.
 */
static void
ATExecDropCluster(Relation rel, LOCKMODE lockmode)
{
	/* InvalidOid means "clear the clustered marking from all indexes" */
	mark_index_clustered(rel, InvalidOid, false);
}

/*
 * Preparation phase for SET ACCESS METHOD
 *
 * Check that access method exists.  If it is the same as the table's current
 * access method, it is a no-op.  Otherwise, a table rewrite is necessary.
 */
static void
ATPrepSetAccessMethod(AlteredTableInfo *tab, Relation rel, const char *amname)
{
	Oid			amoid;

	/* Check that the table access method exists */
	amoid = get_table_am_oid(amname, false);

	/* Same access method as before: nothing to do */
	if (rel->rd_rel->relam == amoid)
		return;

	/* Save info for Phase 3 to do the real work */
	tab->rewrite |= AT_REWRITE_ACCESS_METHOD;
	tab->newAccessMethod = amoid;
}

/*
 * ALTER TABLE SET TABLESPACE
 */
static void
ATPrepSetTableSpace(AlteredTableInfo *tab, Relation rel, const char *tablespacename, LOCKMODE lockmode)
{
	Oid			tablespaceId;

	/* Check that the tablespace exists */
	tablespaceId = get_tablespace_oid(tablespacename, false);

	/* Check permissions except when moving to database's default */
	if (OidIsValid(tablespaceId) && tablespaceId != MyDatabaseTableSpace)
	{
		AclResult	aclresult;

		aclresult = pg_tablespace_aclcheck(tablespaceId, GetUserId(), ACL_CREATE);
		if (aclresult != ACLCHECK_OK)
			aclcheck_error(aclresult, OBJECT_TABLESPACE, tablespacename);
	}

	/* Save info for Phase 3 to do the real work */
	if (OidIsValid(tab->newTableSpace))
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 errmsg("cannot have multiple SET TABLESPACE subcommands")));

	tab->newTableSpace = tablespaceId;
}

/*
 * Set, reset, or replace reloptions.
 */
static void
ATExecSetRelOptions(Relation rel, List *defList, AlterTableType operation,
					LOCKMODE lockmode)
{
	Oid			relid;
	Relation	pgclass;
	HeapTuple	tuple;
	HeapTuple	newtuple;
	Datum		datum;
	bool		isnull;
	Datum		newOptions;
	Datum		repl_val[Natts_pg_class];
	bool		repl_null[Natts_pg_class];
	bool		repl_repl[Natts_pg_class];
	static char *validnsps[] = HEAP_RELOPT_NAMESPACES;

	if (defList == NIL && operation != AT_ReplaceRelOptions)
		return;					/* nothing to do */

	pgclass = table_open(RelationRelationId, RowExclusiveLock);

	/* Fetch heap tuple */
	relid = RelationGetRelid(rel);
	tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
	if (!HeapTupleIsValid(tuple))
		elog(ERROR, "cache lookup failed for relation %u", relid);

	if (operation == AT_ReplaceRelOptions)
	{
		/*
		 * If we're supposed to replace the reloptions list, we just pretend
		 * there were none before.
		 */
		datum = (Datum) 0;
		isnull = true;
	}
	else
	{
		/* Get the old reloptions */
		datum = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
								&isnull);
	}

	/* Generate new proposed reloptions (text array) */
	newOptions = transformRelOptions(isnull ? (Datum) 0 : datum,
									 defList, NULL, validnsps, false,
									 operation == AT_ResetRelOptions);

	/* Validate the new option set against the relation's relkind */
	switch (rel->rd_rel->relkind)
	{
		case RELKIND_RELATION:
		case RELKIND_TOASTVALUE:
		case RELKIND_MATVIEW:
			(void) heap_reloptions(rel->rd_rel->relkind, newOptions, true);
			break;
		case RELKIND_PARTITIONED_TABLE:
			(void) partitioned_table_reloptions(newOptions, true);
			break;
		case RELKIND_VIEW:
			(void) view_reloptions(newOptions, true);
			break;
		case RELKIND_INDEX:
		case RELKIND_PARTITIONED_INDEX:
			/* index options are validated by the index's access method */
			(void) index_reloptions(rel->rd_indam->amoptions, newOptions, true);
			break;
		default:
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot set options for relation \"%s\"",
							RelationGetRelationName(rel)),
					 errdetail_relkind_not_supported(rel->rd_rel->relkind)));
			break;
	}

	/* Special-case validation of view options */
	if (rel->rd_rel->relkind == RELKIND_VIEW)
	{
		Query	   *view_query = get_view_query(rel);
		List	   *view_options = untransformRelOptions(newOptions);
		ListCell   *cell;
		bool		check_option = false;

		foreach(cell, view_options)
		{
			DefElem    *defel = (DefElem *) lfirst(cell);

			if (strcmp(defel->defname, "check_option") == 0)
				check_option = true;
		}

		/*
		 * If the check option is specified, look to see if the view is
		 * actually auto-updatable or not.
		 */
		if (check_option)
		{
			const char *view_updatable_error =
			view_query_is_auto_updatable(view_query, true);

			if (view_updatable_error)
				ereport(ERROR,
						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						 errmsg("WITH CHECK OPTION is supported only on automatically updatable views"),
						 errhint("%s", _(view_updatable_error))));
		}
	}

	/*
	 * All we need do here is update the pg_class row; the new options will be
	 * propagated into relcaches during post-commit cache inval.
	 */
	memset(repl_val, 0, sizeof(repl_val));
	memset(repl_null, false, sizeof(repl_null));
	memset(repl_repl, false, sizeof(repl_repl));

	if (newOptions != (Datum) 0)
		repl_val[Anum_pg_class_reloptions - 1] = newOptions;
	else
		repl_null[Anum_pg_class_reloptions - 1] = true;

	repl_repl[Anum_pg_class_reloptions - 1] = true;

	newtuple = heap_modify_tuple(tuple, RelationGetDescr(pgclass),
								 repl_val, repl_null, repl_repl);

	CatalogTupleUpdate(pgclass, &newtuple->t_self, newtuple);

	InvokeObjectPostAlterHook(RelationRelationId, RelationGetRelid(rel), 0);

	heap_freetuple(newtuple);

	ReleaseSysCache(tuple);

	/* repeat the whole exercise for the toast table, if there's one */
	if (OidIsValid(rel->rd_rel->reltoastrelid))
	{
		Relation	toastrel;
		Oid			toastid = rel->rd_rel->reltoastrelid;

		toastrel = table_open(toastid, lockmode);

		/* Fetch heap tuple */
		tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(toastid));
		if (!HeapTupleIsValid(tuple))
			elog(ERROR, "cache lookup failed for relation %u", toastid);

		if (operation == AT_ReplaceRelOptions)
		{
			/*
			 * If we're supposed to replace the reloptions list, we just
			 * pretend there were none before.
			 */
			datum = (Datum) 0;
			isnull = true;
		}
		else
		{
			/* Get the old reloptions */
			datum = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
									&isnull);
		}

		/* TOAST options live in the "toast" reloptions namespace */
		newOptions = transformRelOptions(isnull ? (Datum) 0 : datum,
										 defList, "toast", validnsps, false,
										 operation == AT_ResetRelOptions);

		(void) heap_reloptions(RELKIND_TOASTVALUE, newOptions, true);

		memset(repl_val, 0, sizeof(repl_val));
		memset(repl_null, false, sizeof(repl_null));
		memset(repl_repl, false, sizeof(repl_repl));

		if (newOptions != (Datum) 0)
			repl_val[Anum_pg_class_reloptions - 1] = newOptions;
		else
			repl_null[Anum_pg_class_reloptions - 1] = true;

		repl_repl[Anum_pg_class_reloptions - 1] = true;

		newtuple = heap_modify_tuple(tuple, RelationGetDescr(pgclass),
									 repl_val, repl_null, repl_repl);

		CatalogTupleUpdate(pgclass, &newtuple->t_self, newtuple);

		InvokeObjectPostAlterHookArg(RelationRelationId,
									 RelationGetRelid(toastrel), 0,
									 InvalidOid, true);

		heap_freetuple(newtuple);

		ReleaseSysCache(tuple);

		table_close(toastrel, NoLock);
	}

	table_close(pgclass, RowExclusiveLock);
}

/*
 * Execute ALTER TABLE SET TABLESPACE for cases where there is no tuple
 * rewriting to be done, so we just want to copy the data as fast as possible.
 */
static void
ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode)
{
	Relation	rel;
	Oid			reltoastrelid;
	Oid			newrelfilenode;
	RelFileNode newrnode;
	List	   *reltoastidxids = NIL;
	ListCell   *lc;

	/*
	 * Need lock here in case we are recursing to toast table or index
	 */
	rel = relation_open(tableOid, lockmode);

	/* Check first if relation can be moved to new tablespace */
	if (!CheckRelationTableSpaceMove(rel, newTableSpace))
	{
		/* no-op; still fire the hook so extensions observe the command */
		InvokeObjectPostAlterHook(RelationRelationId,
								  RelationGetRelid(rel), 0);
		relation_close(rel, NoLock);
		return;
	}

	reltoastrelid = rel->rd_rel->reltoastrelid;
	/* Fetch the list of indexes on toast relation if necessary */
	if (OidIsValid(reltoastrelid))
	{
		Relation	toastRel = relation_open(reltoastrelid, lockmode);

		reltoastidxids = RelationGetIndexList(toastRel);
		relation_close(toastRel, lockmode);
	}

	/*
	 * Relfilenodes are not unique in databases across tablespaces, so we need
	 * to allocate a new one in the new tablespace.
	 */
	newrelfilenode = GetNewRelFileNode(newTableSpace, NULL,
									   rel->rd_rel->relpersistence);

	/* Open old and new relation */
	newrnode = rel->rd_node;
	newrnode.relNode = newrelfilenode;
	newrnode.spcNode = newTableSpace;

	/* hand off to AM to actually create the new filenode and copy the data */
	if (rel->rd_rel->relkind == RELKIND_INDEX)
	{
		index_copy_data(rel, newrnode);
	}
	else
	{
		Assert(RELKIND_HAS_TABLE_AM(rel->rd_rel->relkind));
		table_relation_copy_data(rel, &newrnode);
	}

	/*
	 * Update the pg_class row.
	 *
	 * NB: This wouldn't work if ATExecSetTableSpace() were allowed to be
	 * executed on pg_class or its indexes (the above copy wouldn't contain
	 * the updated pg_class entry), but that's forbidden with
	 * CheckRelationTableSpaceMove().
	 */
	SetRelationTableSpace(rel, newTableSpace, newrelfilenode);

	InvokeObjectPostAlterHook(RelationRelationId, RelationGetRelid(rel), 0);

	RelationAssumeNewRelfilenode(rel);

	relation_close(rel, NoLock);

	/* Make sure the reltablespace change is visible */
	CommandCounterIncrement();

	/* Move associated toast relation and/or indexes, too (recursively) */
	if (OidIsValid(reltoastrelid))
		ATExecSetTableSpace(reltoastrelid, newTableSpace, lockmode);
	foreach(lc, reltoastidxids)
		ATExecSetTableSpace(lfirst_oid(lc), newTableSpace, lockmode);

	/* Clean up */
	list_free(reltoastidxids);
}

/*
 * Special handling of ALTER TABLE SET TABLESPACE for relations with no
 * storage that have an interest in preserving tablespace.
 *
 * Since these have no storage the tablespace can be updated with a simple
 * metadata only operation to update the tablespace.
 */
static void
ATExecSetTableSpaceNoStorage(Relation rel, Oid newTableSpace)
{
	/*
	 * Shouldn't be called on relations having storage; these are processed in
	 * phase 3.
	 */
	Assert(!RELKIND_HAS_STORAGE(rel->rd_rel->relkind));

	/* check if relation can be moved to its new tablespace */
	if (!CheckRelationTableSpaceMove(rel, newTableSpace))
	{
		/* no-op; still fire the hook so extensions observe the command */
		InvokeObjectPostAlterHook(RelationRelationId,
								  RelationGetRelid(rel),
								  0);
		return;
	}

	/* Update can be done, so change reltablespace */
	SetRelationTableSpace(rel, newTableSpace, InvalidOid);

	InvokeObjectPostAlterHook(RelationRelationId, RelationGetRelid(rel), 0);

	/* Make sure the reltablespace change is visible */
	CommandCounterIncrement();
}

/*
 * Alter Table ALL ... SET TABLESPACE
 *
 * Allows a user to move all objects of some type in a given tablespace in the
 * current database to another tablespace.  Objects can be chosen based on the
 * owner of the object also, to allow users to move only their objects.
 * The user must have CREATE rights on the new tablespace, as usual.
 * The main permissions handling is done by the lower-level table move
 * function.
 *
 * All to-be-moved objects are locked first.  If NOWAIT is specified and the
 * lock can't be acquired then we ereport(ERROR).
 */
Oid
AlterTableMoveAll(AlterTableMoveAllStmt *stmt)
{
	List	   *relations = NIL;
	ListCell   *l;
	ScanKeyData key[1];
	Relation	rel;
	TableScanDesc scan;
	HeapTuple	tuple;
	Oid			orig_tablespaceoid;
	Oid			new_tablespaceoid;
	List	   *role_oids = roleSpecsToIds(stmt->roles);

	/* Ensure we were not asked to move something we can't */
	if (stmt->objtype != OBJECT_TABLE && stmt->objtype != OBJECT_INDEX &&
		stmt->objtype != OBJECT_MATVIEW)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("only tables, indexes, and materialized views exist in tablespaces")));

	/* Get the orig and new tablespace OIDs */
	orig_tablespaceoid = get_tablespace_oid(stmt->orig_tablespacename, false);
	new_tablespaceoid = get_tablespace_oid(stmt->new_tablespacename, false);

	/* Can't move shared relations in to or out of pg_global */
	/* This is also checked by ATExecSetTableSpace, but nice to stop earlier */
	if (orig_tablespaceoid == GLOBALTABLESPACE_OID ||
		new_tablespaceoid == GLOBALTABLESPACE_OID)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("cannot move relations in to or out of pg_global tablespace")));

	/*
	 * Must have CREATE rights on the new tablespace, unless it is the
	 * database default tablespace (which all users implicitly have CREATE
	 * rights on).
	 */
	if (OidIsValid(new_tablespaceoid) && new_tablespaceoid != MyDatabaseTableSpace)
	{
		AclResult	aclresult;

		aclresult = pg_tablespace_aclcheck(new_tablespaceoid, GetUserId(),
										   ACL_CREATE);
		if (aclresult != ACLCHECK_OK)
			aclcheck_error(aclresult, OBJECT_TABLESPACE,
						   get_tablespace_name(new_tablespaceoid));
	}

	/*
	 * Now that the checks are done, check if we should set either to
	 * InvalidOid because it is our database's default tablespace.
	 */
	if (orig_tablespaceoid == MyDatabaseTableSpace)
		orig_tablespaceoid = InvalidOid;

	if (new_tablespaceoid == MyDatabaseTableSpace)
		new_tablespaceoid = InvalidOid;

	/* no-op */
	if (orig_tablespaceoid == new_tablespaceoid)
		return new_tablespaceoid;

	/*
	 * Walk the list of objects in the tablespace and move them.  This will
	 * only find objects in our database, of course.
	 */
	ScanKeyInit(&key[0],
				Anum_pg_class_reltablespace,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(orig_tablespaceoid));

	rel = table_open(RelationRelationId, AccessShareLock);
	scan = table_beginscan_catalog(rel, 1, key);
	while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
	{
		Form_pg_class relForm = (Form_pg_class) GETSTRUCT(tuple);
		Oid			relOid = relForm->oid;

		/*
		 * Do not move objects in pg_catalog as part of this, if an admin
		 * really wishes to do so, they can issue the individual ALTER
		 * commands directly.
		 *
		 * Also, explicitly avoid any shared tables, temp tables, or TOAST
		 * (TOAST will be moved with the main table).
		 */
		if (IsCatalogNamespace(relForm->relnamespace) ||
			relForm->relisshared ||
			isAnyTempNamespace(relForm->relnamespace) ||
			IsToastNamespace(relForm->relnamespace))
			continue;

		/* Only move the object type requested */
		if ((stmt->objtype == OBJECT_TABLE &&
			 relForm->relkind != RELKIND_RELATION &&
			 relForm->relkind != RELKIND_PARTITIONED_TABLE) ||
			(stmt->objtype == OBJECT_INDEX &&
			 relForm->relkind != RELKIND_INDEX &&
			 relForm->relkind != RELKIND_PARTITIONED_INDEX) ||
			(stmt->objtype == OBJECT_MATVIEW &&
			 relForm->relkind != RELKIND_MATVIEW))
			continue;

		/* Check if we are only moving objects owned by certain roles */
		if (role_oids != NIL && !list_member_oid(role_oids, relForm->relowner))
			continue;

		/*
		 * Handle permissions-checking here since we are locking the tables
		 * and also to avoid doing a bunch of work only to fail part-way.
		 * Note that permissions will also be checked by
		 * AlterTableInternal().
		 *
		 * Caller must be considered an owner on the table to move it.
		 */
		if (!pg_class_ownercheck(relOid, GetUserId()))
			aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(get_rel_relkind(relOid)),
						   NameStr(relForm->relname));

		if (stmt->nowait &&
			!ConditionalLockRelationOid(relOid, AccessExclusiveLock))
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_IN_USE),
					 errmsg("aborting because lock on relation \"%s.%s\" is not available",
							get_namespace_name(relForm->relnamespace),
							NameStr(relForm->relname))));
		else
			LockRelationOid(relOid, AccessExclusiveLock);

		/* Add to our list of objects to move */
		relations = lappend_oid(relations, relOid);
	}

	table_endscan(scan);
	table_close(rel, AccessShareLock);

	if (relations == NIL)
		ereport(NOTICE,
				(errcode(ERRCODE_NO_DATA_FOUND),
				 errmsg("no matching relations in tablespace \"%s\" found",
						orig_tablespaceoid == InvalidOid ? "(database default)" :
						get_tablespace_name(orig_tablespaceoid))));

	/* Everything is locked, loop through and move all of the relations. */
	foreach(l, relations)
	{
		List	   *cmds = NIL;
		AlterTableCmd *cmd = makeNode(AlterTableCmd);

		cmd->subtype = AT_SetTableSpace;
		cmd->name = stmt->new_tablespacename;

		cmds = lappend(cmds, cmd);

		EventTriggerAlterTableStart((Node *) stmt);
		/* OID is set by AlterTableInternal */
		AlterTableInternal(lfirst_oid(l), cmds, false);
		EventTriggerAlterTableEnd();
	}

	return new_tablespaceoid;
}

/*
 * Copy an index's physical storage fork-by-fork to a new relfilenode,
 * then schedule the old storage for unlinking.  Used by
 * ATExecSetTableSpace() since indexes have no table AM to do the copy.
 */
static void
index_copy_data(Relation rel, RelFileNode newrnode)
{
	SMgrRelation dstrel;

	dstrel = smgropen(newrnode, rel->rd_backend);

	/*
	 * Since we copy the file directly without looking at the shared buffers,
	 * we'd better first flush out any pages of the source relation that are
	 * in shared buffers.  We assume no new changes will be made while we are
	 * holding exclusive lock on the rel.
	 */
	FlushRelationBuffers(rel);

	/*
	 * Create and copy all forks of the relation, and schedule unlinking of
	 * old physical files.
	 *
	 * NOTE: any conflict in relfilenode value will be caught in
	 * RelationCreateStorage().
	 */
	RelationCreateStorage(newrnode, rel->rd_rel->relpersistence, true);

	/* copy main fork */
	RelationCopyStorage(RelationGetSmgr(rel), dstrel, MAIN_FORKNUM,
						rel->rd_rel->relpersistence);

	/* copy those extra forks that exist */
	for (ForkNumber forkNum = MAIN_FORKNUM + 1;
		 forkNum <= MAX_FORKNUM; forkNum++)
	{
		if (smgrexists(RelationGetSmgr(rel), forkNum))
		{
			smgrcreate(dstrel, forkNum, false);

			/*
			 * WAL log creation if the relation is persistent, or this is the
			 * init fork of an unlogged relation.
			 */
			if (RelationIsPermanent(rel) ||
				(rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
				 forkNum == INIT_FORKNUM))
				log_smgrcreate(&newrnode, forkNum);
			RelationCopyStorage(RelationGetSmgr(rel), dstrel, forkNum,
								rel->rd_rel->relpersistence);
		}
	}

	/* drop old relation, and close new one */
	RelationDropStorage(rel);
	smgrclose(dstrel);
}

/*
 * ALTER TABLE ENABLE/DISABLE TRIGGER
 *
 * We just pass this off to trigger.c.
 */
static void
ATExecEnableDisableTrigger(Relation rel, const char *trigname,
						   char fires_when, bool skip_system, bool recurse,
						   LOCKMODE lockmode)
{
	EnableDisableTriggerNew2(rel, trigname, InvalidOid,
							 fires_when, skip_system, recurse,
							 lockmode);
}

/*
 * ALTER TABLE ENABLE/DISABLE RULE
 *
 * We just pass this off to rewriteDefine.c.
 */
static void
ATExecEnableDisableRule(Relation rel, const char *rulename,
						char fires_when, LOCKMODE lockmode)
{
	EnableDisableRule(rel, rulename, fires_when);
}

/*
 * ALTER TABLE INHERIT
 *
 * Add a parent to the child's parents.  This verifies that all the columns
 * and check constraints of the parent appear in the child and that they have
 * the same data types and expressions.
 */
static void
ATPrepAddInherit(Relation child_rel)
{
	/* typed tables have their shape fixed by the composite type */
	if (child_rel->rd_rel->reloftype)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("cannot change inheritance of typed table")));

	if (child_rel->rd_rel->relispartition)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("cannot change inheritance of a partition")));

	if (child_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("cannot change inheritance of partitioned table")));
}

/*
 * Return the address of the new parent relation.
 */
static ObjectAddress
ATExecAddInherit(Relation child_rel, RangeVar *parent, LOCKMODE lockmode)
{
	Relation	parent_rel;
	List	   *children;
	ObjectAddress address;
	const char *trigger_name;

	/*
	 * A self-exclusive lock is needed here.  See the similar case in
	 * MergeAttributes() for a full explanation.
	 */
	parent_rel = table_openrv(parent, ShareUpdateExclusiveLock);

	/*
	 * Must be owner of both parent and child -- child was checked by
	 * ATSimplePermissions call in ATPrepCmd
	 */
	ATSimplePermissions(AT_AddInherit, parent_rel, ATT_TABLE | ATT_FOREIGN_TABLE);

	/* Permanent rels cannot inherit from temporary ones */
	if (parent_rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
		child_rel->rd_rel->relpersistence != RELPERSISTENCE_TEMP)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("cannot inherit from temporary relation \"%s\"",
						RelationGetRelationName(parent_rel))));

	/* If parent rel is temp, it must belong to this session */
	if (parent_rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
		!parent_rel->rd_islocaltemp)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("cannot inherit from temporary relation of another session")));

	/* Ditto for the child */
	if (child_rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
		!child_rel->rd_islocaltemp)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("cannot inherit to temporary relation of another session")));

	/* Prevent partitioned tables from becoming inheritance parents */
	if (parent_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("cannot inherit from partitioned table \"%s\"",
						parent->relname)));

	/* Likewise for partitions */
	if (parent_rel->rd_rel->relispartition)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("cannot inherit from a partition")));

	/*
	 * Prevent circularity by seeing if proposed parent inherits from child.
	 * (In particular, this disallows making a rel inherit from itself.)
	 *
	 * This is not completely bulletproof because of race conditions: in
	 * multi-level inheritance trees, someone else could concurrently be
	 * making another inheritance link that closes the loop but does not join
	 * either of the rels we have locked.  Preventing that seems to require
	 * exclusive locks on the entire inheritance tree, which is a cure worse
	 * than the disease.  find_all_inheritors() will cope with circularity
	 * anyway, so don't sweat it too much.
	 *
	 * We use weakest lock we can on child's children, namely AccessShareLock.
	 */
	children = find_all_inheritors(RelationGetRelid(child_rel),
								   AccessShareLock, NULL);

	if (list_member_oid(children, RelationGetRelid(parent_rel)))
		ereport(ERROR,
				(errcode(ERRCODE_DUPLICATE_TABLE),
				 errmsg("circular inheritance not allowed"),
				 errdetail("\"%s\" is already a child of \"%s\".",
						   parent->relname,
						   RelationGetRelationName(child_rel))));

	/*
	 * If child_rel has row-level triggers with transition tables, we
	 * currently don't allow it to become an inheritance child.  See also
	 * prohibitions in ATExecAttachPartition() and CreateTrigger().
	 */
	trigger_name = FindTriggerIncompatibleWithInheritance(child_rel->trigdesc);
	if (trigger_name != NULL)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("trigger \"%s\" prevents table \"%s\" from becoming an inheritance child",
						trigger_name, RelationGetRelationName(child_rel)),
				 errdetail("ROW triggers with transition tables are not supported in inheritance hierarchies.")));

	/* OK to create inheritance */
	CreateInheritance(child_rel, parent_rel);

	ObjectAddressSet(address, RelationRelationId,
					 RelationGetRelid(parent_rel));

	/* keep our lock on the parent relation until commit */
	table_close(parent_rel, NoLock);

	return address;
}

/*
 * CreateInheritance
 *		Catalog manipulation portion of creating inheritance between a child
 *		table and a parent table.
 *
 * Common to ATExecAddInherit() and ATExecAttachPartition().
 */
static void
CreateInheritance(Relation child_rel, Relation parent_rel)
{
	Relation	catalogRelation;
	SysScanDesc scan;
	ScanKeyData key;
	HeapTuple	inheritsTuple;
	int32		inhseqno;

	/* Note: get RowExclusiveLock because we will write pg_inherits below. */
	catalogRelation = table_open(InheritsRelationId, RowExclusiveLock);

	/*
	 * Check for duplicates in the list of parents, and determine the highest
	 * inhseqno already present; we'll use the next one for the new parent.
	 * Also, if proposed child is a partition, it cannot already be
	 * inheriting.
	 *
	 * Note: we do not reject the case where the child already inherits from
	 * the parent indirectly; CREATE TABLE doesn't reject comparable cases.
	 */
	ScanKeyInit(&key,
				Anum_pg_inherits_inhrelid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(RelationGetRelid(child_rel)));
	scan = systable_beginscan(catalogRelation, InheritsRelidSeqnoIndexId,
							  true, NULL, 1, &key);

	/* inhseqno sequences start at 1 */
	inhseqno = 0;
	while (HeapTupleIsValid(inheritsTuple = systable_getnext(scan)))
	{
		Form_pg_inherits inh = (Form_pg_inherits) GETSTRUCT(inheritsTuple);

		if (inh->inhparent == RelationGetRelid(parent_rel))
			ereport(ERROR,
					(errcode(ERRCODE_DUPLICATE_TABLE),
					 errmsg("relation \"%s\" would be inherited from more than once",
							RelationGetRelationName(parent_rel))));

		if (inh->inhseqno > inhseqno)
			inhseqno = inh->inhseqno;
	}
	systable_endscan(scan);

	/* Match up the columns and bump attinhcount as needed */
	MergeAttributesIntoExisting(child_rel, parent_rel);

	/* Match up the constraints and bump coninhcount as needed */
	MergeConstraintsIntoExisting(child_rel, parent_rel);

	/*
	 * OK, it looks valid.  Make the catalog entries that show inheritance.
	 */
	StoreCatalogInheritance1(RelationGetRelid(child_rel),
							 RelationGetRelid(parent_rel),
							 inhseqno + 1,
							 catalogRelation,
							 parent_rel->rd_rel->relkind ==
							 RELKIND_PARTITIONED_TABLE);

	/* Now we're done with pg_inherits */
	table_close(catalogRelation, RowExclusiveLock);
}

/*
 * Obtain the source-text form of the constraint expression for a check
 * constraint, given its pg_constraint tuple
 */
static char *
decompile_conbin(HeapTuple contup, TupleDesc tupdesc)
{
	Form_pg_constraint con;
	bool		isnull;
	Datum		attr;
	Datum		expr;

	con = (Form_pg_constraint) GETSTRUCT(contup);
	attr = heap_getattr(contup, Anum_pg_constraint_conbin, tupdesc, &isnull);
	if (isnull)
		elog(ERROR, "null conbin for constraint %u", con->oid);

	/* deparse against the constraint's own relation for column names */
	expr = DirectFunctionCall2(pg_get_expr, attr,
							   ObjectIdGetDatum(con->conrelid));
	return TextDatumGetCString(expr);
}

/*
 * Determine whether two check constraints are functionally equivalent
 *
 * The test we apply is to see whether they reverse-compile to the same
 * source string.  This insulates us from issues like whether attributes
 * have the same physical column numbers in parent and child relations.
 */
static bool
constraints_equivalent(HeapTuple a, HeapTuple b, TupleDesc tupleDesc)
{
	Form_pg_constraint acon = (Form_pg_constraint) GETSTRUCT(a);
	Form_pg_constraint bcon = (Form_pg_constraint) GETSTRUCT(b);

	if (acon->condeferrable != bcon->condeferrable ||
		acon->condeferred != bcon->condeferred ||
		strcmp(decompile_conbin(a, tupleDesc),
			   decompile_conbin(b, tupleDesc)) != 0)
		return false;
	else
		return true;
}

/*
 * Check columns in child table match up with columns in parent, and increment
 * their attinhcount.
 *
 * Called by CreateInheritance
 *
 * Currently all parent columns must be found in child.  Missing columns are
 * an error.  One day we might consider creating new columns like CREATE TABLE
 * does.
 * However, that is widely unpopular --- in the common use case of partitioned
 * tables it's a foot-gun.
 *
 * The data type must match exactly.  If the parent column is NOT NULL then
 * the child must be as well.  Defaults are not compared, however.
 */
static void
MergeAttributesIntoExisting(Relation child_rel, Relation parent_rel)
{
	Relation	attrrel;
	AttrNumber	parent_attno;
	int			parent_natts;
	TupleDesc	tupleDesc;
	HeapTuple	tuple;
	bool		child_is_partition = false;

	attrrel = table_open(AttributeRelationId, RowExclusiveLock);

	tupleDesc = RelationGetDescr(parent_rel);
	parent_natts = tupleDesc->natts;

	/* If parent_rel is a partitioned table, child_rel must be a partition */
	if (parent_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
		child_is_partition = true;

	for (parent_attno = 1; parent_attno <= parent_natts; parent_attno++)
	{
		Form_pg_attribute attribute = TupleDescAttr(tupleDesc,
													parent_attno - 1);
		char	   *attributeName = NameStr(attribute->attname);

		/* Ignore dropped columns in the parent. */
		if (attribute->attisdropped)
			continue;

		/* Find same column in child (matching on column name). */
		tuple = SearchSysCacheCopyAttName(RelationGetRelid(child_rel),
										  attributeName);
		if (HeapTupleIsValid(tuple))
		{
			/* Check they are same type, typmod, and collation */
			Form_pg_attribute childatt = (Form_pg_attribute) GETSTRUCT(tuple);

			if (attribute->atttypid != childatt->atttypid ||
				attribute->atttypmod != childatt->atttypmod)
				ereport(ERROR,
						(errcode(ERRCODE_DATATYPE_MISMATCH),
						 errmsg("child table \"%s\" has different type for column \"%s\"",
								RelationGetRelationName(child_rel),
								attributeName)));

			if (attribute->attcollation != childatt->attcollation)
				ereport(ERROR,
						(errcode(ERRCODE_COLLATION_MISMATCH),
						 errmsg("child table \"%s\" has different collation for column \"%s\"",
								RelationGetRelationName(child_rel),
								attributeName)));

			/*
			 * Check child doesn't discard NOT NULL property.  (Other
			 * constraints are checked elsewhere.)
			 */
			if (attribute->attnotnull && !childatt->attnotnull)
				ereport(ERROR,
						(errcode(ERRCODE_DATATYPE_MISMATCH),
						 errmsg("column \"%s\" in child table must be marked NOT NULL",
								attributeName)));

			/*
			 * If parent column is generated, child column must be, too.
			 */
			if (attribute->attgenerated && !childatt->attgenerated)
				ereport(ERROR,
						(errcode(ERRCODE_DATATYPE_MISMATCH),
						 errmsg("column \"%s\" in child table must be a generated column",
								attributeName)));

			/*
			 * Check that both generation expressions match.
			 *
			 * The test we apply is to see whether they reverse-compile to the
			 * same source string.  This insulates us from issues like whether
			 * attributes have the same physical column numbers in parent and
			 * child relations.  (See also constraints_equivalent().)
			 */
			if (attribute->attgenerated && childatt->attgenerated)
			{
				TupleConstr *child_constr = child_rel->rd_att->constr;
				TupleConstr *parent_constr = parent_rel->rd_att->constr;
				char	   *child_expr = NULL;
				char	   *parent_expr = NULL;

				Assert(child_constr != NULL);
				Assert(parent_constr != NULL);

				for (int i = 0; i < child_constr->num_defval; i++)
				{
					if (child_constr->defval[i].adnum == childatt->attnum)
					{
						child_expr =
							TextDatumGetCString(DirectFunctionCall2(pg_get_expr,
																	CStringGetTextDatum(child_constr->defval[i].adbin),
																	ObjectIdGetDatum(child_rel->rd_id)));
						break;
					}
				}
				Assert(child_expr != NULL);

				for (int i = 0; i < parent_constr->num_defval; i++)
				{
					if (parent_constr->defval[i].adnum == attribute->attnum)
					{
						parent_expr =
							TextDatumGetCString(DirectFunctionCall2(pg_get_expr,
																	CStringGetTextDatum(parent_constr->defval[i].adbin),
																	ObjectIdGetDatum(parent_rel->rd_id)));
						break;
					}
				}
				Assert(parent_expr != NULL);

				if (strcmp(child_expr, parent_expr) != 0)
					ereport(ERROR,
							(errcode(ERRCODE_DATATYPE_MISMATCH),
							 errmsg("column \"%s\" in child table has a conflicting generation expression",
									attributeName)));
			}

			/*
			 * OK, bump the child column's inheritance count.  (If we fail
			 * later on, this change will just roll back.)
			 */
			childatt->attinhcount++;

			/*
			 * In case of partitions, we must enforce that value of attislocal
			 * is same in all partitions.  (Note: there are only inherited
			 * attributes in partitions)
			 */
			if (child_is_partition)
			{
				Assert(childatt->attinhcount == 1);
				childatt->attislocal = false;
			}

			CatalogTupleUpdate(attrrel, &tuple->t_self, tuple);
			heap_freetuple(tuple);
		}
		else
		{
			ereport(ERROR,
					(errcode(ERRCODE_DATATYPE_MISMATCH),
					 errmsg("child table is missing column \"%s\"",
							attributeName)));
		}
	}

	table_close(attrrel, RowExclusiveLock);
}

/*
 * Check constraints in child table match up with constraints in parent,
 * and increment their coninhcount.
 *
 * Constraints that are marked ONLY in the parent are ignored.
 *
 * Called by CreateInheritance
 *
 * Currently all constraints in parent must be present in the child.  One day
 * we may consider adding new constraints like CREATE TABLE does.
 *
 * XXX This is O(N^2) which may be an issue with tables with hundreds of
 * constraints.  As long as tables have more like 10 constraints it shouldn't
 * be a problem though.  Even 100 constraints ought not be the end of the
 * world.
 *
 * XXX See MergeWithExistingConstraint too if you change this code.
 */
static void
MergeConstraintsIntoExisting(Relation child_rel, Relation parent_rel)
{
	Relation	catalog_relation;
	TupleDesc	tuple_desc;
	SysScanDesc parent_scan;
	ScanKeyData parent_key;
	HeapTuple	parent_tuple;
	bool		child_is_partition = false;

	catalog_relation = table_open(ConstraintRelationId, RowExclusiveLock);
	tuple_desc = RelationGetDescr(catalog_relation);

	/* If parent_rel is a partitioned table, child_rel must be a partition */
	if (parent_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
		child_is_partition = true;

	/* Outer loop scans through the parent's constraint definitions */
	ScanKeyInit(&parent_key,
				Anum_pg_constraint_conrelid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(RelationGetRelid(parent_rel)));
	parent_scan = systable_beginscan(catalog_relation, ConstraintRelidTypidNameIndexId,
									 true, NULL, 1, &parent_key);

	while (HeapTupleIsValid(parent_tuple = systable_getnext(parent_scan)))
	{
		Form_pg_constraint parent_con = (Form_pg_constraint) GETSTRUCT(parent_tuple);
		SysScanDesc child_scan;
		ScanKeyData child_key;
		HeapTuple	child_tuple;
		bool		found = false;

		/* Only CHECK constraints participate in inheritance merging */
		if (parent_con->contype != CONSTRAINT_CHECK)
			continue;

		/* if the parent's constraint is marked NO INHERIT, it's not inherited */
		if (parent_con->connoinherit)
			continue;

		/* Search for a child constraint matching this one (by name) */
		ScanKeyInit(&child_key,
					Anum_pg_constraint_conrelid,
					BTEqualStrategyNumber, F_OIDEQ,
					ObjectIdGetDatum(RelationGetRelid(child_rel)));
		child_scan = systable_beginscan(catalog_relation, ConstraintRelidTypidNameIndexId,
										true, NULL, 1, &child_key);

		while (HeapTupleIsValid(child_tuple = systable_getnext(child_scan)))
		{
			Form_pg_constraint child_con = (Form_pg_constraint) GETSTRUCT(child_tuple);
			HeapTuple	child_copy;

			if (child_con->contype != CONSTRAINT_CHECK)
				continue;

			if (strcmp(NameStr(parent_con->conname),
					   NameStr(child_con->conname)) != 0)
				continue;

			/* Same name but a different expression is an error, not a merge */
			if (!constraints_equivalent(parent_tuple, child_tuple, tuple_desc))
				ereport(ERROR,
						(errcode(ERRCODE_DATATYPE_MISMATCH),
						 errmsg("child table \"%s\" has different definition for check constraint \"%s\"",
								RelationGetRelationName(child_rel),
								NameStr(parent_con->conname))));

			/* If the child constraint is "no inherit" then cannot merge */
			if (child_con->connoinherit)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
						 errmsg("constraint \"%s\" conflicts with non-inherited constraint on child table \"%s\"",
								NameStr(child_con->conname),
								RelationGetRelationName(child_rel))));

			/*
			 * If the child constraint is "not valid" then cannot merge with a
			 * valid parent constraint
			 */
			if (parent_con->convalidated && !child_con->convalidated)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
						 errmsg("constraint \"%s\" conflicts with NOT VALID constraint on child table \"%s\"",
								NameStr(child_con->conname),
								RelationGetRelationName(child_rel))));

			/*
			 * OK, bump the child constraint's inheritance count.  (If we fail
			 * later on, this change will just roll back.)  Work on a copy of
			 * the tuple, since the scan's tuple is read-only.
			 */
			child_copy = heap_copytuple(child_tuple);
			child_con = (Form_pg_constraint) GETSTRUCT(child_copy);
			child_con->coninhcount++;

			/*
			 * In case of partitions, an inherited constraint must be
			 * inherited only once since it cannot have multiple parents and
			 * it is never considered local.
			 */
			if (child_is_partition)
			{
				Assert(child_con->coninhcount == 1);
				child_con->conislocal = false;
			}

			CatalogTupleUpdate(catalog_relation, &child_copy->t_self, child_copy);
			heap_freetuple(child_copy);

			found = true;
			break;
		}

		systable_endscan(child_scan);

		/* Every inheritable parent constraint must exist in the child */
		if (!found)
			ereport(ERROR,
					(errcode(ERRCODE_DATATYPE_MISMATCH),
					 errmsg("child table is missing constraint \"%s\"",
							NameStr(parent_con->conname))));
	}

	systable_endscan(parent_scan);
	table_close(catalog_relation, RowExclusiveLock);
}

/*
 * ALTER TABLE NO INHERIT
 *
 * Return value is the address of the relation that is no longer parent.
 */
static ObjectAddress
ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode)
{
	ObjectAddress address;
	Relation	parent_rel;

	/* Partitions are detached with DETACH PARTITION, not NO INHERIT */
	if (rel->rd_rel->relispartition)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("cannot change inheritance of a partition")));

	/*
	 * AccessShareLock on the parent is probably enough, seeing that DROP
	 * TABLE doesn't lock parent tables at all.  We need some lock since we'll
	 * be inspecting the parent's schema.
	 */
	parent_rel = table_openrv(parent, AccessShareLock);

	/*
	 * We don't bother to check ownership of the parent table --- ownership of
	 * the child is presumed enough rights.
	 */

	/* Off to RemoveInheritance() where most of the work happens */
	RemoveInheritance(rel, parent_rel, false);

	ObjectAddressSet(address, RelationRelationId,
					 RelationGetRelid(parent_rel));

	/* keep our lock on the parent relation until commit */
	table_close(parent_rel, NoLock);

	return address;
}

/*
 * MarkInheritDetached
 *
 * Set inhdetachpending for a partition, for ATExecDetachPartition
 * in concurrent mode.  While at it, verify that no other partition is
 * already pending detach.
 */
static void
MarkInheritDetached(Relation child_rel, Relation parent_rel)
{
	Relation	catalogRelation;
	SysScanDesc scan;
	ScanKeyData key;
	HeapTuple	inheritsTuple;
	bool		found = false;

	/* Only used for partitioned tables; plain inheritance never gets here */
	Assert(parent_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);

	/*
	 * Find pg_inherits entries by inhparent.  (We need to scan them all in
	 * order to verify that no other partition is pending detach.)
	 */
	catalogRelation = table_open(InheritsRelationId, RowExclusiveLock);
	ScanKeyInit(&key,
				Anum_pg_inherits_inhparent,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(RelationGetRelid(parent_rel)));
	scan = systable_beginscan(catalogRelation, InheritsParentIndexId,
							  true, NULL, 1, &key);

	while (HeapTupleIsValid(inheritsTuple = systable_getnext(scan)))
	{
		Form_pg_inherits inhForm;

		inhForm = (Form_pg_inherits) GETSTRUCT(inheritsTuple);
		/* At most one partition may be pending detach at any time */
		if (inhForm->inhdetachpending)
			ereport(ERROR,
					errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					errmsg("partition \"%s\" already pending detach in partitioned table \"%s.%s\"",
						   get_rel_name(inhForm->inhrelid),
						   get_namespace_name(parent_rel->rd_rel->relnamespace),
						   RelationGetRelationName(parent_rel)),
					errhint("Use ALTER TABLE ... DETACH PARTITION ... FINALIZE to complete the pending detach operation."));

		if (inhForm->inhrelid == RelationGetRelid(child_rel))
		{
			HeapTuple	newtup;

			newtup = heap_copytuple(inheritsTuple);
			((Form_pg_inherits) GETSTRUCT(newtup))->inhdetachpending = true;

			CatalogTupleUpdate(catalogRelation,
							   &inheritsTuple->t_self,
							   newtup);
			found = true;
			heap_freetuple(newtup);
			/* keep looking, to ensure we catch others pending detach */
		}
	}

	/* Done */
	systable_endscan(scan);
	table_close(catalogRelation, RowExclusiveLock);

	/* The named relation was not among the parent's partitions */
	if (!found)
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_TABLE),
				 errmsg("relation \"%s\" is not a partition of relation \"%s\"",
						RelationGetRelationName(child_rel),
						RelationGetRelationName(parent_rel))));
}

/*
 * RemoveInheritance
 *
 * Drop a parent from the child's parents.  This just adjusts the attinhcount
 * and attislocal of the columns and removes the pg_inherit and pg_depend
 * entries.  expect_detached is passed down to DeleteInheritsTuple, q.v..
 *
 * If attinhcount goes to 0 then attislocal gets set to true.  If it goes back
 * up attislocal stays true, which means if a child is ever removed from a
 * parent then its columns will never be automatically dropped which may
 * surprise.  But at least we'll never surprise by dropping columns someone
 * isn't expecting to be dropped which would actually mean data loss.
 *
 * coninhcount and conislocal for inherited constraints are adjusted in
 * exactly the same way.
 *
 * Common to ATExecDropInherit() and ATExecDetachPartition().
+ */ +static void +RemoveInheritance(Relation child_rel, Relation parent_rel, bool expect_detached) +{ + Relation catalogRelation; + SysScanDesc scan; + ScanKeyData key[3]; + HeapTuple attributeTuple, + constraintTuple; + List *connames; + bool found; + bool child_is_partition = false; + + /* If parent_rel is a partitioned table, child_rel must be a partition */ + if (parent_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + child_is_partition = true; + + found = DeleteInheritsTuple(RelationGetRelid(child_rel), + RelationGetRelid(parent_rel), + expect_detached, + RelationGetRelationName(child_rel)); + if (!found) + { + if (child_is_partition) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_TABLE), + errmsg("relation \"%s\" is not a partition of relation \"%s\"", + RelationGetRelationName(child_rel), + RelationGetRelationName(parent_rel)))); + else + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_TABLE), + errmsg("relation \"%s\" is not a parent of relation \"%s\"", + RelationGetRelationName(parent_rel), + RelationGetRelationName(child_rel)))); + } + + /* + * Search through child columns looking for ones matching parent rel + */ + catalogRelation = table_open(AttributeRelationId, RowExclusiveLock); + ScanKeyInit(&key[0], + Anum_pg_attribute_attrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(child_rel))); + scan = systable_beginscan(catalogRelation, AttributeRelidNumIndexId, + true, NULL, 1, key); + while (HeapTupleIsValid(attributeTuple = systable_getnext(scan))) + { + Form_pg_attribute att = (Form_pg_attribute) GETSTRUCT(attributeTuple); + + /* Ignore if dropped or not inherited */ + if (att->attisdropped) + continue; + if (att->attinhcount <= 0) + continue; + + if (SearchSysCacheExistsAttName(RelationGetRelid(parent_rel), + NameStr(att->attname))) + { + /* Decrement inhcount and possibly set islocal to true */ + HeapTuple copyTuple = heap_copytuple(attributeTuple); + Form_pg_attribute copy_att = (Form_pg_attribute) GETSTRUCT(copyTuple); + 
+ copy_att->attinhcount--; + if (copy_att->attinhcount == 0) + copy_att->attislocal = true; + + CatalogTupleUpdate(catalogRelation, ©Tuple->t_self, copyTuple); + heap_freetuple(copyTuple); + } + } + systable_endscan(scan); + table_close(catalogRelation, RowExclusiveLock); + + /* + * Likewise, find inherited check constraints and disinherit them. To do + * this, we first need a list of the names of the parent's check + * constraints. (We cheat a bit by only checking for name matches, + * assuming that the expressions will match.) + */ + catalogRelation = table_open(ConstraintRelationId, RowExclusiveLock); + ScanKeyInit(&key[0], + Anum_pg_constraint_conrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(parent_rel))); + scan = systable_beginscan(catalogRelation, ConstraintRelidTypidNameIndexId, + true, NULL, 1, key); + + connames = NIL; + + while (HeapTupleIsValid(constraintTuple = systable_getnext(scan))) + { + Form_pg_constraint con = (Form_pg_constraint) GETSTRUCT(constraintTuple); + + if (con->contype == CONSTRAINT_CHECK) + connames = lappend(connames, pstrdup(NameStr(con->conname))); + } + + systable_endscan(scan); + + /* Now scan the child's constraints */ + ScanKeyInit(&key[0], + Anum_pg_constraint_conrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(child_rel))); + scan = systable_beginscan(catalogRelation, ConstraintRelidTypidNameIndexId, + true, NULL, 1, key); + + while (HeapTupleIsValid(constraintTuple = systable_getnext(scan))) + { + Form_pg_constraint con = (Form_pg_constraint) GETSTRUCT(constraintTuple); + bool match; + ListCell *lc; + + if (con->contype != CONSTRAINT_CHECK) + continue; + + match = false; + foreach(lc, connames) + { + if (strcmp(NameStr(con->conname), (char *) lfirst(lc)) == 0) + { + match = true; + break; + } + } + + if (match) + { + /* Decrement inhcount and possibly set islocal to true */ + HeapTuple copyTuple = heap_copytuple(constraintTuple); + Form_pg_constraint copy_con = 
(Form_pg_constraint) GETSTRUCT(copyTuple); + + if (copy_con->coninhcount <= 0) /* shouldn't happen */ + elog(ERROR, "relation %u has non-inherited constraint \"%s\"", + RelationGetRelid(child_rel), NameStr(copy_con->conname)); + + copy_con->coninhcount--; + if (copy_con->coninhcount == 0) + copy_con->conislocal = true; + + CatalogTupleUpdate(catalogRelation, ©Tuple->t_self, copyTuple); + heap_freetuple(copyTuple); + } + } + + systable_endscan(scan); + table_close(catalogRelation, RowExclusiveLock); + + drop_parent_dependency(RelationGetRelid(child_rel), + RelationRelationId, + RelationGetRelid(parent_rel), + child_dependency_type(child_is_partition)); + + /* + * Post alter hook of this inherits. Since object_access_hook doesn't take + * multiple object identifiers, we relay oid of parent relation using + * auxiliary_id argument. + */ + InvokeObjectPostAlterHookArg(InheritsRelationId, + RelationGetRelid(child_rel), 0, + RelationGetRelid(parent_rel), false); +} + +/* + * Drop the dependency created by StoreCatalogInheritance1 (CREATE TABLE + * INHERITS/ALTER TABLE INHERIT -- refclassid will be RelationRelationId) or + * heap_create_with_catalog (CREATE TABLE OF/ALTER TABLE OF -- refclassid will + * be TypeRelationId). There's no convenient way to do this, so go trawling + * through pg_depend. 
 */
static void
drop_parent_dependency(Oid relid, Oid refclassid, Oid refobjid,
					   DependencyType deptype)
{
	Relation	catalogRelation;
	SysScanDesc scan;
	ScanKeyData key[3];
	HeapTuple	depTuple;

	catalogRelation = table_open(DependRelationId, RowExclusiveLock);

	/* Scan pg_depend by dependent object: (classid, objid, objsubid = 0) */
	ScanKeyInit(&key[0],
				Anum_pg_depend_classid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(RelationRelationId));
	ScanKeyInit(&key[1],
				Anum_pg_depend_objid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(relid));
	ScanKeyInit(&key[2],
				Anum_pg_depend_objsubid,
				BTEqualStrategyNumber, F_INT4EQ,
				Int32GetDatum(0));

	scan = systable_beginscan(catalogRelation, DependDependerIndexId, true,
							  NULL, 3, key);

	while (HeapTupleIsValid(depTuple = systable_getnext(scan)))
	{
		Form_pg_depend dep = (Form_pg_depend) GETSTRUCT(depTuple);

		/* Delete only entries matching the referenced object and deptype */
		if (dep->refclassid == refclassid &&
			dep->refobjid == refobjid &&
			dep->refobjsubid == 0 &&
			dep->deptype == deptype)
			CatalogTupleDelete(catalogRelation, &depTuple->t_self);
	}

	systable_endscan(scan);
	table_close(catalogRelation, RowExclusiveLock);
}

/*
 * ALTER TABLE OF
 *
 * Attach a table to a composite type, as though it had been created with CREATE
 * TABLE OF.  All attname, atttypid, atttypmod and attcollation must match.  The
 * subject table must not have inheritance parents.  These restrictions ensure
 * that you cannot create a configuration impossible with CREATE TABLE OF alone.
 *
 * The address of the type is returned.
 */
static ObjectAddress
ATExecAddOf(Relation rel, const TypeName *ofTypename, LOCKMODE lockmode)
{
	Oid			relid = RelationGetRelid(rel);
	Type		typetuple;
	Form_pg_type typeform;
	Oid			typeid;
	Relation	inheritsRelation,
				relationRelation;
	SysScanDesc scan;
	ScanKeyData key;
	AttrNumber	table_attno,
				type_attno;
	TupleDesc	typeTupleDesc,
				tableTupleDesc;
	ObjectAddress tableobj,
				typeobj;
	HeapTuple	classtuple;

	/* Validate the type. */
	typetuple = typenameType(NULL, ofTypename, NULL);
	check_of_type(typetuple);
	typeform = (Form_pg_type) GETSTRUCT(typetuple);
	typeid = typeform->oid;

	/* Fail if the table has any inheritance parents. */
	inheritsRelation = table_open(InheritsRelationId, AccessShareLock);
	ScanKeyInit(&key,
				Anum_pg_inherits_inhrelid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(relid));
	scan = systable_beginscan(inheritsRelation, InheritsRelidSeqnoIndexId,
							  true, NULL, 1, &key);
	if (HeapTupleIsValid(systable_getnext(scan)))
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("typed tables cannot inherit")));
	systable_endscan(scan);
	table_close(inheritsRelation, AccessShareLock);

	/*
	 * Check the tuple descriptors for compatibility.  Unlike inheritance, we
	 * require that the order also match.  However, attnotnull need not match.
	 */
	typeTupleDesc = lookup_rowtype_tupdesc(typeid, -1);
	tableTupleDesc = RelationGetDescr(rel);
	table_attno = 1;
	for (type_attno = 1; type_attno <= typeTupleDesc->natts; type_attno++)
	{
		Form_pg_attribute type_attr,
					table_attr;
		const char *type_attname,
				   *table_attname;

		/* Get the next non-dropped type attribute. */
		type_attr = TupleDescAttr(typeTupleDesc, type_attno - 1);
		if (type_attr->attisdropped)
			continue;
		type_attname = NameStr(type_attr->attname);

		/* Get the next non-dropped table attribute. */
		do
		{
			if (table_attno > tableTupleDesc->natts)
				ereport(ERROR,
						(errcode(ERRCODE_DATATYPE_MISMATCH),
						 errmsg("table is missing column \"%s\"",
								type_attname)));
			table_attr = TupleDescAttr(tableTupleDesc, table_attno - 1);
			table_attno++;
		} while (table_attr->attisdropped);
		table_attname = NameStr(table_attr->attname);

		/* Compare name. */
		if (strncmp(table_attname, type_attname, NAMEDATALEN) != 0)
			ereport(ERROR,
					(errcode(ERRCODE_DATATYPE_MISMATCH),
					 errmsg("table has column \"%s\" where type requires \"%s\"",
							table_attname, type_attname)));

		/* Compare type. */
		if (table_attr->atttypid != type_attr->atttypid ||
			table_attr->atttypmod != type_attr->atttypmod ||
			table_attr->attcollation != type_attr->attcollation)
			ereport(ERROR,
					(errcode(ERRCODE_DATATYPE_MISMATCH),
					 errmsg("table \"%s\" has different type for column \"%s\"",
							RelationGetRelationName(rel), type_attname)));
	}
	ReleaseTupleDesc(typeTupleDesc);

	/* Any remaining columns at the end of the table had better be dropped. */
	for (; table_attno <= tableTupleDesc->natts; table_attno++)
	{
		Form_pg_attribute table_attr = TupleDescAttr(tableTupleDesc,
													 table_attno - 1);

		if (!table_attr->attisdropped)
			ereport(ERROR,
					(errcode(ERRCODE_DATATYPE_MISMATCH),
					 errmsg("table has extra column \"%s\"",
							NameStr(table_attr->attname))));
	}

	/* If the table was already typed, drop the existing dependency. */
	if (rel->rd_rel->reloftype)
		drop_parent_dependency(relid, TypeRelationId, rel->rd_rel->reloftype,
							   DEPENDENCY_NORMAL);

	/* Record a dependency on the new type. */
	tableobj.classId = RelationRelationId;
	tableobj.objectId = relid;
	tableobj.objectSubId = 0;
	typeobj.classId = TypeRelationId;
	typeobj.objectId = typeid;
	typeobj.objectSubId = 0;
	recordDependencyOn(&tableobj, &typeobj, DEPENDENCY_NORMAL);

	/* Update pg_class.reloftype */
	relationRelation = table_open(RelationRelationId, RowExclusiveLock);
	classtuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
	if (!HeapTupleIsValid(classtuple))
		elog(ERROR, "cache lookup failed for relation %u", relid);
	((Form_pg_class) GETSTRUCT(classtuple))->reloftype = typeid;
	CatalogTupleUpdate(relationRelation, &classtuple->t_self, classtuple);

	InvokeObjectPostAlterHook(RelationRelationId, relid, 0);

	heap_freetuple(classtuple);
	table_close(relationRelation, RowExclusiveLock);

	ReleaseSysCache(typetuple);

	return typeobj;
}

/*
 * ALTER TABLE NOT OF
 *
 * Detach a typed table from its originating type.  Just clear reloftype and
 * remove the dependency.
 */
static void
ATExecDropOf(Relation rel, LOCKMODE lockmode)
{
	Oid			relid = RelationGetRelid(rel);
	Relation	relationRelation;
	HeapTuple	tuple;

	if (!OidIsValid(rel->rd_rel->reloftype))
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("\"%s\" is not a typed table",
						RelationGetRelationName(rel))));

	/*
	 * We don't bother to check ownership of the type --- ownership of the
	 * table is presumed enough rights.  No lock required on the type, either.
 */

	drop_parent_dependency(relid, TypeRelationId, rel->rd_rel->reloftype,
						   DEPENDENCY_NORMAL);

	/* Clear pg_class.reloftype */
	relationRelation = table_open(RelationRelationId, RowExclusiveLock);
	tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
	if (!HeapTupleIsValid(tuple))
		elog(ERROR, "cache lookup failed for relation %u", relid);
	((Form_pg_class) GETSTRUCT(tuple))->reloftype = InvalidOid;
	CatalogTupleUpdate(relationRelation, &tuple->t_self, tuple);

	InvokeObjectPostAlterHook(RelationRelationId, relid, 0);

	heap_freetuple(tuple);
	table_close(relationRelation, RowExclusiveLock);
}

/*
 * relation_mark_replica_identity: Update a table's replica identity
 *
 * Iff ri_type = REPLICA_IDENTITY_INDEX, indexOid must be the Oid of a suitable
 * index.  Otherwise, it must be InvalidOid.
 *
 * Caller had better hold an exclusive lock on the relation, as the results
 * of running two of these concurrently wouldn't be pretty.
 */
static void
relation_mark_replica_identity(Relation rel, char ri_type, Oid indexOid,
							   bool is_internal)
{
	Relation	pg_index;
	Relation	pg_class;
	HeapTuple	pg_class_tuple;
	HeapTuple	pg_index_tuple;
	Form_pg_class pg_class_form;
	Form_pg_index pg_index_form;
	ListCell   *index;

	/*
	 * Check whether relreplident has changed, and update it if so.
	 */
	pg_class = table_open(RelationRelationId, RowExclusiveLock);
	pg_class_tuple = SearchSysCacheCopy1(RELOID,
										 ObjectIdGetDatum(RelationGetRelid(rel)));
	if (!HeapTupleIsValid(pg_class_tuple))
		elog(ERROR, "cache lookup failed for relation \"%s\"",
			 RelationGetRelationName(rel));
	pg_class_form = (Form_pg_class) GETSTRUCT(pg_class_tuple);
	if (pg_class_form->relreplident != ri_type)
	{
		pg_class_form->relreplident = ri_type;
		CatalogTupleUpdate(pg_class, &pg_class_tuple->t_self, pg_class_tuple);
	}
	table_close(pg_class, RowExclusiveLock);
	heap_freetuple(pg_class_tuple);

	/*
	 * Update the per-index indisreplident flags correctly.  Exactly one
	 * index (the one passed in, if any) may carry the flag afterwards.
	 */
	pg_index = table_open(IndexRelationId, RowExclusiveLock);
	foreach(index, RelationGetIndexList(rel))
	{
		Oid			thisIndexOid = lfirst_oid(index);
		bool		dirty = false;

		pg_index_tuple = SearchSysCacheCopy1(INDEXRELID,
											 ObjectIdGetDatum(thisIndexOid));
		if (!HeapTupleIsValid(pg_index_tuple))
			elog(ERROR, "cache lookup failed for index %u", thisIndexOid);
		pg_index_form = (Form_pg_index) GETSTRUCT(pg_index_tuple);

		if (thisIndexOid == indexOid)
		{
			/* Set the bit if not already set. */
			if (!pg_index_form->indisreplident)
			{
				dirty = true;
				pg_index_form->indisreplident = true;
			}
		}
		else
		{
			/* Unset the bit if set. */
			if (pg_index_form->indisreplident)
			{
				dirty = true;
				pg_index_form->indisreplident = false;
			}
		}

		if (dirty)
		{
			CatalogTupleUpdate(pg_index, &pg_index_tuple->t_self, pg_index_tuple);
			InvokeObjectPostAlterHookArg(IndexRelationId, thisIndexOid, 0,
										 InvalidOid, is_internal);

			/*
			 * Invalidate the relcache for the table, so that after we commit
			 * all sessions will refresh the table's replica identity index
			 * before attempting any UPDATE or DELETE on the table.  (If we
			 * changed the table's pg_class row above, then a relcache inval
			 * is already queued due to that; but we might not have.)
			 */
			CacheInvalidateRelcache(rel);
		}
		heap_freetuple(pg_index_tuple);
	}

	table_close(pg_index, RowExclusiveLock);
}

/*
 * ALTER TABLE REPLICA IDENTITY ...
 */
static void
ATExecReplicaIdentity(Relation rel, ReplicaIdentityStmt *stmt, LOCKMODE lockmode)
{
	Oid			indexOid;
	Relation	indexRel;
	int			key;

	/* DEFAULT/FULL/NOTHING need no index; handle them and return early */
	if (stmt->identity_type == REPLICA_IDENTITY_DEFAULT)
	{
		relation_mark_replica_identity(rel, stmt->identity_type, InvalidOid, true);
		return;
	}
	else if (stmt->identity_type == REPLICA_IDENTITY_FULL)
	{
		relation_mark_replica_identity(rel, stmt->identity_type, InvalidOid, true);
		return;
	}
	else if (stmt->identity_type == REPLICA_IDENTITY_NOTHING)
	{
		relation_mark_replica_identity(rel, stmt->identity_type, InvalidOid, true);
		return;
	}
	else if (stmt->identity_type == REPLICA_IDENTITY_INDEX)
	{
		/* fallthrough */ ;
	}
	else
		elog(ERROR, "unexpected identity type %u", stmt->identity_type);

	/* Check that the index exists */
	indexOid = get_relname_relid(stmt->name, rel->rd_rel->relnamespace);
	if (!OidIsValid(indexOid))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("index \"%s\" for table \"%s\" does not exist",
						stmt->name, RelationGetRelationName(rel))));

	indexRel = index_open(indexOid, ShareLock);

	/* Check that the index is on the relation we're altering. */
	if (indexRel->rd_index == NULL ||
		indexRel->rd_index->indrelid != RelationGetRelid(rel))
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("\"%s\" is not an index for table \"%s\"",
						RelationGetRelationName(indexRel),
						RelationGetRelationName(rel))));

	/* The AM must support uniqueness, and the index must in fact be unique.
	 */
	if (!indexRel->rd_indam->amcanunique ||
		!indexRel->rd_index->indisunique)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("cannot use non-unique index \"%s\" as replica identity",
						RelationGetRelationName(indexRel))));
	/* Deferred indexes are not guaranteed to be always unique. */
	if (!indexRel->rd_index->indimmediate)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot use non-immediate index \"%s\" as replica identity",
						RelationGetRelationName(indexRel))));
	/* Expression indexes aren't supported. */
	if (RelationGetIndexExpressions(indexRel) != NIL)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot use expression index \"%s\" as replica identity",
						RelationGetRelationName(indexRel))));
	/* Predicate indexes aren't supported. */
	if (RelationGetIndexPredicate(indexRel) != NIL)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot use partial index \"%s\" as replica identity",
						RelationGetRelationName(indexRel))));

	/* Check index for nullable columns. */
	for (key = 0; key < IndexRelationGetNumberOfKeyAttributes(indexRel); key++)
	{
		int16		attno = indexRel->rd_index->indkey.values[key];
		Form_pg_attribute attr;

		/*
		 * Reject any other system columns.  (Going forward, we'll disallow
		 * indexes containing such columns in the first place, but they might
		 * exist in older branches.)
		 */
		if (attno <= 0)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
					 errmsg("index \"%s\" cannot be used as replica identity because column %d is a system column",
							RelationGetRelationName(indexRel), attno)));

		attr = TupleDescAttr(rel->rd_att, attno - 1);
		if (!attr->attnotnull)
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("index \"%s\" cannot be used as replica identity because column \"%s\" is nullable",
							RelationGetRelationName(indexRel),
							NameStr(attr->attname))));
	}

	/* This index is suitable for use as a replica identity. Mark it. */
	relation_mark_replica_identity(rel, stmt->identity_type, indexOid, true);

	index_close(indexRel, NoLock);
}

/*
 * ALTER TABLE ENABLE/DISABLE ROW LEVEL SECURITY
 */
static void
ATExecSetRowSecurity(Relation rel, bool rls)
{
	Relation	pg_class;
	Oid			relid;
	HeapTuple	tuple;

	relid = RelationGetRelid(rel);

	/* Pull the record for this relation and update it */
	pg_class = table_open(RelationRelationId, RowExclusiveLock);

	tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));

	if (!HeapTupleIsValid(tuple))
		elog(ERROR, "cache lookup failed for relation %u", relid);

	((Form_pg_class) GETSTRUCT(tuple))->relrowsecurity = rls;
	CatalogTupleUpdate(pg_class, &tuple->t_self, tuple);

	table_close(pg_class, RowExclusiveLock);
	heap_freetuple(tuple);
}

/*
 * ALTER TABLE FORCE/NO FORCE ROW LEVEL SECURITY
 */
static void
ATExecForceNoForceRowSecurity(Relation rel, bool force_rls)
{
	Relation	pg_class;
	Oid			relid;
	HeapTuple	tuple;

	relid = RelationGetRelid(rel);

	/* Update pg_class.relforcerowsecurity for this relation */
	pg_class = table_open(RelationRelationId, RowExclusiveLock);

	tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));

	if (!HeapTupleIsValid(tuple))
		elog(ERROR, "cache lookup failed for relation %u", relid);

	((Form_pg_class) GETSTRUCT(tuple))->relforcerowsecurity = force_rls;
	CatalogTupleUpdate(pg_class, &tuple->t_self, tuple);

table_close(pg_class, RowExclusiveLock); + heap_freetuple(tuple); +} + +/* + * ALTER FOREIGN TABLE OPTIONS (...) + */ +static void +ATExecGenericOptions(Relation rel, List *options) +{ + Relation ftrel; + ForeignServer *server; + ForeignDataWrapper *fdw; + HeapTuple tuple; + bool isnull; + Datum repl_val[Natts_pg_foreign_table]; + bool repl_null[Natts_pg_foreign_table]; + bool repl_repl[Natts_pg_foreign_table]; + Datum datum; + Form_pg_foreign_table tableform; + + if (options == NIL) + return; + + ftrel = table_open(ForeignTableRelationId, RowExclusiveLock); + + tuple = SearchSysCacheCopy1(FOREIGNTABLEREL, rel->rd_id); + if (!HeapTupleIsValid(tuple)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("foreign table \"%s\" does not exist", + RelationGetRelationName(rel)))); + tableform = (Form_pg_foreign_table) GETSTRUCT(tuple); + server = GetForeignServer(tableform->ftserver); + fdw = GetForeignDataWrapper(server->fdwid); + + memset(repl_val, 0, sizeof(repl_val)); + memset(repl_null, false, sizeof(repl_null)); + memset(repl_repl, false, sizeof(repl_repl)); + + /* Extract the current options */ + datum = SysCacheGetAttr(FOREIGNTABLEREL, + tuple, + Anum_pg_foreign_table_ftoptions, + &isnull); + if (isnull) + datum = PointerGetDatum(NULL); + + /* Transform the options */ + datum = transformGenericOptions(ForeignTableRelationId, + datum, + options, + fdw->fdwvalidator); + + if (PointerIsValid(DatumGetPointer(datum))) + repl_val[Anum_pg_foreign_table_ftoptions - 1] = datum; + else + repl_null[Anum_pg_foreign_table_ftoptions - 1] = true; + + repl_repl[Anum_pg_foreign_table_ftoptions - 1] = true; + + /* Everything looks good - update the tuple */ + + tuple = heap_modify_tuple(tuple, RelationGetDescr(ftrel), + repl_val, repl_null, repl_repl); + + CatalogTupleUpdate(ftrel, &tuple->t_self, tuple); + + /* + * Invalidate relcache so that all sessions will refresh any cached plans + * that might depend on the old options. 
+ */ + CacheInvalidateRelcache(rel); + + InvokeObjectPostAlterHook(ForeignTableRelationId, + RelationGetRelid(rel), 0); + + table_close(ftrel, RowExclusiveLock); + + heap_freetuple(tuple); +} + +/* + * ALTER TABLE ALTER COLUMN SET COMPRESSION + * + * Return value is the address of the modified column + */ +static ObjectAddress +ATExecSetCompression(AlteredTableInfo *tab, + Relation rel, + const char *column, + Node *newValue, + LOCKMODE lockmode) +{ + Relation attrel; + HeapTuple tuple; + Form_pg_attribute atttableform; + AttrNumber attnum; + char *compression; + char cmethod; + ObjectAddress address; + + Assert(IsA(newValue, String)); + compression = strVal(newValue); + + attrel = table_open(AttributeRelationId, RowExclusiveLock); + + /* copy the cache entry so we can scribble on it below */ + tuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), column); + if (!HeapTupleIsValid(tuple)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" of relation \"%s\" does not exist", + column, RelationGetRelationName(rel)))); + + /* prevent them from altering a system attribute */ + atttableform = (Form_pg_attribute) GETSTRUCT(tuple); + attnum = atttableform->attnum; + if (attnum <= 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot alter system column \"%s\"", column))); + + /* + * Check that column type is compressible, then get the attribute + * compression method code + */ + cmethod = GetAttributeCompression(atttableform->atttypid, compression); + + /* update pg_attribute entry */ + atttableform->attcompression = cmethod; + CatalogTupleUpdate(attrel, &tuple->t_self, tuple); + + InvokeObjectPostAlterHook(RelationRelationId, + RelationGetRelid(rel), + attnum); + + /* + * Apply the change to indexes as well (only for simple index columns, + * matching behavior of index.c ConstructTupleDescriptor()). 
	 */
	SetIndexStorageProperties(rel, attrel, attnum,
							  false, 0,
							  true, cmethod,
							  lockmode);

	heap_freetuple(tuple);

	table_close(attrel, RowExclusiveLock);

	/* make changes visible */
	CommandCounterIncrement();

	ObjectAddressSubSet(address, RelationRelationId,
						RelationGetRelid(rel), attnum);
	return address;
}


/*
 * Preparation phase for SET LOGGED/UNLOGGED
 *
 * This verifies that we're not trying to change a temp table.  Also,
 * existing foreign key constraints are checked to avoid ending up with
 * permanent tables referencing unlogged tables.
 *
 * Return value is false if the operation is a no-op (in which case the
 * checks are skipped), otherwise true.
 */
static bool
ATPrepChangePersistence(Relation rel, bool toLogged)
{
	Relation	pg_constraint;
	HeapTuple	tuple;
	SysScanDesc scan;
	ScanKeyData skey[1];

	/*
	 * Disallow changing status for a temp table.  Also verify whether we can
	 * get away with doing nothing; in such cases we don't need to run the
	 * checks below, either.
	 */
	switch (rel->rd_rel->relpersistence)
	{
		case RELPERSISTENCE_TEMP:
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
					 errmsg("cannot change logged status of table \"%s\" because it is temporary",
							RelationGetRelationName(rel)),
					 errtable(rel)));
			break;
		case RELPERSISTENCE_PERMANENT:
			if (toLogged)
				/* nothing to do */
				return false;
			break;
		case RELPERSISTENCE_UNLOGGED:
			if (!toLogged)
				/* nothing to do */
				return false;
			break;
	}

	/*
	 * Check that the table is not part of any publication when changing to
	 * UNLOGGED, as UNLOGGED tables can't be published.
	 */
	if (!toLogged &&
		list_length(GetRelationPublications(RelationGetRelid(rel))) > 0)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("cannot change table \"%s\" to unlogged because it is part of a publication",
						RelationGetRelationName(rel)),
				 errdetail("Unlogged relations cannot be replicated.")));

	/*
	 * Check existing foreign key constraints to preserve the invariant that
	 * permanent tables cannot reference unlogged ones.  Self-referencing
	 * foreign keys can safely be ignored.
	 */
	pg_constraint = table_open(ConstraintRelationId, AccessShareLock);

	/*
	 * Scan conrelid if changing to permanent, else confrelid.  This also
	 * determines whether a useful index exists: passing InvalidOid as the
	 * index OID makes systable_beginscan fall back to a heap scan.
	 */
	ScanKeyInit(&skey[0],
				toLogged ? Anum_pg_constraint_conrelid :
				Anum_pg_constraint_confrelid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(RelationGetRelid(rel)));
	scan = systable_beginscan(pg_constraint,
							  toLogged ? ConstraintRelidTypidNameIndexId : InvalidOid,
							  true, NULL, 1, skey);

	while (HeapTupleIsValid(tuple = systable_getnext(scan)))
	{
		Form_pg_constraint con = (Form_pg_constraint) GETSTRUCT(tuple);

		if (con->contype == CONSTRAINT_FOREIGN)
		{
			Oid			foreignrelid;
			Relation	foreignrel;

			/* the opposite end of what we used as scankey */
			foreignrelid = toLogged ?
				con->confrelid : con->conrelid;

			/* ignore if self-referencing */
			if (RelationGetRelid(rel) == foreignrelid)
				continue;

			foreignrel = relation_open(foreignrelid, AccessShareLock);

			if (toLogged)
			{
				if (!RelationIsPermanent(foreignrel))
					ereport(ERROR,
							(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
							 errmsg("could not change table \"%s\" to logged because it references unlogged table \"%s\"",
									RelationGetRelationName(rel),
									RelationGetRelationName(foreignrel)),
							 errtableconstraint(rel, NameStr(con->conname))));
			}
			else
			{
				if (RelationIsPermanent(foreignrel))
					ereport(ERROR,
							(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
							 errmsg("could not change table \"%s\" to unlogged because it references logged table \"%s\"",
									RelationGetRelationName(rel),
									RelationGetRelationName(foreignrel)),
							 errtableconstraint(rel, NameStr(con->conname))));
			}

			relation_close(foreignrel, AccessShareLock);
		}
	}

	systable_endscan(scan);

	table_close(pg_constraint, AccessShareLock);

	return true;
}

/*
 * Execute ALTER TABLE SET SCHEMA
 *
 * On success, returns the address of the relocated relation; if oldschema
 * is not NULL, *oldschema receives the OID of the namespace it came from.
 */
ObjectAddress
AlterTableNamespace(AlterObjectSchemaStmt *stmt, Oid *oldschema)
{
	Relation	rel;
	Oid			relid;
	Oid			oldNspOid;
	Oid			nspOid;
	RangeVar   *newrv;
	ObjectAddresses *objsMoved;
	ObjectAddress myself;

	relid = RangeVarGetRelidExtended(stmt->relation, AccessExclusiveLock,
									 stmt->missing_ok ? RVR_MISSING_OK : 0,
									 RangeVarCallbackForAlterRelation,
									 (void *) stmt);

	if (!OidIsValid(relid))
	{
		ereport(NOTICE,
				(errmsg("relation \"%s\" does not exist, skipping",
						stmt->relation->relname)));
		return InvalidObjectAddress;
	}

	rel = relation_open(relid, NoLock);

	oldNspOid = RelationGetNamespace(rel);

	/* If it's an owned sequence, disallow moving it by itself.
	 */
	if (rel->rd_rel->relkind == RELKIND_SEQUENCE)
	{
		Oid			tableId;
		int32		colId;

		if (sequenceIsOwned(relid, DEPENDENCY_AUTO, &tableId, &colId) ||
			sequenceIsOwned(relid, DEPENDENCY_INTERNAL, &tableId, &colId))
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("cannot move an owned sequence into another schema"),
					 errdetail("Sequence \"%s\" is linked to table \"%s\".",
							   RelationGetRelationName(rel),
							   get_rel_name(tableId))));
	}

	/* Get and lock schema OID and check its permissions. */
	newrv = makeRangeVar(stmt->newschema, RelationGetRelationName(rel), -1);
	nspOid = RangeVarGetAndCheckCreationNamespace(newrv, NoLock, NULL);

	/* common checks on switching namespaces */
	CheckSetNamespace(oldNspOid, nspOid);

	objsMoved = new_object_addresses();
	AlterTableNamespaceInternal(rel, oldNspOid, nspOid, objsMoved);
	free_object_addresses(objsMoved);

	ObjectAddressSet(myself, RelationRelationId, relid);

	if (oldschema)
		*oldschema = oldNspOid;

	/* close rel, but keep lock until commit */
	relation_close(rel, NoLock);

	return myself;
}

/*
 * The guts of relocating a table or materialized view to another namespace:
 * besides moving the relation itself, its dependent objects are relocated to
 * the new schema.
 *
 * objsMoved tracks objects already relocated, so that shared dependents are
 * not moved (or hooked) twice.
 */
void
AlterTableNamespaceInternal(Relation rel, Oid oldNspOid, Oid nspOid,
							ObjectAddresses *objsMoved)
{
	Relation	classRel;

	Assert(objsMoved != NULL);

	/* OK, modify the pg_class row and pg_depend entry */
	classRel = table_open(RelationRelationId, RowExclusiveLock);

	AlterRelationNamespaceInternal(classRel, RelationGetRelid(rel), oldNspOid,
								   nspOid, true, objsMoved);

	/* Fix the table's row type too, if it has one */
	if (OidIsValid(rel->rd_rel->reltype))
		AlterTypeNamespaceInternal(rel->rd_rel->reltype,
								   nspOid, false, false, objsMoved);

	/* Fix other dependent stuff */
	if (rel->rd_rel->relkind == RELKIND_RELATION ||
		rel->rd_rel->relkind == RELKIND_MATVIEW ||
		rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
	{
		AlterIndexNamespaces(classRel, rel, oldNspOid, nspOid, objsMoved);
		AlterSeqNamespaces(classRel, rel, oldNspOid, nspOid,
						   objsMoved, AccessExclusiveLock);
		AlterConstraintNamespaces(RelationGetRelid(rel), oldNspOid, nspOid,
								  false, objsMoved);
	}

	table_close(classRel, RowExclusiveLock);
}

/*
 * The guts of relocating a relation to another namespace: fix the pg_class
 * entry, and the pg_depend entry if any.  Caller must already have
 * opened and write-locked pg_class.
 */
void
AlterRelationNamespaceInternal(Relation classRel, Oid relOid,
							   Oid oldNspOid, Oid newNspOid,
							   bool hasDependEntry,
							   ObjectAddresses *objsMoved)
{
	HeapTuple	classTup;
	Form_pg_class classForm;
	ObjectAddress thisobj;
	bool		already_done = false;

	classTup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relOid));
	if (!HeapTupleIsValid(classTup))
		elog(ERROR, "cache lookup failed for relation %u", relOid);
	classForm = (Form_pg_class) GETSTRUCT(classTup);

	Assert(classForm->relnamespace == oldNspOid);

	thisobj.classId = RelationRelationId;
	thisobj.objectId = relOid;
	thisobj.objectSubId = 0;

	/*
	 * If the object has already been moved, don't move it again.  If it's
	 * already in the right place, don't move it, but still fire the object
	 * access hook.
	 */
	already_done = object_address_present(&thisobj, objsMoved);
	if (!already_done && oldNspOid != newNspOid)
	{
		/* check for duplicate name (more friendly than unique-index failure) */
		if (get_relname_relid(NameStr(classForm->relname),
							  newNspOid) != InvalidOid)
			ereport(ERROR,
					(errcode(ERRCODE_DUPLICATE_TABLE),
					 errmsg("relation \"%s\" already exists in schema \"%s\"",
							NameStr(classForm->relname),
							get_namespace_name(newNspOid))));

		/* classTup is a copy, so OK to scribble on */
		classForm->relnamespace = newNspOid;

		CatalogTupleUpdate(classRel, &classTup->t_self, classTup);

		/* Update dependency on schema if caller said so */
		if (hasDependEntry &&
			changeDependencyFor(RelationRelationId,
								relOid,
								NamespaceRelationId,
								oldNspOid,
								newNspOid) != 1)
			elog(ERROR, "failed to change schema dependency for relation \"%s\"",
				 NameStr(classForm->relname));
	}
	if (!already_done)
	{
		add_exact_object_address(&thisobj, objsMoved);

		InvokeObjectPostAlterHook(RelationRelationId, relOid, 0);
	}

	heap_freetuple(classTup);
}

/*
 * Move all indexes for the specified relation to another namespace.
 *
 * Note: we assume adequate permission checking was done by the caller,
 * and that the caller has a suitable lock on the owning relation.
 */
static void
AlterIndexNamespaces(Relation classRel, Relation rel,
					 Oid oldNspOid, Oid newNspOid, ObjectAddresses *objsMoved)
{
	List	   *indexList;
	ListCell   *l;

	indexList = RelationGetIndexList(rel);

	foreach(l, indexList)
	{
		Oid			indexOid = lfirst_oid(l);
		ObjectAddress thisobj;

		thisobj.classId = RelationRelationId;
		thisobj.objectId = indexOid;
		thisobj.objectSubId = 0;

		/*
		 * Note: currently, the index will not have its own dependency on the
		 * namespace, so we don't need to do changeDependencyFor().  There's no
		 * row type in pg_type, either.
		 *
		 * XXX this objsMoved test may be pointless -- surely we have a single
		 * dependency link from a relation to each index?
		 */
		if (!object_address_present(&thisobj, objsMoved))
		{
			AlterRelationNamespaceInternal(classRel, indexOid,
										   oldNspOid, newNspOid,
										   false, objsMoved);
			add_exact_object_address(&thisobj, objsMoved);
		}
	}

	list_free(indexList);
}

/*
 * Move all identity and SERIAL-column sequences of the specified relation to another
 * namespace.
 *
 * Note: we assume adequate permission checking was done by the caller,
 * and that the caller has a suitable lock on the owning relation.
 */
static void
AlterSeqNamespaces(Relation classRel, Relation rel,
				   Oid oldNspOid, Oid newNspOid, ObjectAddresses *objsMoved,
				   LOCKMODE lockmode)
{
	Relation	depRel;
	SysScanDesc scan;
	ScanKeyData key[2];
	HeapTuple	tup;

	/*
	 * SERIAL sequences are those having an auto dependency on one of the
	 * table's columns (we don't care *which* column, exactly).  Identity
	 * sequences carry an internal dependency instead; both are accepted
	 * by the deptype test below.
	 */
	depRel = table_open(DependRelationId, AccessShareLock);

	ScanKeyInit(&key[0],
				Anum_pg_depend_refclassid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(RelationRelationId));
	ScanKeyInit(&key[1],
				Anum_pg_depend_refobjid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(RelationGetRelid(rel)));
	/* we leave refobjsubid unspecified */

	scan = systable_beginscan(depRel, DependReferenceIndexId, true,
							  NULL, 2, key);

	while (HeapTupleIsValid(tup = systable_getnext(scan)))
	{
		Form_pg_depend depForm = (Form_pg_depend) GETSTRUCT(tup);
		Relation	seqRel;

		/* skip dependencies other than auto dependencies on columns */
		if (depForm->refobjsubid == 0 ||
			depForm->classid != RelationRelationId ||
			depForm->objsubid != 0 ||
			!(depForm->deptype == DEPENDENCY_AUTO || depForm->deptype == DEPENDENCY_INTERNAL))
			continue;

		/* Use relation_open just in case it's an index */
		seqRel = relation_open(depForm->objid, lockmode);

		/* skip non-sequence relations */
		if (RelationGetForm(seqRel)->relkind != RELKIND_SEQUENCE)
		{
			/* No need to keep the lock */
			relation_close(seqRel, lockmode);
			continue;
		}

		/* Fix the pg_class and pg_depend entries */
		AlterRelationNamespaceInternal(classRel, depForm->objid,
									   oldNspOid, newNspOid,
									   true, objsMoved);

		/*
		 * Sequences used to have entries in pg_type, but no longer do.  If we
		 * ever re-instate that, we'll need to move the pg_type entry to the
		 * new namespace, too (using AlterTypeNamespaceInternal).
		 */
		Assert(RelationGetForm(seqRel)->reltype == InvalidOid);

		/* Now we can close it.  Keep the lock till end of transaction. */
		relation_close(seqRel, NoLock);
	}

	systable_endscan(scan);

	relation_close(depRel, AccessShareLock);
}


/*
 * This code supports
 *	CREATE TEMP TABLE ... ON COMMIT { DROP | PRESERVE ROWS | DELETE ROWS }
 *
 * Because we only support this for TEMP tables, it's sufficient to remember
 * the state in a backend-local data structure.
 */

/*
 * Register a newly-created relation's ON COMMIT action.
 */
void
register_on_commit_action(Oid relid, OnCommitAction action)
{
	OnCommitItem *oc;
	MemoryContext oldcxt;

	/*
	 * We needn't bother registering the relation unless there is an ON COMMIT
	 * action we need to take.
	 */
	if (action == ONCOMMIT_NOOP || action == ONCOMMIT_PRESERVE_ROWS)
		return;

	/* Entries must outlive the current transaction, hence CacheMemoryContext */
	oldcxt = MemoryContextSwitchTo(CacheMemoryContext);

	oc = (OnCommitItem *) palloc(sizeof(OnCommitItem));
	oc->relid = relid;
	oc->oncommit = action;
	oc->creating_subid = GetCurrentSubTransactionId();
	oc->deleting_subid = InvalidSubTransactionId;

	/*
	 * We use lcons() here so that ON COMMIT actions are processed in reverse
	 * order of registration.  That might not be essential but it seems
	 * reasonable.
	 */
	on_commits = lcons(oc, on_commits);

	MemoryContextSwitchTo(oldcxt);
}

/*
 * Unregister any ON COMMIT action when a relation is deleted.
 *
 * Actually, we only mark the OnCommitItem entry as to be deleted after commit.
 */
void
remove_on_commit_action(Oid relid)
{
	ListCell   *l;

	foreach(l, on_commits)
	{
		OnCommitItem *oc = (OnCommitItem *) lfirst(l);

		if (oc->relid == relid)
		{
			oc->deleting_subid = GetCurrentSubTransactionId();
			break;
		}
	}
}

/*
 * Perform ON COMMIT actions.
 *
 * This is invoked just before actually committing, since it's possible
 * to encounter errors.
 */
void
PreCommit_on_commit_actions(void)
{
	ListCell   *l;
	List	   *oids_to_truncate = NIL;
	List	   *oids_to_drop = NIL;

	foreach(l, on_commits)
	{
		OnCommitItem *oc = (OnCommitItem *) lfirst(l);

		/* Ignore entry if already dropped in this xact */
		if (oc->deleting_subid != InvalidSubTransactionId)
			continue;

		switch (oc->oncommit)
		{
			case ONCOMMIT_NOOP:
			case ONCOMMIT_PRESERVE_ROWS:
				/* Do nothing (there shouldn't be such entries, actually) */
				break;
			case ONCOMMIT_DELETE_ROWS:

				/*
				 * If this transaction hasn't accessed any temporary
				 * relations, we can skip truncating ON COMMIT DELETE ROWS
				 * tables, as they must still be empty.
				 */
				if ((MyXactFlags & XACT_FLAGS_ACCESSEDTEMPNAMESPACE))
					oids_to_truncate = lappend_oid(oids_to_truncate, oc->relid);
				break;
			case ONCOMMIT_DROP:
				oids_to_drop = lappend_oid(oids_to_drop, oc->relid);
				break;
		}
	}

	/*
	 * Truncate relations before dropping so that all dependencies between
	 * relations are removed after they are worked on.  Doing it like this
	 * might be a waste as it is possible that a relation being truncated will
	 * be dropped anyway due to its parent being dropped, but this makes the
	 * code more robust because of not having to re-check that the relation
	 * exists at truncation time.
	 */
	if (oids_to_truncate != NIL)
		heap_truncate(oids_to_truncate);

	if (oids_to_drop != NIL)
	{
		ObjectAddresses *targetObjects = new_object_addresses();
		ListCell   *l;

		foreach(l, oids_to_drop)
		{
			ObjectAddress object;

			object.classId = RelationRelationId;
			object.objectId = lfirst_oid(l);
			object.objectSubId = 0;

			Assert(!object_address_present(&object, targetObjects));

			add_exact_object_address(&object, targetObjects);
		}

		/*
		 * Object deletion might involve toast table access (to clean up
		 * toasted catalog entries), so ensure we have a valid snapshot.
		 */
		PushActiveSnapshot(GetTransactionSnapshot());

		/*
		 * Since this is an automatic drop, rather than one directly initiated
		 * by the user, we pass the PERFORM_DELETION_INTERNAL flag.
		 */
		performMultipleDeletions(targetObjects, DROP_CASCADE,
								 PERFORM_DELETION_INTERNAL | PERFORM_DELETION_QUIETLY);

		PopActiveSnapshot();

#ifdef USE_ASSERT_CHECKING

		/*
		 * Note that table deletion will call remove_on_commit_action, so the
		 * entry should get marked as deleted.
		 */
		foreach(l, on_commits)
		{
			OnCommitItem *oc = (OnCommitItem *) lfirst(l);

			if (oc->oncommit != ONCOMMIT_DROP)
				continue;

			Assert(oc->deleting_subid != InvalidSubTransactionId);
		}
#endif
	}
}

/*
 * Post-commit or post-abort cleanup for ON COMMIT management.
 *
 * All we do here is remove no-longer-needed OnCommitItem entries.
 *
 * During commit, remove entries that were deleted during this transaction;
 * during abort, remove those created during this transaction.
 */
void
AtEOXact_on_commit_actions(bool isCommit)
{
	ListCell   *cur_item;

	foreach(cur_item, on_commits)
	{
		OnCommitItem *oc = (OnCommitItem *) lfirst(cur_item);

		if (isCommit ? oc->deleting_subid != InvalidSubTransactionId :
			oc->creating_subid != InvalidSubTransactionId)
		{
			/* cur_item must be removed */
			on_commits = foreach_delete_current(on_commits, cur_item);
			pfree(oc);
		}
		else
		{
			/* cur_item must be preserved; reset subxact tracking fields */
			oc->creating_subid = InvalidSubTransactionId;
			oc->deleting_subid = InvalidSubTransactionId;
		}
	}
}

/*
 * Post-subcommit or post-subabort cleanup for ON COMMIT management.
 *
 * During subabort, we can immediately remove entries created during this
 * subtransaction.  During subcommit, just relabel entries marked during
 * this subtransaction as being the parent's responsibility.
 */
void
AtEOSubXact_on_commit_actions(bool isCommit, SubTransactionId mySubid,
							  SubTransactionId parentSubid)
{
	ListCell   *cur_item;

	foreach(cur_item, on_commits)
	{
		OnCommitItem *oc = (OnCommitItem *) lfirst(cur_item);

		if (!isCommit && oc->creating_subid == mySubid)
		{
			/* cur_item must be removed */
			on_commits = foreach_delete_current(on_commits, cur_item);
			pfree(oc);
		}
		else
		{
			/* cur_item must be preserved */
			if (oc->creating_subid == mySubid)
				oc->creating_subid = parentSubid;
			if (oc->deleting_subid == mySubid)
				oc->deleting_subid = isCommit ? parentSubid : InvalidSubTransactionId;
		}
	}
}

/*
 * This is intended as a callback for RangeVarGetRelidExtended().  It allows
 * the relation to be locked only if (1) it's a plain or partitioned table,
 * materialized view, or TOAST table and (2) the current user is the owner (or
 * the superuser).  This meets the permission-checking needs of CLUSTER,
 * REINDEX TABLE, and REFRESH MATERIALIZED VIEW; we expose it here so that it
 * can be used by all.
 */
void
RangeVarCallbackOwnsTable(const RangeVar *relation,
						  Oid relId, Oid oldRelId, void *arg)
{
	char		relkind;

	/* Nothing to do if the relation was not found. */
	if (!OidIsValid(relId))
		return;

	/*
	 * If the relation does exist, check whether it's an index.
	 * But note that
	 * the relation might have been dropped between the time we did the name
	 * lookup and now.  In that case, there's nothing to do.
	 */
	relkind = get_rel_relkind(relId);
	if (!relkind)
		return;
	if (relkind != RELKIND_RELATION && relkind != RELKIND_TOASTVALUE &&
		relkind != RELKIND_MATVIEW && relkind != RELKIND_PARTITIONED_TABLE)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("\"%s\" is not a table or materialized view", relation->relname)));

	/* Check permissions */
	if (!pg_class_ownercheck(relId, GetUserId()))
		aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(get_rel_relkind(relId)), relation->relname);
}

/*
 * Callback to RangeVarGetRelidExtended() for TRUNCATE processing.
 */
static void
RangeVarCallbackForTruncate(const RangeVar *relation,
							Oid relId, Oid oldRelId, void *arg)
{
	HeapTuple	tuple;

	/* Nothing to do if the relation was not found. */
	if (!OidIsValid(relId))
		return;

	tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relId));
	if (!HeapTupleIsValid(tuple))	/* should not happen */
		elog(ERROR, "cache lookup failed for relation %u", relId);

	truncate_check_rel(relId, (Form_pg_class) GETSTRUCT(tuple));
	truncate_check_perms(relId, (Form_pg_class) GETSTRUCT(tuple));

	ReleaseSysCache(tuple);
}

/*
 * Callback to RangeVarGetRelidExtended(), similar to
 * RangeVarCallbackOwnsTable() but without checks on the type of the relation.
 */
void
RangeVarCallbackOwnsRelation(const RangeVar *relation,
							 Oid relId, Oid oldRelId, void *arg)
{
	HeapTuple	tuple;

	/* Nothing to do if the relation was not found. */
	if (!OidIsValid(relId))
		return;

	tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relId));
	if (!HeapTupleIsValid(tuple))	/* should not happen */
		elog(ERROR, "cache lookup failed for relation %u", relId);

	if (!pg_class_ownercheck(relId, GetUserId()))
		aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(get_rel_relkind(relId)),
					   relation->relname);

	if (!allowSystemTableMods &&
		IsSystemClass(relId, (Form_pg_class) GETSTRUCT(tuple)))
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied: \"%s\" is a system catalog",
						relation->relname)));

	ReleaseSysCache(tuple);
}

/*
 * Common RangeVarGetRelid callback for rename, set schema, and alter table
 * processing.
 *
 * arg is the parse tree of the originating statement; it tells us which
 * object type the user named, so we can complain if the relation's actual
 * kind doesn't match.
 */
static void
RangeVarCallbackForAlterRelation(const RangeVar *rv, Oid relid, Oid oldrelid,
								 void *arg)
{
	Node	   *stmt = (Node *) arg;
	ObjectType	reltype;
	HeapTuple	tuple;
	Form_pg_class classform;
	AclResult	aclresult;
	char		relkind;

	tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
	if (!HeapTupleIsValid(tuple))
		return;					/* concurrently dropped */
	classform = (Form_pg_class) GETSTRUCT(tuple);
	relkind = classform->relkind;

	/* Must own relation. */
	if (!pg_class_ownercheck(relid, GetUserId()))
		aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(get_rel_relkind(relid)), rv->relname);

	/* No system table modifications unless explicitly allowed. */
	if (!allowSystemTableMods && IsSystemClass(relid, classform))
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied: \"%s\" is a system catalog",
						rv->relname)));

	/*
	 * Extract the specified relation type from the statement parse tree.
	 *
	 * Also, for ALTER .. RENAME, check permissions: the user must (still)
	 * have CREATE rights on the containing namespace.
	 */
	if (IsA(stmt, RenameStmt))
	{
		aclresult = pg_namespace_aclcheck(classform->relnamespace,
										  GetUserId(), ACL_CREATE);
		if (aclresult != ACLCHECK_OK)
			aclcheck_error(aclresult, OBJECT_SCHEMA,
						   get_namespace_name(classform->relnamespace));
		reltype = ((RenameStmt *) stmt)->renameType;
	}
	else if (IsA(stmt, AlterObjectSchemaStmt))
		reltype = ((AlterObjectSchemaStmt *) stmt)->objectType;

	else if (IsA(stmt, AlterTableStmt))
		reltype = ((AlterTableStmt *) stmt)->objtype;
	else
	{
		elog(ERROR, "unrecognized node type: %d", (int) nodeTag(stmt));
		reltype = OBJECT_TABLE; /* placate compiler */
	}

	/*
	 * For compatibility with prior releases, we allow ALTER TABLE to be used
	 * with most other types of relations (but not composite types). We allow
	 * similar flexibility for ALTER INDEX in the case of RENAME, but not
	 * otherwise.  Otherwise, the user must select the correct form of the
	 * command for the relation at issue.
	 */
	if (reltype == OBJECT_SEQUENCE && relkind != RELKIND_SEQUENCE)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("\"%s\" is not a sequence", rv->relname)));

	if (reltype == OBJECT_VIEW && relkind != RELKIND_VIEW)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("\"%s\" is not a view", rv->relname)));

	if (reltype == OBJECT_MATVIEW && relkind != RELKIND_MATVIEW)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("\"%s\" is not a materialized view", rv->relname)));

	if (reltype == OBJECT_FOREIGN_TABLE && relkind != RELKIND_FOREIGN_TABLE)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("\"%s\" is not a foreign table", rv->relname)));

	if (reltype == OBJECT_TYPE && relkind != RELKIND_COMPOSITE_TYPE)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("\"%s\" is not a composite type", rv->relname)));

	if (reltype == OBJECT_INDEX && relkind != RELKIND_INDEX &&
		relkind != RELKIND_PARTITIONED_INDEX
		&& !IsA(stmt, RenameStmt))
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("\"%s\" is not an index", rv->relname)));

	/*
	 * Don't allow ALTER TABLE on composite types. We want people to use ALTER
	 * TYPE for that.
	 */
	if (reltype != OBJECT_TYPE && relkind == RELKIND_COMPOSITE_TYPE)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("\"%s\" is a composite type", rv->relname),
				 errhint("Use ALTER TYPE instead.")));

	/*
	 * Don't allow ALTER TABLE .. SET SCHEMA on relations that can't be moved
	 * to a different schema, such as indexes and TOAST tables.
	 */
	if (IsA(stmt, AlterObjectSchemaStmt))
	{
		if (relkind == RELKIND_INDEX || relkind == RELKIND_PARTITIONED_INDEX)
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change schema of index \"%s\"",
							rv->relname),
					 errhint("Change the schema of the table instead.")));
		else if (relkind == RELKIND_COMPOSITE_TYPE)
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change schema of composite type \"%s\"",
							rv->relname),
					 errhint("Use ALTER TYPE instead.")));
		else if (relkind == RELKIND_TOASTVALUE)
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change schema of TOAST table \"%s\"",
							rv->relname),
					 errhint("Change the schema of the table instead.")));
	}

	ReleaseSysCache(tuple);
}

/*
 * Transform any expressions present in the partition key
 *
 * Returns a transformed PartitionSpec, as well as the strategy code
 * (one of the PARTITION_STRATEGY_* values) via *strategy.
 */
static PartitionSpec *
transformPartitionSpec(Relation rel, PartitionSpec *partspec, char *strategy)
{
	PartitionSpec *newspec;
	ParseState *pstate;
	ParseNamespaceItem *nsitem;
	ListCell   *l;

	newspec = makeNode(PartitionSpec);

	newspec->strategy = partspec->strategy;
	newspec->partParams = NIL;
	newspec->location = partspec->location;

	/* Parse partitioning strategy name */
	if (pg_strcasecmp(partspec->strategy, "hash") == 0)
		*strategy = PARTITION_STRATEGY_HASH;
	else if (pg_strcasecmp(partspec->strategy, "list") == 0)
		*strategy = PARTITION_STRATEGY_LIST;
	else if (pg_strcasecmp(partspec->strategy, "range") == 0)
		*strategy = PARTITION_STRATEGY_RANGE;
	else
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("unrecognized partitioning strategy \"%s\"",
						partspec->strategy)));

	/* Check valid number of columns for strategy: LIST allows only one */
	if (*strategy == PARTITION_STRATEGY_LIST &&
		list_length(partspec->partParams) != 1)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("cannot use \"list\" partition strategy with more than one column")));

	/*
	 * Create a dummy ParseState and insert the target relation as its sole
	 * rangetable entry.  We need a ParseState for transformExpr.
	 */
	pstate = make_parsestate(NULL);
	nsitem = addRangeTableEntryForRelation(pstate, rel, AccessShareLock,
										   NULL, false, true);
	addNSItemToQuery(pstate, nsitem, true, true, true);

	/* take care of any partition expressions */
	foreach(l, partspec->partParams)
	{
		PartitionElem *pelem = lfirst_node(PartitionElem, l);

		if (pelem->expr)
		{
			/* Copy, to avoid scribbling on the input */
			pelem = copyObject(pelem);

			/* Now do parse transformation of the expression */
			pelem->expr = transformExpr(pstate, pelem->expr,
										EXPR_KIND_PARTITION_EXPRESSION);

			/* we have to fix its collations too */
			assign_expr_collations(pstate, pelem->expr);
		}

		newspec->partParams = lappend(newspec->partParams, pelem);
	}

	return newspec;
}

/*
 * Compute per-partition-column information from a list of PartitionElems.
 * Expressions in the PartitionElems must be parse-analyzed already.
 */
static void
ComputePartitionAttrs(ParseState *pstate, Relation rel, List *partParams, AttrNumber *partattrs,
					  List **partexprs, Oid *partopclass, Oid *partcollation,
					  char strategy)
{
	int			attn;
	ListCell   *lc;
	Oid			am_oid;

	attn = 0;
	foreach(lc, partParams)
	{
		PartitionElem *pelem = lfirst_node(PartitionElem, lc);
		Oid			atttype;
		Oid			attcollation;

		if (pelem->name != NULL)
		{
			/* Simple attribute reference */
			HeapTuple	atttuple;
			Form_pg_attribute attform;

			atttuple = SearchSysCacheAttName(RelationGetRelid(rel),
											 pelem->name);
			if (!HeapTupleIsValid(atttuple))
				ereport(ERROR,
						(errcode(ERRCODE_UNDEFINED_COLUMN),
						 errmsg("column \"%s\" named in partition key does not exist",
								pelem->name),
						 parser_errposition(pstate, pelem->location)));
			attform = (Form_pg_attribute) GETSTRUCT(atttuple);

			if (attform->attnum <= 0)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
						 errmsg("cannot use system column \"%s\" in partition key",
								pelem->name),
						 parser_errposition(pstate, pelem->location)));

			/*
			 * Generated columns cannot work: They are computed after BEFORE
			 * triggers, but partition routing is done before all triggers.
			 */
			if (attform->attgenerated)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
						 errmsg("cannot use generated column in partition key"),
						 errdetail("Column \"%s\" is a generated column.",
								   pelem->name),
						 parser_errposition(pstate, pelem->location)));

			partattrs[attn] = attform->attnum;
			atttype = attform->atttypid;
			attcollation = attform->attcollation;
			ReleaseSysCache(atttuple);
		}
		else
		{
			/* Expression */
			Node	   *expr = pelem->expr;
			char		partattname[16];

			Assert(expr != NULL);
			atttype = exprType(expr);
			attcollation = exprCollation(expr);

			/*
			 * The expression must be of a storable type (e.g., not RECORD).
			 * The test is the same as for whether a table column is of a safe
			 * type (which is why we needn't check for the non-expression
			 * case).
			 */
			snprintf(partattname, sizeof(partattname), "%d", attn + 1);
			CheckAttributeType(partattname,
							   atttype, attcollation,
							   NIL, CHKATYPE_IS_PARTKEY);

			/*
			 * Strip any top-level COLLATE clause.  This ensures that we treat
			 * "x COLLATE y" and "(x COLLATE y)" alike.
			 */
			while (IsA(expr, CollateExpr))
				expr = (Node *) ((CollateExpr *) expr)->arg;

			if (IsA(expr, Var) &&
				((Var *) expr)->varattno > 0)
			{
				/*
				 * User wrote "(column)" or "(column COLLATE something)".
				 * Treat it like simple attribute anyway.
				 */
				partattrs[attn] = ((Var *) expr)->varattno;
			}
			else
			{
				Bitmapset  *expr_attrs = NULL;
				int			i;

				partattrs[attn] = 0;	/* marks the column as expression */
				*partexprs = lappend(*partexprs, expr);

				/*
				 * Try to simplify the expression before checking for
				 * mutability.  The main practical value of doing it in this
				 * order is that an inline-able SQL-language function will be
				 * accepted if its expansion is immutable, whether or not the
				 * function itself is marked immutable.
				 *
				 * Note that expression_planner does not change the passed in
				 * expression destructively and we have already saved the
				 * expression to be stored into the catalog above.
				 */
				expr = (Node *) expression_planner((Expr *) expr);

				/*
				 * Partition expression cannot contain mutable functions,
				 * because a given row must always map to the same partition
				 * as long as there is no change in the partition boundary
				 * structure.
				 */
				if (contain_mutable_functions(expr))
					ereport(ERROR,
							(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
							 errmsg("functions in partition key expression must be marked IMMUTABLE")));

				/*
				 * transformPartitionSpec() should have already rejected
				 * subqueries, aggregates, window functions, and SRFs, based
				 * on the EXPR_KIND_ for partition expressions.
				 */

				/*
				 * Cannot allow system column references, since that would
				 * make partition routing impossible: their values won't be
				 * known yet when we need to do that.
				 */
				pull_varattnos(expr, 1, &expr_attrs);
				for (i = FirstLowInvalidHeapAttributeNumber; i < 0; i++)
				{
					if (bms_is_member(i - FirstLowInvalidHeapAttributeNumber,
									  expr_attrs))
						ereport(ERROR,
								(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
								 errmsg("partition key expressions cannot contain system column references")));
				}

				/*
				 * Generated columns cannot work: They are computed after
				 * BEFORE triggers, but partition routing is done before all
				 * triggers.
				 */
				i = -1;
				while ((i = bms_next_member(expr_attrs, i)) >= 0)
				{
					AttrNumber	attno = i + FirstLowInvalidHeapAttributeNumber;

					if (attno > 0 &&
						TupleDescAttr(RelationGetDescr(rel), attno - 1)->attgenerated)
						ereport(ERROR,
								(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
								 errmsg("cannot use generated column in partition key"),
								 errdetail("Column \"%s\" is a generated column.",
										   get_attname(RelationGetRelid(rel), attno, false)),
								 parser_errposition(pstate, pelem->location)));
				}

				/*
				 * While it is not exactly *wrong* for a partition expression
				 * to be a constant, it seems better to reject such keys.
				 */
				if (IsA(expr, Const))
					ereport(ERROR,
							(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
							 errmsg("cannot use constant expression as partition key")));
			}
		}

		/*
		 * Apply collation override if any
		 */
		if (pelem->collation)
			attcollation = get_collation_oid(pelem->collation, false);

		/*
		 * Check we have a collation iff it's a collatable type.  The only
		 * expected failures here are (1) COLLATE applied to a noncollatable
		 * type, or (2) partition expression had an unresolved collation.  But
		 * we might as well code this to be a complete consistency check.
		 */
		if (type_is_collatable(atttype))
		{
			if (!OidIsValid(attcollation))
				ereport(ERROR,
						(errcode(ERRCODE_INDETERMINATE_COLLATION),
						 errmsg("could not determine which collation to use for partition expression"),
						 errhint("Use the COLLATE clause to set the collation explicitly.")));
		}
		else
		{
			if (OidIsValid(attcollation))
				ereport(ERROR,
						(errcode(ERRCODE_DATATYPE_MISMATCH),
						 errmsg("collations are not supported by type %s",
								format_type_be(atttype))));
		}

		partcollation[attn] = attcollation;

		/*
		 * Identify the appropriate operator class.  For list and range
		 * partitioning, we use a btree operator class; hash partitioning uses
		 * a hash operator class.
		 */
		if (strategy == PARTITION_STRATEGY_HASH)
			am_oid = HASH_AM_OID;
		else
			am_oid = BTREE_AM_OID;

		if (!pelem->opclass)
		{
			partopclass[attn] = GetDefaultOpClass(atttype, am_oid);

			if (!OidIsValid(partopclass[attn]))
			{
				if (strategy == PARTITION_STRATEGY_HASH)
					ereport(ERROR,
							(errcode(ERRCODE_UNDEFINED_OBJECT),
							 errmsg("data type %s has no default operator class for access method \"%s\"",
									format_type_be(atttype), "hash"),
							 errhint("You must specify a hash operator class or define a default hash operator class for the data type.")));
				else
					ereport(ERROR,
							(errcode(ERRCODE_UNDEFINED_OBJECT),
							 errmsg("data type %s has no default operator class for access method \"%s\"",
									format_type_be(atttype), "btree"),
							 errhint("You must specify a btree operator class or define a default btree operator class for the data type.")));
			}
		}
		else
			partopclass[attn] = ResolveOpClass(pelem->opclass,
											   atttype,
											   am_oid == HASH_AM_OID ? "hash" : "btree",
											   am_oid);

		attn++;
	}
}

/*
 * PartConstraintImpliedByRelConstraint
 *		Do scanrel's existing constraints imply the partition constraint?
 *
 * "Existing constraints" include its check constraints and column-level
 * NOT NULL constraints.  partConstraint describes the partition constraint,
 * in implicit-AND form.
+ */ +bool +PartConstraintImpliedByRelConstraint(Relation scanrel, + List *partConstraint) +{ + List *existConstraint = NIL; + TupleConstr *constr = RelationGetDescr(scanrel)->constr; + int i; + + if (constr && constr->has_not_null) + { + int natts = scanrel->rd_att->natts; + + for (i = 1; i <= natts; i++) + { + Form_pg_attribute att = TupleDescAttr(scanrel->rd_att, i - 1); + + if (att->attnotnull && !att->attisdropped) + { + NullTest *ntest = makeNode(NullTest); + + ntest->arg = (Expr *) makeVar(1, + i, + att->atttypid, + att->atttypmod, + att->attcollation, + 0); + ntest->nulltesttype = IS_NOT_NULL; + + /* + * argisrow=false is correct even for a composite column, + * because attnotnull does not represent a SQL-spec IS NOT + * NULL test in such a case, just IS DISTINCT FROM NULL. + */ + ntest->argisrow = false; + ntest->location = -1; + existConstraint = lappend(existConstraint, ntest); + } + } + } + + return ConstraintImpliedByRelConstraint(scanrel, partConstraint, existConstraint); +} + +/* + * ConstraintImpliedByRelConstraint + * Do scanrel's existing constraints imply the given constraint? + * + * testConstraint is the constraint to validate. provenConstraint is a + * caller-provided list of conditions which this function may assume + * to be true. Both provenConstraint and testConstraint must be in + * implicit-AND form, must only contain immutable clauses, and must + * contain only Vars with varno = 1. + */ +bool +ConstraintImpliedByRelConstraint(Relation scanrel, List *testConstraint, List *provenConstraint) +{ + List *existConstraint = list_copy(provenConstraint); + TupleConstr *constr = RelationGetDescr(scanrel)->constr; + int num_check, + i; + + num_check = (constr != NULL) ? constr->num_check : 0; + for (i = 0; i < num_check; i++) + { + Node *cexpr; + + /* + * If this constraint hasn't been fully validated yet, we must ignore + * it here. 
+ */ + if (!constr->check[i].ccvalid) + continue; + + cexpr = stringToNode(constr->check[i].ccbin); + + /* + * Run each expression through const-simplification and + * canonicalization. It is necessary, because we will be comparing it + * to similarly-processed partition constraint expressions, and may + * fail to detect valid matches without this. + */ + cexpr = eval_const_expressions(NULL, cexpr); + cexpr = (Node *) canonicalize_qual((Expr *) cexpr, true); + + existConstraint = list_concat(existConstraint, + make_ands_implicit((Expr *) cexpr)); + } + + /* + * Try to make the proof. Since we are comparing CHECK constraints, we + * need to use weak implication, i.e., we assume existConstraint is + * not-false and try to prove the same for testConstraint. + * + * Note that predicate_implied_by assumes its first argument is known + * immutable. That should always be true for both NOT NULL and partition + * constraints, so we don't test it here. + */ + return predicate_implied_by(testConstraint, existConstraint, true); +} + +/* + * QueuePartitionConstraintValidation + * + * Add an entry to wqueue to have the given partition constraint validated by + * Phase 3, for the given relation, and all its children. + * + * We first verify whether the given constraint is implied by pre-existing + * relation constraints; if it is, there's no need to scan the table to + * validate, so don't queue in that case. + */ +static void +QueuePartitionConstraintValidation(List **wqueue, Relation scanrel, + List *partConstraint, + bool validate_default) +{ + /* + * Based on the table's existing constraints, determine whether or not we + * may skip scanning the table. 
+ */ + if (PartConstraintImpliedByRelConstraint(scanrel, partConstraint)) + { + if (!validate_default) + ereport(DEBUG1, + (errmsg_internal("partition constraint for table \"%s\" is implied by existing constraints", + RelationGetRelationName(scanrel)))); + else + ereport(DEBUG1, + (errmsg_internal("updated partition constraint for default partition \"%s\" is implied by existing constraints", + RelationGetRelationName(scanrel)))); + return; + } + + /* + * Constraints proved insufficient. For plain relations, queue a + * validation item now; for partitioned tables, recurse to process each + * partition. + */ + if (scanrel->rd_rel->relkind == RELKIND_RELATION) + { + AlteredTableInfo *tab; + + /* Grab a work queue entry. */ + tab = ATGetQueueEntry(wqueue, scanrel); + Assert(tab->partition_constraint == NULL); + tab->partition_constraint = (Expr *) linitial(partConstraint); + tab->validate_default = validate_default; + } + else if (scanrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + PartitionDesc partdesc = RelationGetPartitionDesc(scanrel, true); + int i; + + for (i = 0; i < partdesc->nparts; i++) + { + Relation part_rel; + List *thisPartConstraint; + + /* + * This is the minimum lock we need to prevent deadlocks. + */ + part_rel = table_open(partdesc->oids[i], AccessExclusiveLock); + + /* + * Adjust the constraint for scanrel so that it matches this + * partition's attribute numbers. + */ + thisPartConstraint = + map_partition_varattnos(partConstraint, 1, + part_rel, scanrel); + + QueuePartitionConstraintValidation(wqueue, part_rel, + thisPartConstraint, + validate_default); + table_close(part_rel, NoLock); /* keep lock till commit */ + } + } +} + +/* + * ALTER TABLE ATTACH PARTITION FOR VALUES + * + * Return the address of the newly attached partition. 
 */
static ObjectAddress
ATExecAttachPartition(List **wqueue, Relation rel, PartitionCmd *cmd,
					  AlterTableUtilityContext *context)
{
	Relation	attachrel,
				catalog;
	List	   *attachrel_children;
	List	   *partConstraint;
	SysScanDesc scan;
	ScanKeyData skey;
	AttrNumber	attno;
	int			natts;
	TupleDesc	tupleDesc;
	ObjectAddress address;
	const char *trigger_name;
	Oid			defaultPartOid;
	List	   *partBoundConstraint;
	ParseState *pstate = make_parsestate(NULL);

	/* supply the query text so error cursors can point into the command */
	pstate->p_sourcetext = context->queryString;

	/*
	 * We must lock the default partition if one exists, because attaching a
	 * new partition will change its partition constraint.
	 */
	defaultPartOid =
		get_default_oid_from_partdesc(RelationGetPartitionDesc(rel, true));
	if (OidIsValid(defaultPartOid))
		LockRelationOid(defaultPartOid, AccessExclusiveLock);

	attachrel = table_openrv(cmd->name, AccessExclusiveLock);

	/*
	 * XXX I think it'd be a good idea to grab locks on all tables referenced
	 * by FKs at this point also.
	 */

	/*
	 * Must be owner of both parent and source table -- parent was checked by
	 * ATSimplePermissions call in ATPrepCmd
	 */
	ATSimplePermissions(AT_AttachPartition, attachrel, ATT_TABLE | ATT_FOREIGN_TABLE);

	/* A partition can only have one parent */
	if (attachrel->rd_rel->relispartition)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("\"%s\" is already a partition",
						RelationGetRelationName(attachrel))));

	if (OidIsValid(attachrel->rd_rel->reloftype))
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("cannot attach a typed table as partition")));

	/*
	 * Table being attached should not already be part of inheritance; either
	 * as a child table...
	 */
	catalog = table_open(InheritsRelationId, AccessShareLock);
	ScanKeyInit(&skey,
				Anum_pg_inherits_inhrelid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(RelationGetRelid(attachrel)));
	scan = systable_beginscan(catalog, InheritsRelidSeqnoIndexId, true,
							  NULL, 1, &skey);
	if (HeapTupleIsValid(systable_getnext(scan)))
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("cannot attach inheritance child as partition")));
	systable_endscan(scan);

	/* ...or as a parent table (except the case when it is partitioned) */
	ScanKeyInit(&skey,
				Anum_pg_inherits_inhparent,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(RelationGetRelid(attachrel)));
	scan = systable_beginscan(catalog, InheritsParentIndexId, true, NULL,
							  1, &skey);
	if (HeapTupleIsValid(systable_getnext(scan)) &&
		attachrel->rd_rel->relkind == RELKIND_RELATION)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("cannot attach inheritance parent as partition")));
	systable_endscan(scan);
	table_close(catalog, AccessShareLock);

	/*
	 * Prevent circularity by seeing if rel is a partition of attachrel. (In
	 * particular, this disallows making a rel a partition of itself.)
	 *
	 * We do that by checking if rel is a member of the list of attachrel's
	 * partitions provided the latter is partitioned at all.  We want to avoid
	 * having to construct this list again, so we request the strongest lock
	 * on all partitions.  We need the strongest lock, because we may decide
	 * to scan them if we find out that the table being attached (or its leaf
	 * partitions) may contain rows that violate the partition constraint. If
	 * the table has a constraint that would prevent such rows, which by
	 * definition is present in all the partitions, we need not scan the
	 * table, nor its partitions.  But we cannot risk a deadlock by taking a
	 * weaker lock now and the stronger one only when needed.
	 */
	attachrel_children = find_all_inheritors(RelationGetRelid(attachrel),
											 AccessExclusiveLock, NULL);
	if (list_member_oid(attachrel_children, RelationGetRelid(rel)))
		ereport(ERROR,
				(errcode(ERRCODE_DUPLICATE_TABLE),
				 errmsg("circular inheritance not allowed"),
				 errdetail("\"%s\" is already a child of \"%s\".",
						   RelationGetRelationName(rel),
						   RelationGetRelationName(attachrel))));

	/* If the parent is permanent, so must be all of its partitions. */
	if (rel->rd_rel->relpersistence != RELPERSISTENCE_TEMP &&
		attachrel->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("cannot attach a temporary relation as partition of permanent relation \"%s\"",
						RelationGetRelationName(rel))));

	/* Temp parent cannot have a partition that is itself not a temp */
	if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
		attachrel->rd_rel->relpersistence != RELPERSISTENCE_TEMP)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("cannot attach a permanent relation as partition of temporary relation \"%s\"",
						RelationGetRelationName(rel))));

	/* If the parent is temp, it must belong to this session */
	if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
		!rel->rd_islocaltemp)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("cannot attach as partition of temporary relation of another session")));

	/* Ditto for the partition */
	if (attachrel->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
		!attachrel->rd_islocaltemp)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("cannot attach temporary relation of another session as partition")));

	/* Check if there are any columns in attachrel that aren't in the parent */
	tupleDesc = RelationGetDescr(attachrel);
	natts = tupleDesc->natts;
	for (attno = 1; attno <= natts; attno++)
	{
		Form_pg_attribute attribute = TupleDescAttr(tupleDesc, attno - 1);
		char	   *attributeName = NameStr(attribute->attname);

		/* Ignore dropped */
		if (attribute->attisdropped)
			continue;

		/* Try to find the column in parent (matching on column name) */
		if (!SearchSysCacheExists2(ATTNAME,
								   ObjectIdGetDatum(RelationGetRelid(rel)),
								   CStringGetDatum(attributeName)))
			ereport(ERROR,
					(errcode(ERRCODE_DATATYPE_MISMATCH),
					 errmsg("table \"%s\" contains column \"%s\" not found in parent \"%s\"",
							RelationGetRelationName(attachrel), attributeName,
							RelationGetRelationName(rel)),
					 errdetail("The new partition may contain only the columns present in parent.")));
	}

	/*
	 * If child_rel has row-level triggers with transition tables, we
	 * currently don't allow it to become a partition.  See also prohibitions
	 * in ATExecAddInherit() and CreateTrigger().
	 */
	trigger_name = FindTriggerIncompatibleWithInheritance(attachrel->trigdesc);
	if (trigger_name != NULL)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("trigger \"%s\" prevents table \"%s\" from becoming a partition",
						trigger_name, RelationGetRelationName(attachrel)),
				 errdetail("ROW triggers with transition tables are not supported on partitions.")));

	/*
	 * Check that the new partition's bound is valid and does not overlap any
	 * of existing partitions of the parent - note that it does not return on
	 * error.
	 */
	check_new_partition_bound(RelationGetRelationName(attachrel), rel,
							  cmd->bound, pstate);

	/* OK to create inheritance.  Rest of the checks performed there */
	CreateInheritance(attachrel, rel);

	/* Update the pg_class entry. */
	StorePartitionBound(attachrel, rel, cmd->bound);

	/* Ensure there exists a correct set of indexes in the partition. */
	AttachPartitionEnsureIndexes(rel, attachrel);

	/* and triggers */
	CloneRowTriggersToPartition(rel, attachrel);

	/*
	 * Clone foreign key constraints.  Callee is responsible for setting up
	 * for phase 3 constraint verification.
	 */
	CloneForeignKeyConstraints(wqueue, rel, attachrel);

	/*
	 * Generate partition constraint from the partition bound specification.
	 * If the parent itself is a partition, make sure to include its
	 * constraint as well.
	 */
	partBoundConstraint = get_qual_from_partbound(rel, cmd->bound);
	partConstraint = list_concat(partBoundConstraint,
								 RelationGetPartitionQual(rel));

	/* Skip validation if there are no constraints to validate. */
	if (partConstraint)
	{
		/*
		 * Run the partition quals through const-simplification similar to
		 * check constraints.  We skip canonicalize_qual, though, because
		 * partition quals should be in canonical form already.
		 */
		partConstraint =
			(List *) eval_const_expressions(NULL,
											(Node *) partConstraint);

		/* XXX this sure looks wrong */
		partConstraint = list_make1(make_ands_explicit(partConstraint));

		/*
		 * Adjust the generated constraint to match this partition's attribute
		 * numbers.
		 */
		partConstraint = map_partition_varattnos(partConstraint, 1, attachrel,
												 rel);

		/* Validate partition constraints against the table being attached. */
		QueuePartitionConstraintValidation(wqueue, attachrel, partConstraint,
										   false);
	}

	/*
	 * If we're attaching a partition other than the default partition and a
	 * default one exists, then that partition's partition constraint changes,
	 * so add an entry to the work queue to validate it, too.  (We must not do
	 * this when the partition being attached is the default one; we already
	 * did it above!)
	 */
	if (OidIsValid(defaultPartOid))
	{
		Relation	defaultrel;
		List	   *defPartConstraint;

		Assert(!cmd->bound->is_default);

		/* we already hold a lock on the default partition */
		defaultrel = table_open(defaultPartOid, NoLock);
		defPartConstraint =
			get_proposed_default_constraint(partBoundConstraint);

		/*
		 * Map the Vars in the constraint expression from rel's attnos to
		 * defaultrel's.
		 */
		defPartConstraint =
			map_partition_varattnos(defPartConstraint,
									1, defaultrel, rel);
		QueuePartitionConstraintValidation(wqueue, defaultrel,
										   defPartConstraint, true);

		/* keep our lock until commit. */
		table_close(defaultrel, NoLock);
	}

	ObjectAddressSet(address, RelationRelationId, RelationGetRelid(attachrel));

	/*
	 * If the partition we just attached is partitioned itself, invalidate
	 * relcache for all descendent partitions too to ensure that their
	 * rd_partcheck expression trees are rebuilt; partitions already locked at
	 * the beginning of this function.
	 */
	if (attachrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
	{
		ListCell   *l;

		foreach(l, attachrel_children)
		{
			CacheInvalidateRelcacheByRelid(lfirst_oid(l));
		}
	}

	/* keep our lock until commit */
	table_close(attachrel, NoLock);

	return address;
}

/*
 * AttachPartitionEnsureIndexes
 *		subroutine for ATExecAttachPartition to create/match indexes
 *
 * Enforce the indexing rule for partitioned tables during ALTER TABLE / ATTACH
 * PARTITION: every partition must have an index attached to each index on the
 * partitioned table.
 */
static void
AttachPartitionEnsureIndexes(Relation rel, Relation attachrel)
{
	List	   *idxes;
	List	   *attachRelIdxs;
	Relation   *attachrelIdxRels;
	IndexInfo **attachInfos;
	int			i;
	ListCell   *cell;
	MemoryContext cxt;
	MemoryContext oldcxt;

	/* Work in a throwaway context so all transient data is freed at the end */
	cxt = AllocSetContextCreate(CurrentMemoryContext,
								"AttachPartitionEnsureIndexes",
								ALLOCSET_DEFAULT_SIZES);
	oldcxt = MemoryContextSwitchTo(cxt);

	idxes = RelationGetIndexList(rel);
	attachRelIdxs = RelationGetIndexList(attachrel);
	attachrelIdxRels = palloc(sizeof(Relation) * list_length(attachRelIdxs));
	attachInfos = palloc(sizeof(IndexInfo *) * list_length(attachRelIdxs));

	/* Build arrays of all existing indexes and their IndexInfos */
	i = 0;
	foreach(cell, attachRelIdxs)
	{
		Oid			cldIdxId = lfirst_oid(cell);

		attachrelIdxRels[i] = index_open(cldIdxId, AccessShareLock);
		attachInfos[i] = BuildIndexInfo(attachrelIdxRels[i]);
		i++;
	}

	/*
	 * If we're attaching a foreign table, we must fail if any of the indexes
	 * is a constraint index; otherwise, there's nothing to do here.  Do this
	 * before starting work, to avoid wasting the effort of building a few
	 * non-unique indexes before coming across a unique one.
	 */
	if (attachrel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
	{
		foreach(cell, idxes)
		{
			Oid			idx = lfirst_oid(cell);
			Relation	idxRel = index_open(idx, AccessShareLock);

			if (idxRel->rd_index->indisunique ||
				idxRel->rd_index->indisprimary)
				ereport(ERROR,
						(errcode(ERRCODE_WRONG_OBJECT_TYPE),
						 errmsg("cannot attach foreign table \"%s\" as partition of partitioned table \"%s\"",
								RelationGetRelationName(attachrel),
								RelationGetRelationName(rel)),
						 errdetail("Partitioned table \"%s\" contains unique indexes.",
								   RelationGetRelationName(rel))));
			index_close(idxRel, AccessShareLock);
		}

		/* foreign tables have no storage, so no indexes to create/match */
		goto out;
	}

	/*
	 * For each index on the partitioned table, find a matching one in the
	 * partition-to-be; if one is not found, create one.
	 */
	foreach(cell, idxes)
	{
		Oid			idx = lfirst_oid(cell);
		Relation	idxRel = index_open(idx, AccessShareLock);
		IndexInfo  *info;
		AttrMap    *attmap;
		bool		found = false;
		Oid			constraintOid;

		/*
		 * Ignore indexes in the partitioned table other than partitioned
		 * indexes.
		 */
		if (idxRel->rd_rel->relkind != RELKIND_PARTITIONED_INDEX)
		{
			index_close(idxRel, AccessShareLock);
			continue;
		}

		/* construct an indexinfo to compare existing indexes against */
		info = BuildIndexInfo(idxRel);
		attmap = build_attrmap_by_name(RelationGetDescr(attachrel),
									   RelationGetDescr(rel));
		constraintOid = get_relation_idx_constraint_oid(RelationGetRelid(rel), idx);

		/*
		 * Scan the list of existing indexes in the partition-to-be, and mark
		 * the first matching, valid, unattached one we find, if any, as
		 * partition of the parent index.  If we find one, we're done.
		 */
		for (i = 0; i < list_length(attachRelIdxs); i++)
		{
			Oid			cldIdxId = RelationGetRelid(attachrelIdxRels[i]);
			Oid			cldConstrOid = InvalidOid;

			/* does this index have a parent?  if so, can't use it */
			if (attachrelIdxRels[i]->rd_rel->relispartition)
				continue;

			/* If this index is invalid, can't use it */
			if (!attachrelIdxRels[i]->rd_index->indisvalid)
				continue;

			if (CompareIndexInfo(attachInfos[i], info,
								 attachrelIdxRels[i]->rd_indcollation,
								 idxRel->rd_indcollation,
								 attachrelIdxRels[i]->rd_opfamily,
								 idxRel->rd_opfamily,
								 attmap))
			{
				/*
				 * If this index is being created in the parent because of a
				 * constraint, then the child needs to have a constraint also,
				 * so look for one.  If there is no such constraint, this
				 * index is no good, so keep looking.
				 */
				if (OidIsValid(constraintOid))
				{
					cldConstrOid =
						get_relation_idx_constraint_oid(RelationGetRelid(attachrel),
														cldIdxId);
					/* no dice */
					if (!OidIsValid(cldConstrOid))
						continue;
				}

				/* bingo. */
				IndexSetParentIndex(attachrelIdxRels[i], idx);
				if (OidIsValid(constraintOid))
					ConstraintSetParentConstraint(cldConstrOid, constraintOid,
												  RelationGetRelid(attachrel));
				found = true;

				CommandCounterIncrement();
				break;
			}
		}

		/*
		 * If no suitable index was found in the partition-to-be, create one
		 * now.
		 */
		if (!found)
		{
			IndexStmt  *stmt;
			Oid			constraintOid;

			stmt = generateClonedIndexStmt(NULL,
										   idxRel, attmap,
										   &constraintOid);
			DefineIndex(RelationGetRelid(attachrel), stmt, InvalidOid,
						RelationGetRelid(idxRel),
						constraintOid,
						true, false, false, false, false);
		}

		index_close(idxRel, AccessShareLock);
	}

out:
	/* Clean up. */
	for (i = 0; i < list_length(attachRelIdxs); i++)
		index_close(attachrelIdxRels[i], AccessShareLock);
	MemoryContextSwitchTo(oldcxt);
	MemoryContextDelete(cxt);
}

/*
 * CloneRowTriggersToPartition
 *		subroutine for ATExecAttachPartition/DefineRelation to create row
 *		triggers on partitions
 */
static void
CloneRowTriggersToPartition(Relation parent, Relation partition)
{
	Relation	pg_trigger;
	ScanKeyData key;
	SysScanDesc scan;
	HeapTuple	tuple;
	MemoryContext perTupCxt;

	/* Scan pg_trigger for all triggers belonging to the parent */
	ScanKeyInit(&key, Anum_pg_trigger_tgrelid, BTEqualStrategyNumber,
				F_OIDEQ, ObjectIdGetDatum(RelationGetRelid(parent)));
	pg_trigger = table_open(TriggerRelationId, RowExclusiveLock);
	scan = systable_beginscan(pg_trigger, TriggerRelidNameIndexId,
							  true, NULL, 1, &key);

	perTupCxt = AllocSetContextCreate(CurrentMemoryContext,
									  "clone trig", ALLOCSET_SMALL_SIZES);

	while (HeapTupleIsValid(tuple = systable_getnext(scan)))
	{
		Form_pg_trigger trigForm = (Form_pg_trigger) GETSTRUCT(tuple);
		CreateTrigStmt *trigStmt;
		Node	   *qual = NULL;
		Datum		value;
		bool		isnull;
		List	   *cols = NIL;
		List	   *trigargs = NIL;
		MemoryContext oldcxt;

		/*
		 * Ignore statement-level triggers; those are not cloned.
		 */
		if (!TRIGGER_FOR_ROW(trigForm->tgtype))
			continue;

		/*
		 * Don't clone internal triggers, because the constraint cloning code
		 * will.
		 */
		if (trigForm->tgisinternal)
			continue;

		/*
		 * Complain if we find an unexpected trigger type.
		 */
		if (!TRIGGER_FOR_BEFORE(trigForm->tgtype) &&
			!TRIGGER_FOR_AFTER(trigForm->tgtype))
			elog(ERROR, "unexpected trigger \"%s\" found",
				 NameStr(trigForm->tgname));

		/* Use short-lived context for CREATE TRIGGER */
		oldcxt = MemoryContextSwitchTo(perTupCxt);

		/*
		 * If there is a WHEN clause, generate a 'cooked' version of it that's
		 * appropriate for the partition.
		 */
		value = heap_getattr(tuple, Anum_pg_trigger_tgqual,
							 RelationGetDescr(pg_trigger), &isnull);
		if (!isnull)
		{
			qual = stringToNode(TextDatumGetCString(value));
			qual = (Node *) map_partition_varattnos((List *) qual, PRS2_OLD_VARNO,
													partition, parent);
			qual = (Node *) map_partition_varattnos((List *) qual, PRS2_NEW_VARNO,
													partition, parent);
		}

		/*
		 * If there is a column list, transform it to a list of column names.
		 * Note we don't need to map this list in any way ...
		 */
		if (trigForm->tgattr.dim1 > 0)
		{
			int			i;

			for (i = 0; i < trigForm->tgattr.dim1; i++)
			{
				Form_pg_attribute col;

				col = TupleDescAttr(parent->rd_att,
									trigForm->tgattr.values[i] - 1);
				cols = lappend(cols,
							   makeString(pstrdup(NameStr(col->attname))));
			}
		}

		/* Reconstruct trigger arguments list. */
		if (trigForm->tgnargs > 0)
		{
			char	   *p;

			value = heap_getattr(tuple, Anum_pg_trigger_tgargs,
								 RelationGetDescr(pg_trigger), &isnull);
			if (isnull)
				elog(ERROR, "tgargs is null for trigger \"%s\" in partition \"%s\"",
					 NameStr(trigForm->tgname), RelationGetRelationName(partition));

			p = (char *) VARDATA_ANY(DatumGetByteaPP(value));

			/* tgargs is a sequence of NUL-terminated strings, back to back */
			for (int i = 0; i < trigForm->tgnargs; i++)
			{
				trigargs = lappend(trigargs, makeString(pstrdup(p)));
				p += strlen(p) + 1;
			}
		}

		trigStmt = makeNode(CreateTrigStmt);
		trigStmt->replace = false;
		trigStmt->isconstraint = OidIsValid(trigForm->tgconstraint);
		trigStmt->trigname = NameStr(trigForm->tgname);
		trigStmt->relation = NULL;
		trigStmt->funcname = NULL;	/* passed separately */
		trigStmt->args = trigargs;
		trigStmt->row = true;
		trigStmt->timing = trigForm->tgtype & TRIGGER_TYPE_TIMING_MASK;
		trigStmt->events = trigForm->tgtype & TRIGGER_TYPE_EVENT_MASK;
		trigStmt->columns = cols;
		trigStmt->whenClause = NULL;	/* passed separately */
		trigStmt->transitionRels = NIL; /* not supported at present */
		trigStmt->deferrable = trigForm->tgdeferrable;
		trigStmt->initdeferred = trigForm->tginitdeferred;
		trigStmt->constrrel = NULL; /* passed separately */

		CreateTriggerFiringOn(trigStmt, NULL, RelationGetRelid(partition),
							  trigForm->tgconstrrelid, InvalidOid, InvalidOid,
							  trigForm->tgfoid, trigForm->oid, qual,
							  false, true, trigForm->tgenabled);

		MemoryContextSwitchTo(oldcxt);
		MemoryContextReset(perTupCxt);
	}

	MemoryContextDelete(perTupCxt);

	systable_endscan(scan);
	table_close(pg_trigger, RowExclusiveLock);
}

/*
 * ALTER TABLE DETACH PARTITION
 *
 * Return the address of the relation that is no longer a partition of rel.
 *
 * If concurrent mode is requested, we run in two transactions.  A side-
 * effect is that this command cannot run in a multi-part ALTER TABLE.
 * Currently, that's enforced by the grammar.
 *
 * The strategy for concurrency is to first modify the partition's
 * pg_inherits catalog row to make it visible to everyone that the
 * partition is detached, lock the partition against writes, and commit
 * the transaction; anyone who requests the partition descriptor from
 * that point onwards has to ignore such a partition.  In a second
 * transaction, we wait until all transactions that could have seen the
 * partition as attached are gone, then we remove the rest of partition
 * metadata (pg_inherits and pg_class.relpartbound).
 */
static ObjectAddress
ATExecDetachPartition(List **wqueue, AlteredTableInfo *tab, Relation rel,
					  RangeVar *name, bool concurrent)
{
	Relation	partRel;
	ObjectAddress address;
	Oid			defaultPartOid;

	/*
	 * We must lock the default partition, because detaching this partition
	 * will change its partition constraint.
	 */
	defaultPartOid =
		get_default_oid_from_partdesc(RelationGetPartitionDesc(rel, true));
	if (OidIsValid(defaultPartOid))
	{
		/*
		 * Concurrent detaching when a default partition exists is not
		 * supported. The main problem is that the default partition
		 * constraint would change.  And there's a definitional problem: what
		 * should happen to the tuples that are being inserted that belong to
		 * the partition being detached?  Putting them on the partition being
		 * detached would be wrong, since they'd become "lost" after the
		 * detaching completes but we cannot put them in the default partition
		 * either until we alter its partition constraint.
		 *
		 * I think we could solve this problem if we effected the constraint
		 * change before committing the first transaction.  But the lock would
		 * have to remain AEL and it would cause concurrent query planning to
		 * be blocked, so changing it that way would be even worse.
		 */
		if (concurrent)
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("cannot detach partitions concurrently when a default partition exists")));
		LockRelationOid(defaultPartOid, AccessExclusiveLock);
	}

	/*
	 * In concurrent mode, the partition is locked with share-update-exclusive
	 * in the first transaction.  This allows concurrent transactions to be
	 * doing DML to the partition.
	 */
	partRel = table_openrv(name, concurrent ? ShareUpdateExclusiveLock :
						   AccessExclusiveLock);

	/*
	 * Check inheritance conditions and either delete the pg_inherits row (in
	 * non-concurrent mode) or just set the inhdetachpending flag.
	 */
	if (!concurrent)
		RemoveInheritance(partRel, rel, false);
	else
		MarkInheritDetached(partRel, rel);

	/*
	 * Ensure that foreign keys still hold after this detach.  This keeps
	 * locks on the referencing tables, which prevents concurrent transactions
	 * from adding rows that we wouldn't see.  For this to work in concurrent
	 * mode, it is critical that the partition appears as no longer attached
	 * for the RI queries as soon as the first transaction commits.
	 */
	ATDetachCheckNoForeignKeyRefs(partRel);

	/*
	 * Concurrent mode has to work harder; first we add a new constraint to
	 * the partition that matches the partition constraint.  Then we close our
	 * existing transaction, and in a new one wait for all processes to catch
	 * up on the catalog updates we've done so far; at that point we can
	 * complete the operation.
	 */
	if (concurrent)
	{
		Oid			partrelid,
					parentrelid;
		LOCKTAG		tag;
		char	   *parentrelname;
		char	   *partrelname;

		/*
		 * Add a new constraint to the partition being detached, which
		 * supplants the partition constraint (unless there is one already).
		 */
		DetachAddConstraintIfNeeded(wqueue, partRel);

		/*
		 * We're almost done now; the only traces that remain are the
		 * pg_inherits tuple and the partition's relpartbound.  Before we can
		 * remove those, we need to wait until all transactions that know that
		 * this is a partition are gone.
		 */

		/*
		 * Remember relation OIDs to re-acquire them later; and relation names
		 * too, for error messages if something is dropped in between.
		 * (PortalContext survives the transaction boundary below.)
		 */
		partrelid = RelationGetRelid(partRel);
		parentrelid = RelationGetRelid(rel);
		parentrelname = MemoryContextStrdup(PortalContext,
											RelationGetRelationName(rel));
		partrelname = MemoryContextStrdup(PortalContext,
										  RelationGetRelationName(partRel));

		/* Invalidate relcache entries for the parent -- must be before close */
		CacheInvalidateRelcache(rel);

		table_close(partRel, NoLock);
		table_close(rel, NoLock);
		tab->rel = NULL;

		/* Make updated catalog entry visible */
		PopActiveSnapshot();
		CommitTransactionCommand();

		StartTransactionCommand();

		/*
		 * Now wait.  This ensures that all queries that were planned
		 * including the partition are finished before we remove the rest of
		 * catalog entries.  We don't need or indeed want to acquire this
		 * lock, though -- that would block later queries.
		 *
		 * We don't need to concern ourselves with waiting for a lock on the
		 * partition itself, since we will acquire AccessExclusiveLock below.
		 */
		SET_LOCKTAG_RELATION(tag, MyDatabaseId, parentrelid);
		WaitForLockersMultiple(list_make1(&tag), AccessExclusiveLock, false);

		/*
		 * Now acquire locks in both relations again.  Note they may have been
		 * removed in the meantime, so care is required.
		 */
		rel = try_relation_open(parentrelid, ShareUpdateExclusiveLock);
		partRel = try_relation_open(partrelid, AccessExclusiveLock);

		/* If the relations aren't there, something bad happened; bail out */
		if (rel == NULL)
		{
			if (partRel != NULL)	/* shouldn't happen */
				elog(WARNING, "dangling partition \"%s\" remains, can't fix",
					 partrelname);
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("partitioned table \"%s\" was removed concurrently",
							parentrelname)));
		}
		if (partRel == NULL)
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("partition \"%s\" was removed concurrently", partrelname)));

		tab->rel = rel;
	}

	/* Do the final part of detaching */
	DetachPartitionFinalize(rel, partRel, concurrent, defaultPartOid);

	ObjectAddressSet(address, RelationRelationId, RelationGetRelid(partRel));

	/* keep our lock until commit */
	table_close(partRel, NoLock);

	return address;
}

/*
 * Second part of ALTER TABLE .. DETACH.
 *
 * This is separate so that it can be run independently when the second
 * transaction of the concurrent algorithm fails (crash or abort).
 */
static void
DetachPartitionFinalize(Relation rel, Relation partRel, bool concurrent,
						Oid defaultPartOid)
{
	Relation	classRel;
	List	   *fks;
	ListCell   *cell;
	List	   *indexes;
	Datum		new_val[Natts_pg_class];
	bool		new_null[Natts_pg_class],
				new_repl[Natts_pg_class];
	HeapTuple	tuple,
				newtuple;
	Relation	trigrel = NULL;

	if (concurrent)
	{
		/*
		 * We can remove the pg_inherits row now. (In the non-concurrent case,
		 * this was already done).
		 */
		RemoveInheritance(partRel, rel, true);
	}

	/* Drop any triggers that were cloned on creation/attach. */
	DropClonedTriggersFromPartition(RelationGetRelid(partRel));

	/*
	 * Detach any foreign keys that are inherited.  This includes creating
	 * additional action triggers.
+ */ + fks = copyObject(RelationGetFKeyList(partRel)); + if (fks != NIL) + trigrel = table_open(TriggerRelationId, RowExclusiveLock); + foreach(cell, fks) + { + ForeignKeyCacheInfo *fk = lfirst(cell); + HeapTuple contup; + Form_pg_constraint conform; + Constraint *fkconstraint; + Oid insertTriggerOid, + updateTriggerOid; + + contup = SearchSysCache1(CONSTROID, ObjectIdGetDatum(fk->conoid)); + if (!HeapTupleIsValid(contup)) + elog(ERROR, "cache lookup failed for constraint %u", fk->conoid); + conform = (Form_pg_constraint) GETSTRUCT(contup); + + /* consider only the inherited foreign keys */ + if (conform->contype != CONSTRAINT_FOREIGN || + !OidIsValid(conform->conparentid)) + { + ReleaseSysCache(contup); + continue; + } + + /* unset conparentid and adjust conislocal, coninhcount, etc. */ + ConstraintSetParentConstraint(fk->conoid, InvalidOid, InvalidOid); + + /* + * Also, look up the partition's "check" triggers corresponding to the + * constraint being detached and detach them from the parent triggers. + */ + GetForeignKeyCheckTriggers(trigrel, + fk->conoid, fk->confrelid, fk->conrelid, + &insertTriggerOid, &updateTriggerOid); + Assert(OidIsValid(insertTriggerOid)); + TriggerSetParentTrigger(trigrel, insertTriggerOid, InvalidOid, + RelationGetRelid(partRel)); + Assert(OidIsValid(updateTriggerOid)); + TriggerSetParentTrigger(trigrel, updateTriggerOid, InvalidOid, + RelationGetRelid(partRel)); + + /* + * Make the action triggers on the referenced relation. When this was + * a partition the action triggers pointed to the parent rel (they + * still do), but now we need separate ones of our own. 
+ */ + fkconstraint = makeNode(Constraint); + fkconstraint->contype = CONSTRAINT_FOREIGN; + fkconstraint->conname = pstrdup(NameStr(conform->conname)); + fkconstraint->deferrable = conform->condeferrable; + fkconstraint->initdeferred = conform->condeferred; + fkconstraint->location = -1; + fkconstraint->pktable = NULL; + fkconstraint->fk_attrs = NIL; + fkconstraint->pk_attrs = NIL; + fkconstraint->fk_matchtype = conform->confmatchtype; + fkconstraint->fk_upd_action = conform->confupdtype; + fkconstraint->fk_del_action = conform->confdeltype; + fkconstraint->fk_del_set_cols = NIL; + fkconstraint->old_conpfeqop = NIL; + fkconstraint->old_pktable_oid = InvalidOid; + fkconstraint->skip_validation = false; + fkconstraint->initially_valid = true; + + createForeignKeyActionTriggers(partRel, conform->confrelid, + fkconstraint, fk->conoid, + conform->conindid, + InvalidOid, InvalidOid, + NULL, NULL); + + ReleaseSysCache(contup); + } + list_free_deep(fks); + if (trigrel) + table_close(trigrel, RowExclusiveLock); + + /* + * Any sub-constraints that are in the referenced-side of a larger + * constraint have to be removed. This partition is no longer part of the + * key space of the constraint. 
+ */ + foreach(cell, GetParentedForeignKeyRefs(partRel)) + { + Oid constrOid = lfirst_oid(cell); + ObjectAddress constraint; + + ConstraintSetParentConstraint(constrOid, InvalidOid, InvalidOid); + deleteDependencyRecordsForClass(ConstraintRelationId, + constrOid, + ConstraintRelationId, + DEPENDENCY_INTERNAL); + CommandCounterIncrement(); + + ObjectAddressSet(constraint, ConstraintRelationId, constrOid); + performDeletion(&constraint, DROP_RESTRICT, 0); + } + + /* Now we can detach indexes */ + indexes = RelationGetIndexList(partRel); + foreach(cell, indexes) + { + Oid idxid = lfirst_oid(cell); + Relation idx; + Oid constrOid; + + if (!has_superclass(idxid)) + continue; + + Assert((IndexGetRelation(get_partition_parent(idxid, false), false) == + RelationGetRelid(rel))); + + idx = index_open(idxid, AccessExclusiveLock); + IndexSetParentIndex(idx, InvalidOid); + + /* If there's a constraint associated with the index, detach it too */ + constrOid = get_relation_idx_constraint_oid(RelationGetRelid(partRel), + idxid); + if (OidIsValid(constrOid)) + ConstraintSetParentConstraint(constrOid, InvalidOid, InvalidOid); + + index_close(idx, NoLock); + } + + /* Update pg_class tuple */ + classRel = table_open(RelationRelationId, RowExclusiveLock); + tuple = SearchSysCacheCopy1(RELOID, + ObjectIdGetDatum(RelationGetRelid(partRel))); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for relation %u", + RelationGetRelid(partRel)); + Assert(((Form_pg_class) GETSTRUCT(tuple))->relispartition); + + /* Clear relpartbound and reset relispartition */ + memset(new_val, 0, sizeof(new_val)); + memset(new_null, false, sizeof(new_null)); + memset(new_repl, false, sizeof(new_repl)); + new_val[Anum_pg_class_relpartbound - 1] = (Datum) 0; + new_null[Anum_pg_class_relpartbound - 1] = true; + new_repl[Anum_pg_class_relpartbound - 1] = true; + newtuple = heap_modify_tuple(tuple, RelationGetDescr(classRel), + new_val, new_null, new_repl); + + ((Form_pg_class) 
GETSTRUCT(newtuple))->relispartition = false; + CatalogTupleUpdate(classRel, &newtuple->t_self, newtuple); + heap_freetuple(newtuple); + table_close(classRel, RowExclusiveLock); + + if (OidIsValid(defaultPartOid)) + { + /* + * If the relation being detached is the default partition itself, + * remove it from the parent's pg_partitioned_table entry. + * + * If not, we must invalidate default partition's relcache entry, as + * in StorePartitionBound: its partition constraint depends on every + * other partition's partition constraint. + */ + if (RelationGetRelid(partRel) == defaultPartOid) + update_default_partition_oid(RelationGetRelid(rel), InvalidOid); + else + CacheInvalidateRelcacheByRelid(defaultPartOid); + } + + /* + * Invalidate the parent's relcache so that the partition is no longer + * included in its partition descriptor. + */ + CacheInvalidateRelcache(rel); + + /* + * If the partition we just detached is partitioned itself, invalidate + * relcache for all descendent partitions too to ensure that their + * rd_partcheck expression trees are rebuilt; must lock partitions before + * doing so, using the same lockmode as what partRel has been locked with + * by the caller. + */ + if (partRel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + List *children; + + children = find_all_inheritors(RelationGetRelid(partRel), + AccessExclusiveLock, NULL); + foreach(cell, children) + { + CacheInvalidateRelcacheByRelid(lfirst_oid(cell)); + } + } +} + +/* + * ALTER TABLE ... DETACH PARTITION ... FINALIZE + * + * To use when a DETACH PARTITION command previously did not run to + * completion; this completes the detaching process. + */ +static ObjectAddress +ATExecDetachPartitionFinalize(Relation rel, RangeVar *name) +{ + Relation partRel; + ObjectAddress address; + Snapshot snap = GetActiveSnapshot(); + + partRel = table_openrv(name, AccessExclusiveLock); + + /* + * Wait until existing snapshots are gone. 
This is important if the + * second transaction of DETACH PARTITION CONCURRENTLY is canceled: the + * user could immediately run DETACH FINALIZE without actually waiting for + * existing transactions. We must not complete the detach action until + * all such queries are complete (otherwise we would present them with an + * inconsistent view of catalogs). + */ + WaitForOlderSnapshots(snap->xmin, false); + + DetachPartitionFinalize(rel, partRel, true, InvalidOid); + + ObjectAddressSet(address, RelationRelationId, RelationGetRelid(partRel)); + + table_close(partRel, NoLock); + + return address; +} + +/* + * DetachAddConstraintIfNeeded + * Subroutine for ATExecDetachPartition. Create a constraint that + * takes the place of the partition constraint, but avoid creating + * a dupe if an constraint already exists which implies the needed + * constraint. + */ +static void +DetachAddConstraintIfNeeded(List **wqueue, Relation partRel) +{ + List *constraintExpr; + + constraintExpr = RelationGetPartitionQual(partRel); + constraintExpr = (List *) eval_const_expressions(NULL, (Node *) constraintExpr); + + /* + * Avoid adding a new constraint if the needed constraint is implied by an + * existing constraint + */ + if (!PartConstraintImpliedByRelConstraint(partRel, constraintExpr)) + { + AlteredTableInfo *tab; + Constraint *n; + + tab = ATGetQueueEntry(wqueue, partRel); + + /* Add constraint on partition, equivalent to the partition constraint */ + n = makeNode(Constraint); + n->contype = CONSTR_CHECK; + n->conname = NULL; + n->location = -1; + n->is_no_inherit = false; + n->raw_expr = NULL; + n->cooked_expr = nodeToString(make_ands_explicit(constraintExpr)); + n->initially_valid = true; + n->skip_validation = true; + /* It's a re-add, since it nominally already exists */ + ATAddCheckConstraint(wqueue, tab, partRel, n, + true, false, true, ShareUpdateExclusiveLock); + } +} + +/* + * DropClonedTriggersFromPartition + * subroutine for ATExecDetachPartition to remove any triggers 
that were + * cloned to the partition when it was created-as-partition or attached. + * This undoes what CloneRowTriggersToPartition did. + */ +static void +DropClonedTriggersFromPartition(Oid partitionId) +{ + ScanKeyData skey; + SysScanDesc scan; + HeapTuple trigtup; + Relation tgrel; + ObjectAddresses *objects; + + objects = new_object_addresses(); + + /* + * Scan pg_trigger to search for all triggers on this rel. + */ + ScanKeyInit(&skey, Anum_pg_trigger_tgrelid, BTEqualStrategyNumber, + F_OIDEQ, ObjectIdGetDatum(partitionId)); + tgrel = table_open(TriggerRelationId, RowExclusiveLock); + scan = systable_beginscan(tgrel, TriggerRelidNameIndexId, + true, NULL, 1, &skey); + while (HeapTupleIsValid(trigtup = systable_getnext(scan))) + { + Form_pg_trigger pg_trigger = (Form_pg_trigger) GETSTRUCT(trigtup); + ObjectAddress trig; + + /* Ignore triggers that weren't cloned */ + if (!OidIsValid(pg_trigger->tgparentid)) + continue; + + /* + * Ignore internal triggers that are implementation objects of foreign + * keys, because these will be detached when the foreign keys + * themselves are. + */ + if (OidIsValid(pg_trigger->tgconstrrelid)) + continue; + + /* + * This is ugly, but necessary: remove the dependency markings on the + * trigger so that it can be removed. 
+ */ + deleteDependencyRecordsForClass(TriggerRelationId, pg_trigger->oid, + TriggerRelationId, + DEPENDENCY_PARTITION_PRI); + deleteDependencyRecordsForClass(TriggerRelationId, pg_trigger->oid, + RelationRelationId, + DEPENDENCY_PARTITION_SEC); + + /* remember this trigger to remove it below */ + ObjectAddressSet(trig, TriggerRelationId, pg_trigger->oid); + add_exact_object_address(&trig, objects); + } + + /* make the dependency removal visible to the deletion below */ + CommandCounterIncrement(); + performMultipleDeletions(objects, DROP_RESTRICT, PERFORM_DELETION_INTERNAL); + + /* done */ + free_object_addresses(objects); + systable_endscan(scan); + table_close(tgrel, RowExclusiveLock); +} + +/* + * Before acquiring lock on an index, acquire the same lock on the owning + * table. + */ +struct AttachIndexCallbackState +{ + Oid partitionOid; + Oid parentTblOid; + bool lockedParentTbl; +}; + +static void +RangeVarCallbackForAttachIndex(const RangeVar *rv, Oid relOid, Oid oldRelOid, + void *arg) +{ + struct AttachIndexCallbackState *state; + Form_pg_class classform; + HeapTuple tuple; + + state = (struct AttachIndexCallbackState *) arg; + + if (!state->lockedParentTbl) + { + LockRelationOid(state->parentTblOid, AccessShareLock); + state->lockedParentTbl = true; + } + + /* + * If we previously locked some other heap, and the name we're looking up + * no longer refers to an index on that relation, release the now-useless + * lock. XXX maybe we should do *after* we verify whether the index does + * not actually belong to the same relation ... + */ + if (relOid != oldRelOid && OidIsValid(state->partitionOid)) + { + UnlockRelationOid(state->partitionOid, AccessShareLock); + state->partitionOid = InvalidOid; + } + + /* Didn't find a relation, so no need for locking or permission checks. 
*/ + if (!OidIsValid(relOid)) + return; + + tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relOid)); + if (!HeapTupleIsValid(tuple)) + return; /* concurrently dropped, so nothing to do */ + classform = (Form_pg_class) GETSTRUCT(tuple); + if (classform->relkind != RELKIND_PARTITIONED_INDEX && + classform->relkind != RELKIND_INDEX) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("\"%s\" is not an index", rv->relname))); + ReleaseSysCache(tuple); + + /* + * Since we need only examine the heap's tupledesc, an access share lock + * on it (preventing any DDL) is sufficient. + */ + state->partitionOid = IndexGetRelation(relOid, false); + LockRelationOid(state->partitionOid, AccessShareLock); +} + +/* + * ALTER INDEX i1 ATTACH PARTITION i2 + */ +static ObjectAddress +ATExecAttachPartitionIdx(List **wqueue, Relation parentIdx, RangeVar *name) +{ + Relation partIdx; + Relation partTbl; + Relation parentTbl; + ObjectAddress address; + Oid partIdxId; + Oid currParent; + struct AttachIndexCallbackState state; + + /* + * We need to obtain lock on the index 'name' to modify it, but we also + * need to read its owning table's tuple descriptor -- so we need to lock + * both. To avoid deadlocks, obtain lock on the table before doing so on + * the index. Furthermore, we need to examine the parent table of the + * partition, so lock that one too. + */ + state.partitionOid = InvalidOid; + state.parentTblOid = parentIdx->rd_index->indrelid; + state.lockedParentTbl = false; + partIdxId = + RangeVarGetRelidExtended(name, AccessExclusiveLock, 0, + RangeVarCallbackForAttachIndex, + (void *) &state); + /* Not there? 
*/ + if (!OidIsValid(partIdxId)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("index \"%s\" does not exist", name->relname))); + + /* no deadlock risk: RangeVarGetRelidExtended already acquired the lock */ + partIdx = relation_open(partIdxId, AccessExclusiveLock); + + /* we already hold locks on both tables, so this is safe: */ + parentTbl = relation_open(parentIdx->rd_index->indrelid, AccessShareLock); + partTbl = relation_open(partIdx->rd_index->indrelid, NoLock); + + ObjectAddressSet(address, RelationRelationId, RelationGetRelid(partIdx)); + + /* Silently do nothing if already in the right state */ + currParent = partIdx->rd_rel->relispartition ? + get_partition_parent(partIdxId, false) : InvalidOid; + if (currParent != RelationGetRelid(parentIdx)) + { + IndexInfo *childInfo; + IndexInfo *parentInfo; + AttrMap *attmap; + bool found; + int i; + PartitionDesc partDesc; + Oid constraintOid, + cldConstrId = InvalidOid; + + /* + * If this partition already has an index attached, refuse the + * operation. 
+ */ + refuseDupeIndexAttach(parentIdx, partIdx, partTbl); + + if (OidIsValid(currParent)) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("cannot attach index \"%s\" as a partition of index \"%s\"", + RelationGetRelationName(partIdx), + RelationGetRelationName(parentIdx)), + errdetail("Index \"%s\" is already attached to another index.", + RelationGetRelationName(partIdx)))); + + /* Make sure it indexes a partition of the other index's table */ + partDesc = RelationGetPartitionDesc(parentTbl, true); + found = false; + for (i = 0; i < partDesc->nparts; i++) + { + if (partDesc->oids[i] == state.partitionOid) + { + found = true; + break; + } + } + if (!found) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("cannot attach index \"%s\" as a partition of index \"%s\"", + RelationGetRelationName(partIdx), + RelationGetRelationName(parentIdx)), + errdetail("Index \"%s\" is not an index on any partition of table \"%s\".", + RelationGetRelationName(partIdx), + RelationGetRelationName(parentTbl)))); + + /* Ensure the indexes are compatible */ + childInfo = BuildIndexInfo(partIdx); + parentInfo = BuildIndexInfo(parentIdx); + attmap = build_attrmap_by_name(RelationGetDescr(partTbl), + RelationGetDescr(parentTbl)); + if (!CompareIndexInfo(childInfo, parentInfo, + partIdx->rd_indcollation, + parentIdx->rd_indcollation, + partIdx->rd_opfamily, + parentIdx->rd_opfamily, + attmap)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("cannot attach index \"%s\" as a partition of index \"%s\"", + RelationGetRelationName(partIdx), + RelationGetRelationName(parentIdx)), + errdetail("The index definitions do not match."))); + + /* + * If there is a constraint in the parent, make sure there is one in + * the child too. 
+ */ + constraintOid = get_relation_idx_constraint_oid(RelationGetRelid(parentTbl), + RelationGetRelid(parentIdx)); + + if (OidIsValid(constraintOid)) + { + cldConstrId = get_relation_idx_constraint_oid(RelationGetRelid(partTbl), + partIdxId); + if (!OidIsValid(cldConstrId)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("cannot attach index \"%s\" as a partition of index \"%s\"", + RelationGetRelationName(partIdx), + RelationGetRelationName(parentIdx)), + errdetail("The index \"%s\" belongs to a constraint in table \"%s\" but no constraint exists for index \"%s\".", + RelationGetRelationName(parentIdx), + RelationGetRelationName(parentTbl), + RelationGetRelationName(partIdx)))); + } + + /* All good -- do it */ + IndexSetParentIndex(partIdx, RelationGetRelid(parentIdx)); + if (OidIsValid(constraintOid)) + ConstraintSetParentConstraint(cldConstrId, constraintOid, + RelationGetRelid(partTbl)); + + free_attrmap(attmap); + + validatePartitionedIndex(parentIdx, parentTbl); + } + + relation_close(parentTbl, AccessShareLock); + /* keep these locks till commit */ + relation_close(partTbl, NoLock); + relation_close(partIdx, NoLock); + + return address; +} + +/* + * Verify whether the given partition already contains an index attached + * to the given partitioned index. If so, raise an error. 
+ */ +static void +refuseDupeIndexAttach(Relation parentIdx, Relation partIdx, Relation partitionTbl) +{ + Oid existingIdx; + + existingIdx = index_get_partition(partitionTbl, + RelationGetRelid(parentIdx)); + if (OidIsValid(existingIdx)) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("cannot attach index \"%s\" as a partition of index \"%s\"", + RelationGetRelationName(partIdx), + RelationGetRelationName(parentIdx)), + errdetail("Another index is already attached for partition \"%s\".", + RelationGetRelationName(partitionTbl)))); +} + +/* + * Verify whether the set of attached partition indexes to a parent index on + * a partitioned table is complete. If it is, mark the parent index valid. + * + * This should be called each time a partition index is attached. + */ +static void +validatePartitionedIndex(Relation partedIdx, Relation partedTbl) +{ + Relation inheritsRel; + SysScanDesc scan; + ScanKeyData key; + int tuples = 0; + HeapTuple inhTup; + bool updated = false; + + Assert(partedIdx->rd_rel->relkind == RELKIND_PARTITIONED_INDEX); + + /* + * Scan pg_inherits for this parent index. Count each valid index we find + * (verifying the pg_index entry for each), and if we reach the total + * amount we expect, we can mark this parent index as valid. 
+ */ + inheritsRel = table_open(InheritsRelationId, AccessShareLock); + ScanKeyInit(&key, Anum_pg_inherits_inhparent, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(partedIdx))); + scan = systable_beginscan(inheritsRel, InheritsParentIndexId, true, + NULL, 1, &key); + while ((inhTup = systable_getnext(scan)) != NULL) + { + Form_pg_inherits inhForm = (Form_pg_inherits) GETSTRUCT(inhTup); + HeapTuple indTup; + Form_pg_index indexForm; + + indTup = SearchSysCache1(INDEXRELID, + ObjectIdGetDatum(inhForm->inhrelid)); + if (!HeapTupleIsValid(indTup)) + elog(ERROR, "cache lookup failed for index %u", inhForm->inhrelid); + indexForm = (Form_pg_index) GETSTRUCT(indTup); + if (indexForm->indisvalid) + tuples += 1; + ReleaseSysCache(indTup); + } + + /* Done with pg_inherits */ + systable_endscan(scan); + table_close(inheritsRel, AccessShareLock); + + /* + * If we found as many inherited indexes as the partitioned table has + * partitions, we're good; update pg_index to set indisvalid. + */ + if (tuples == RelationGetPartitionDesc(partedTbl, true)->nparts) + { + Relation idxRel; + HeapTuple indTup; + Form_pg_index indexForm; + + idxRel = table_open(IndexRelationId, RowExclusiveLock); + indTup = SearchSysCacheCopy1(INDEXRELID, + ObjectIdGetDatum(RelationGetRelid(partedIdx))); + if (!HeapTupleIsValid(indTup)) + elog(ERROR, "cache lookup failed for index %u", + RelationGetRelid(partedIdx)); + indexForm = (Form_pg_index) GETSTRUCT(indTup); + + indexForm->indisvalid = true; + updated = true; + + CatalogTupleUpdate(idxRel, &indTup->t_self, indTup); + + table_close(idxRel, RowExclusiveLock); + heap_freetuple(indTup); + } + + /* + * If this index is in turn a partition of a larger index, validating it + * might cause the parent to become valid also. Try that. 
+ */ + if (updated && partedIdx->rd_rel->relispartition) + { + Oid parentIdxId, + parentTblId; + Relation parentIdx, + parentTbl; + + /* make sure we see the validation we just did */ + CommandCounterIncrement(); + + parentIdxId = get_partition_parent(RelationGetRelid(partedIdx), false); + parentTblId = get_partition_parent(RelationGetRelid(partedTbl), false); + parentIdx = relation_open(parentIdxId, AccessExclusiveLock); + parentTbl = relation_open(parentTblId, AccessExclusiveLock); + Assert(!parentIdx->rd_index->indisvalid); + + validatePartitionedIndex(parentIdx, parentTbl); + + relation_close(parentIdx, AccessExclusiveLock); + relation_close(parentTbl, AccessExclusiveLock); + } +} + +/* + * Return an OID list of constraints that reference the given relation + * that are marked as having a parent constraints. + */ +static List * +GetParentedForeignKeyRefs(Relation partition) +{ + Relation pg_constraint; + HeapTuple tuple; + SysScanDesc scan; + ScanKeyData key[2]; + List *constraints = NIL; + + /* + * If no indexes, or no columns are referenceable by FKs, we can avoid the + * scan. + */ + if (RelationGetIndexList(partition) == NIL || + bms_is_empty(RelationGetIndexAttrBitmap(partition, + INDEX_ATTR_BITMAP_KEY))) + return NIL; + + /* Search for constraints referencing this table */ + pg_constraint = table_open(ConstraintRelationId, AccessShareLock); + ScanKeyInit(&key[0], + Anum_pg_constraint_confrelid, BTEqualStrategyNumber, + F_OIDEQ, ObjectIdGetDatum(RelationGetRelid(partition))); + ScanKeyInit(&key[1], + Anum_pg_constraint_contype, BTEqualStrategyNumber, + F_CHAREQ, CharGetDatum(CONSTRAINT_FOREIGN)); + + /* XXX This is a seqscan, as we don't have a usable index */ + scan = systable_beginscan(pg_constraint, InvalidOid, true, NULL, 2, key); + while ((tuple = systable_getnext(scan)) != NULL) + { + Form_pg_constraint constrForm = (Form_pg_constraint) GETSTRUCT(tuple); + + /* + * We only need to process constraints that are part of larger ones. 
+ */ + if (!OidIsValid(constrForm->conparentid)) + continue; + + constraints = lappend_oid(constraints, constrForm->oid); + } + + systable_endscan(scan); + table_close(pg_constraint, AccessShareLock); + + return constraints; +} + +/* + * During DETACH PARTITION, verify that any foreign keys pointing to the + * partitioned table would not become invalid. An error is raised if any + * referenced values exist. + */ +static void +ATDetachCheckNoForeignKeyRefs(Relation partition) +{ + List *constraints; + ListCell *cell; + + constraints = GetParentedForeignKeyRefs(partition); + + foreach(cell, constraints) + { + Oid constrOid = lfirst_oid(cell); + HeapTuple tuple; + Form_pg_constraint constrForm; + Relation rel; + Trigger trig; + + tuple = SearchSysCache1(CONSTROID, ObjectIdGetDatum(constrOid)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for constraint %u", constrOid); + constrForm = (Form_pg_constraint) GETSTRUCT(tuple); + + Assert(OidIsValid(constrForm->conparentid)); + Assert(constrForm->confrelid == RelationGetRelid(partition)); + + /* prevent data changes into the referencing table until commit */ + rel = table_open(constrForm->conrelid, ShareLock); + + MemSet(&trig, 0, sizeof(trig)); + trig.tgoid = InvalidOid; + trig.tgname = NameStr(constrForm->conname); + trig.tgenabled = TRIGGER_FIRES_ON_ORIGIN; + trig.tgisinternal = true; + trig.tgconstrrelid = RelationGetRelid(partition); + trig.tgconstrindid = constrForm->conindid; + trig.tgconstraint = constrForm->oid; + trig.tgdeferrable = false; + trig.tginitdeferred = false; + /* we needn't fill in remaining fields */ + + RI_PartitionRemove_Check(&trig, rel, partition); + + ReleaseSysCache(tuple); + + table_close(rel, NoLock); + } +} + +/* + * resolve column compression specification to compression method. 
+ */ +static char +GetAttributeCompression(Oid atttypid, char *compression) +{ + char cmethod; + + if (compression == NULL || strcmp(compression, "default") == 0) + return InvalidCompressionMethod; + + /* + * To specify a nondefault method, the column data type must be toastable. + * Note this says nothing about whether the column's attstorage setting + * permits compression; we intentionally allow attstorage and + * attcompression to be independent. But with a non-toastable type, + * attstorage could not be set to a value that would permit compression. + * + * We don't actually need to enforce this, since nothing bad would happen + * if attcompression were non-default; it would never be consulted. But + * it seems more user-friendly to complain about a certainly-useless + * attempt to set the property. + */ + if (!TypeIsToastable(atttypid)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("column data type %s does not support compression", + format_type_be(atttypid)))); + + cmethod = CompressionNameToMethod(compression); + if (!CompressionMethodIsValid(cmethod)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid compression method \"%s\"", compression))); + + return cmethod; +} diff --git a/src/backend/commands/tablespace.c b/src/backend/commands/tablespace.c new file mode 100644 index 0000000..9bdfef9 --- /dev/null +++ b/src/backend/commands/tablespace.c @@ -0,0 +1,1595 @@ +/*------------------------------------------------------------------------- + * + * tablespace.c + * Commands to manipulate table spaces + * + * Tablespaces in PostgreSQL are designed to allow users to determine + * where the data file(s) for a given database object reside on the file + * system. + * + * A tablespace represents a directory on the file system. At tablespace + * creation time, the directory must be empty. 
To simplify things and + * remove the possibility of having file name conflicts, we isolate + * files within a tablespace into database-specific subdirectories. + * + * To support file access via the information given in RelFileNode, we + * maintain a symbolic-link map in $PGDATA/pg_tblspc. The symlinks are + * named by tablespace OIDs and point to the actual tablespace directories. + * There is also a per-cluster version directory in each tablespace. + * Thus the full path to an arbitrary file is + * $PGDATA/pg_tblspc/spcoid/PG_MAJORVER_CATVER/dboid/relfilenode + * e.g. + * $PGDATA/pg_tblspc/20981/PG_9.0_201002161/719849/83292814 + * + * There are two tablespaces created at initdb time: pg_global (for shared + * tables) and pg_default (for everything else). For backwards compatibility + * and to remain functional on platforms without symlinks, these tablespaces + * are accessed specially: they are respectively + * $PGDATA/global/relfilenode + * $PGDATA/base/dboid/relfilenode + * + * To allow CREATE DATABASE to give a new database a default tablespace + * that's different from the template database's default, we make the + * provision that a zero in pg_class.reltablespace means the database's + * default tablespace. Without this, CREATE DATABASE would have to go in + * and munge the system catalogs of the new database. 
+ * + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/tablespace.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include +#include +#include + +#include "access/heapam.h" +#include "access/htup_details.h" +#include "access/reloptions.h" +#include "access/sysattr.h" +#include "access/tableam.h" +#include "access/xact.h" +#include "access/xloginsert.h" +#include "access/xlogutils.h" +#include "catalog/binary_upgrade.h" +#include "catalog/catalog.h" +#include "catalog/dependency.h" +#include "catalog/indexing.h" +#include "catalog/namespace.h" +#include "catalog/objectaccess.h" +#include "catalog/pg_namespace.h" +#include "catalog/pg_tablespace.h" +#include "commands/comment.h" +#include "commands/seclabel.h" +#include "commands/tablecmds.h" +#include "commands/tablespace.h" +#include "common/file_perm.h" +#include "miscadmin.h" +#include "postmaster/bgwriter.h" +#include "storage/fd.h" +#include "storage/lmgr.h" +#include "storage/standby.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" +#include "utils/guc.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/rel.h" +#include "utils/varlena.h" + +/* GUC variables */ +char *default_tablespace = NULL; +char *temp_tablespaces = NULL; +bool allow_in_place_tablespaces = false; + +Oid binary_upgrade_next_pg_tablespace_oid = InvalidOid; + +static void create_tablespace_directories(const char *location, + const Oid tablespaceoid); +static bool destroy_tablespace_directories(Oid tablespaceoid, bool redo); + + +/* + * Each database using a table space is isolated into its own name space + * by a subdirectory named for the database OID. On first creation of an + * object in the tablespace, create the subdirectory. 
If the subdirectory
 * already exists, fall through quietly.
 *
 * isRedo indicates that we are creating an object during WAL replay.
 * In this case we will cope with the possibility of the tablespace
 * directory not being there either --- this could happen if we are
 * replaying an operation on a table in a subsequently-dropped tablespace.
 * We handle this by making a directory in the place where the tablespace
 * symlink would normally be.  This isn't an exact replay of course, but
 * it's the best we can do given the available information.
 *
 * If tablespaces are not supported, we still need it in case we have to
 * re-create a database subdirectory (of $PGDATA/base) during WAL replay.
 *
 * spcNode: tablespace OID; dbNode: database OID.  Both must be valid
 * (asserted below), except that the global tablespace is a no-op.
 */
void
TablespaceCreateDbspace(Oid spcNode, Oid dbNode, bool isRedo)
{
	struct stat st;
	char	   *dir;

	/*
	 * The global tablespace doesn't have per-database subdirectories, so
	 * nothing to do for it.
	 */
	if (spcNode == GLOBALTABLESPACE_OID)
		return;

	Assert(OidIsValid(spcNode));
	Assert(OidIsValid(dbNode));

	dir = GetDatabasePath(dbNode, spcNode);

	if (stat(dir, &st) < 0)
	{
		/* Directory does not exist? */
		if (errno == ENOENT)
		{
			/*
			 * Acquire TablespaceCreateLock to ensure that no DROP TABLESPACE
			 * or TablespaceCreateDbspace is running concurrently.
			 */
			LWLockAcquire(TablespaceCreateLock, LW_EXCLUSIVE);

			/*
			 * Recheck to see if someone created the directory while we were
			 * waiting for lock.
			 */
			if (stat(dir, &st) == 0 && S_ISDIR(st.st_mode))
			{
				/* Directory was created by a concurrent caller; nothing to do */
			}
			else
			{
				/* Directory creation failed? */
				if (MakePGDirectory(dir) < 0)
				{
					/* Failure other than not exists or not in WAL replay? */
					if (errno != ENOENT || !isRedo)
						ereport(ERROR,
								(errcode_for_file_access(),
								 errmsg("could not create directory \"%s\": %m",
										dir)));

					/*
					 * During WAL replay, it's conceivable that several levels
					 * of directories are missing if tablespaces are dropped
					 * further ahead of the WAL stream than we're currently
					 * replaying.  An easy way forward is to create them as
					 * plain directories and hope they are removed by further
					 * WAL replay if necessary.  If this also fails, there is
					 * trouble we cannot get out of, so just report that and
					 * bail out.
					 */
					if (pg_mkdir_p(dir, pg_dir_create_mode) < 0)
						ereport(ERROR,
								(errcode_for_file_access(),
								 errmsg("could not create directory \"%s\": %m",
										dir)));
				}
			}

			LWLockRelease(TablespaceCreateLock);
		}
		else
		{
			/* stat() failed for some reason other than nonexistence */
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("could not stat directory \"%s\": %m", dir)));
		}
	}
	else
	{
		/* Path exists: it must already be a directory, else complain */
		if (!S_ISDIR(st.st_mode))
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("\"%s\" exists but is not a directory",
							dir)));
	}

	pfree(dir);
}

/*
 * Create a table space
 *
 * Only superusers can create a tablespace.
This seems a reasonable restriction
 * since we're determining the system layout and, anyway, we probably have
 * root if we're doing this kind of activity
 *
 * Returns the OID of the new pg_tablespace row.
 */
Oid
CreateTableSpace(CreateTableSpaceStmt *stmt)
{
#ifdef HAVE_SYMLINK
	Relation	rel;
	Datum		values[Natts_pg_tablespace];
	bool		nulls[Natts_pg_tablespace];
	HeapTuple	tuple;
	Oid			tablespaceoid;
	char	   *location;
	Oid			ownerId;
	Datum		newOptions;
	bool		in_place;

	/* Must be superuser */
	if (!superuser())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied to create tablespace \"%s\"",
						stmt->tablespacename),
				 errhint("Must be superuser to create a tablespace.")));

	/* However, the eventual owner of the tablespace need not be */
	if (stmt->owner)
		ownerId = get_rolespec_oid(stmt->owner, false);
	else
		ownerId = GetUserId();

	/* Unix-ify the offered path, and strip any trailing slashes */
	location = pstrdup(stmt->location);
	canonicalize_path(location);

	/* disallow quotes, else CREATE DATABASE would be at risk */
	if (strchr(location, '\''))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_NAME),
				 errmsg("tablespace location cannot contain single quotes")));

	/* Empty location plus the GUC means a developer-only in-place tablespace */
	in_place = allow_in_place_tablespaces && strlen(location) == 0;

	/*
	 * Allowing relative paths seems risky
	 *
	 * This also helps us ensure that location is not empty or whitespace,
	 * unless specifying a developer-only in-place tablespace.
	 */
	if (!in_place && !is_absolute_path(location))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("tablespace location must be an absolute path")));

	/*
	 * Check that location isn't too long.  Remember that we're going to
	 * append 'PG_XXX/<dboid>/<relid>_<fork>.<nnn>'.  (The angle-bracketed
	 * template was lost in extraction; restored from the length terms below.)
	 * FYI, we never actually reference the whole path here, but
	 * MakePGDirectory() uses the first two parts.
	 */
	if (strlen(location) + 1 + strlen(TABLESPACE_VERSION_DIRECTORY) + 1 +
		OIDCHARS + 1 + OIDCHARS + 1 + FORKNAMECHARS + 1 + OIDCHARS > MAXPGPATH)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("tablespace location \"%s\" is too long",
						location)));

	/* Warn if the tablespace is in the data directory. */
	if (path_is_prefix_of_path(DataDir, location))
		ereport(WARNING,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("tablespace location should not be inside the data directory")));

	/*
	 * Disallow creation of tablespaces named "pg_xxx"; we reserve this
	 * namespace for system purposes.
	 */
	if (!allowSystemTableMods && IsReservedName(stmt->tablespacename))
		ereport(ERROR,
				(errcode(ERRCODE_RESERVED_NAME),
				 errmsg("unacceptable tablespace name \"%s\"",
						stmt->tablespacename),
				 errdetail("The prefix \"pg_\" is reserved for system tablespaces.")));

	/*
	 * If built with appropriate switch, whine when regression-testing
	 * conventions for tablespace names are violated.
	 */
#ifdef ENFORCE_REGRESSION_TEST_NAME_RESTRICTIONS
	if (strncmp(stmt->tablespacename, "regress_", 8) != 0)
		elog(WARNING, "tablespaces created by regression test cases should have names starting with \"regress_\"");
#endif

	/*
	 * Check that there is no other tablespace by this name.  (The unique
	 * index would catch this anyway, but might as well give a friendlier
	 * message.)
	 */
	if (OidIsValid(get_tablespace_oid(stmt->tablespacename, true)))
		ereport(ERROR,
				(errcode(ERRCODE_DUPLICATE_OBJECT),
				 errmsg("tablespace \"%s\" already exists",
						stmt->tablespacename)));

	/*
	 * Insert tuple into pg_tablespace.  The purpose of doing this first is to
	 * lock the proposed tablename against other would-be creators.  The
	 * insertion will roll back if we find problems below.
	 */
	rel = table_open(TableSpaceRelationId, RowExclusiveLock);

	MemSet(nulls, false, sizeof(nulls));

	if (IsBinaryUpgrade)
	{
		/* Use binary-upgrade override for tablespace oid */
		if (!OidIsValid(binary_upgrade_next_pg_tablespace_oid))
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("pg_tablespace OID value not set when in binary upgrade mode")));

		tablespaceoid = binary_upgrade_next_pg_tablespace_oid;
		/* consume the override so it can't be reused accidentally */
		binary_upgrade_next_pg_tablespace_oid = InvalidOid;
	}
	else
		tablespaceoid = GetNewOidWithIndex(rel, TablespaceOidIndexId,
										   Anum_pg_tablespace_oid);
	values[Anum_pg_tablespace_oid - 1] = ObjectIdGetDatum(tablespaceoid);
	values[Anum_pg_tablespace_spcname - 1] =
		DirectFunctionCall1(namein, CStringGetDatum(stmt->tablespacename));
	values[Anum_pg_tablespace_spcowner - 1] =
		ObjectIdGetDatum(ownerId);
	nulls[Anum_pg_tablespace_spcacl - 1] = true;

	/* Generate new proposed spcoptions (text array) */
	newOptions = transformRelOptions((Datum) 0,
									 stmt->options,
									 NULL, NULL, false, false);
	/* validate options, discarding the parsed result */
	(void) tablespace_reloptions(newOptions, true);
	if (newOptions != (Datum) 0)
		values[Anum_pg_tablespace_spcoptions - 1] = newOptions;
	else
		nulls[Anum_pg_tablespace_spcoptions - 1] = true;

	tuple = heap_form_tuple(rel->rd_att, values, nulls);

	CatalogTupleInsert(rel, tuple);

	heap_freetuple(tuple);

	/* Record dependency on owner */
	recordDependencyOnOwner(TableSpaceRelationId, tablespaceoid, ownerId);

	/* Post creation hook for new tablespace */
	InvokeObjectPostCreateHook(TableSpaceRelationId, tablespaceoid, 0);

	create_tablespace_directories(location, tablespaceoid);

	/* Record the filesystem change in XLOG */
	{
		xl_tblspc_create_rec xlrec;

		xlrec.ts_id = tablespaceoid;

		XLogBeginInsert();
		/* register the fixed-size prefix, then the path as a separate chunk */
		XLogRegisterData((char *) &xlrec,
						 offsetof(xl_tblspc_create_rec, ts_path));
		XLogRegisterData((char *) location, strlen(location) + 1);

		(void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_CREATE);
	}

	/*
	 * Force synchronous commit, to minimize the window between creating the
	 * symlink on-disk and marking the transaction committed.  It's not great
	 * that there is any window at all, but definitely we don't want to make
	 * it larger than necessary.
	 */
	ForceSyncCommit();

	pfree(location);

	/* We keep the lock on pg_tablespace until commit */
	table_close(rel, NoLock);

	return tablespaceoid;
#else							/* !HAVE_SYMLINK */
	ereport(ERROR,
			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
			 errmsg("tablespaces are not supported on this platform")));
	return InvalidOid;			/* keep compiler quiet */
#endif							/* HAVE_SYMLINK */
}

/*
 * Drop a table space
 *
 * Be careful to check that the tablespace is empty.
 */
void
DropTableSpace(DropTableSpaceStmt *stmt)
{
#ifdef HAVE_SYMLINK
	char	   *tablespacename = stmt->tablespacename;
	TableScanDesc scandesc;
	Relation	rel;
	HeapTuple	tuple;
	Form_pg_tablespace spcform;
	ScanKeyData entry[1];
	Oid			tablespaceoid;
	char	   *detail;
	char	   *detail_log;

	/*
	 * Find the target tuple
	 */
	rel = table_open(TableSpaceRelationId, RowExclusiveLock);

	ScanKeyInit(&entry[0],
				Anum_pg_tablespace_spcname,
				BTEqualStrategyNumber, F_NAMEEQ,
				CStringGetDatum(tablespacename));
	scandesc = table_beginscan_catalog(rel, 1, entry);
	tuple = heap_getnext(scandesc, ForwardScanDirection);

	if (!HeapTupleIsValid(tuple))
	{
		if (!stmt->missing_ok)
		{
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_OBJECT),
					 errmsg("tablespace \"%s\" does not exist",
							tablespacename)));
		}
		else
		{
			/* IF EXISTS: notice, release scan and lock, and bail out */
			ereport(NOTICE,
					(errmsg("tablespace \"%s\" does not exist, skipping",
							tablespacename)));
			table_endscan(scandesc);
			table_close(rel, NoLock);
		}
		return;
	}

	spcform = (Form_pg_tablespace) GETSTRUCT(tuple);
	tablespaceoid = spcform->oid;

	/* Must be tablespace owner */
	if (!pg_tablespace_ownercheck(tablespaceoid, GetUserId()))
		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_TABLESPACE,
					   tablespacename);

	/* Disallow drop of the standard tablespaces, even by superuser */
	if (IsPinnedObject(TableSpaceRelationId, tablespaceoid))
		aclcheck_error(ACLCHECK_NO_PRIV, OBJECT_TABLESPACE,
					   tablespacename);

	/* Check for pg_shdepend entries depending on this tablespace */
	if (checkSharedDependencies(TableSpaceRelationId, tablespaceoid,
								&detail, &detail_log))
		ereport(ERROR,
				(errcode(ERRCODE_DEPENDENT_OBJECTS_STILL_EXIST),
				 errmsg("tablespace \"%s\" cannot be dropped because some objects depend on it",
						tablespacename),
				 errdetail_internal("%s", detail),
				 errdetail_log("%s", detail_log)));

	/* DROP hook for the tablespace being removed */
	InvokeObjectDropHook(TableSpaceRelationId, tablespaceoid, 0);

	/*
	 * Remove the pg_tablespace tuple (this will roll back if we fail below)
	 */
	CatalogTupleDelete(rel, &tuple->t_self);

	table_endscan(scandesc);

	/*
	 * Remove any comments or security labels on this tablespace.
	 */
	DeleteSharedComments(tablespaceoid, TableSpaceRelationId);
	DeleteSharedSecurityLabel(tablespaceoid, TableSpaceRelationId);

	/*
	 * Remove dependency on owner.
	 */
	deleteSharedDependencyRecordsFor(TableSpaceRelationId, tablespaceoid, 0);

	/*
	 * Acquire TablespaceCreateLock to ensure that no TablespaceCreateDbspace
	 * is running concurrently.
	 */
	LWLockAcquire(TablespaceCreateLock, LW_EXCLUSIVE);

	/*
	 * Try to remove the physical infrastructure.
	 */
	if (!destroy_tablespace_directories(tablespaceoid, false))
	{
		/*
		 * Not all files deleted?  However, there can be lingering empty files
		 * in the directories, left behind by for example DROP TABLE, that
		 * have been scheduled for deletion at next checkpoint (see comments
		 * in mdunlink() for details).  We could just delete them immediately,
		 * but we can't tell them apart from important data files that we
		 * mustn't delete.  So instead, we force a checkpoint which will clean
		 * out any lingering files, and try again.
		 */
		RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT);

		/*
		 * On Windows, an unlinked file persists in the directory listing
		 * until no process retains an open handle for the file.  The DDL
		 * commands that schedule files for unlink send invalidation messages
		 * directing other PostgreSQL processes to close the files, but
		 * nothing guarantees they'll be processed in time.  So, we'll also
		 * use a global barrier to ask all backends to close all files, and
		 * wait until they're finished.
		 */
		LWLockRelease(TablespaceCreateLock);
		WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));
		LWLockAcquire(TablespaceCreateLock, LW_EXCLUSIVE);

		/* And now try again. */
		if (!destroy_tablespace_directories(tablespaceoid, false))
		{
			/* Still not empty, the files must be important then */
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("tablespace \"%s\" is not empty",
							tablespacename)));
		}
	}

	/* Record the filesystem change in XLOG */
	{
		xl_tblspc_drop_rec xlrec;

		xlrec.ts_id = tablespaceoid;

		XLogBeginInsert();
		XLogRegisterData((char *) &xlrec, sizeof(xl_tblspc_drop_rec));

		(void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_DROP);
	}

	/*
	 * Note: because we checked that the tablespace was empty, there should be
	 * no need to worry about flushing shared buffers or free space map
	 * entries for relations in the tablespace.
	 */

	/*
	 * Force synchronous commit, to minimize the window between removing the
	 * files on-disk and marking the transaction committed.  It's not great
	 * that there is any window at all, but definitely we don't want to make
	 * it larger than necessary.
	 */
	ForceSyncCommit();

	/*
	 * Allow TablespaceCreateDbspace again.
+ */ + LWLockRelease(TablespaceCreateLock); + + /* We keep the lock on pg_tablespace until commit */ + table_close(rel, NoLock); +#else /* !HAVE_SYMLINK */ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("tablespaces are not supported on this platform"))); +#endif /* HAVE_SYMLINK */ +} + + +/* + * create_tablespace_directories + * + * Attempt to create filesystem infrastructure linking $PGDATA/pg_tblspc/ + * to the specified directory + */ +static void +create_tablespace_directories(const char *location, const Oid tablespaceoid) +{ + char *linkloc; + char *location_with_version_dir; + struct stat st; + bool in_place; + + linkloc = psprintf("pg_tblspc/%u", tablespaceoid); + + /* + * If we're asked to make an 'in place' tablespace, create the directory + * directly where the symlink would normally go. This is a developer-only + * option for now, to facilitate regression testing. + */ + in_place = strlen(location) == 0; + + if (in_place) + { + if (MakePGDirectory(linkloc) < 0 && errno != EEXIST) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not create directory \"%s\": %m", + linkloc))); + } + + location_with_version_dir = psprintf("%s/%s", in_place ? linkloc : location, + TABLESPACE_VERSION_DIRECTORY); + + /* + * Attempt to coerce target directory to safe permissions. If this fails, + * it doesn't exist or has the wrong owner. Not needed for in-place mode, + * because in that case we created the directory with the desired + * permissions. + */ + if (!in_place && chmod(location, pg_dir_create_mode) != 0) + { + if (errno == ENOENT) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FILE), + errmsg("directory \"%s\" does not exist", location), + InRecovery ? 
errhint("Create this directory for the tablespace before " + "restarting the server.") : 0)); + else + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not set permissions on directory \"%s\": %m", + location))); + } + + /* + * The creation of the version directory prevents more than one tablespace + * in a single location. This imitates TablespaceCreateDbspace(), but it + * ignores concurrency and missing parent directories. The chmod() would + * have failed in the absence of a parent. pg_tablespace_spcname_index + * prevents concurrency. + */ + if (stat(location_with_version_dir, &st) < 0) + { + if (errno != ENOENT) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not stat directory \"%s\": %m", + location_with_version_dir))); + else if (MakePGDirectory(location_with_version_dir) < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not create directory \"%s\": %m", + location_with_version_dir))); + } + else if (!S_ISDIR(st.st_mode)) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" exists but is not a directory", + location_with_version_dir))); + else if (!InRecovery) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_IN_USE), + errmsg("directory \"%s\" already in use as a tablespace", + location_with_version_dir))); + + /* + * In recovery, remove old symlink, in case it points to the wrong place. + */ + if (!in_place && InRecovery) + remove_tablespace_symlink(linkloc); + + /* + * Create the symlink under PGDATA + */ + if (!in_place && symlink(location, linkloc) < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not create symbolic link \"%s\": %m", + linkloc))); + + pfree(linkloc); + pfree(location_with_version_dir); +} + + +/* + * destroy_tablespace_directories + * + * Attempt to remove filesystem infrastructure for the tablespace. + * + * 'redo' indicates we are redoing a drop from XLOG; in that case we should + * not throw an ERROR for problems, just LOG them. 
The worst consequence of
 * not removing files here would be failure to release some disk space, which
 * does not justify throwing an error that would require manual intervention
 * to get the database running again.
 *
 * Returns true if successful, false if some subdirectory is not empty
 */
static bool
destroy_tablespace_directories(Oid tablespaceoid, bool redo)
{
	char	   *linkloc;
	char	   *linkloc_with_version_dir;
	DIR		   *dirdesc;
	struct dirent *de;
	char	   *subfile;
	struct stat st;

	linkloc_with_version_dir = psprintf("pg_tblspc/%u/%s", tablespaceoid,
										TABLESPACE_VERSION_DIRECTORY);

	/*
	 * Check if the tablespace still contains any files.  We try to rmdir each
	 * per-database directory we find in it.  rmdir failure implies there are
	 * still files in that subdirectory, so give up.  (We do not have to worry
	 * about undoing any already completed rmdirs, since the next attempt to
	 * use the tablespace from that database will simply recreate the
	 * subdirectory via TablespaceCreateDbspace.)
	 *
	 * Since we hold TablespaceCreateLock, no one else should be creating any
	 * fresh subdirectories in parallel.  It is possible that new files are
	 * being created within subdirectories, though, so the rmdir call could
	 * fail.  Worst consequence is a less friendly error message.
	 *
	 * If redo is true then ENOENT is a likely outcome here, and we allow it
	 * to pass without comment.  In normal operation we still allow it, but
	 * with a warning.  This is because even though ProcessUtility disallows
	 * DROP TABLESPACE in a transaction block, it's possible that a previous
	 * DROP failed and rolled back after removing the tablespace directories
	 * and/or symlink.  We want to allow a new DROP attempt to succeed at
	 * removing the catalog entries (and symlink if still present), so we
	 * should not give a hard error here.
	 */
	dirdesc = AllocateDir(linkloc_with_version_dir);
	if (dirdesc == NULL)
	{
		if (errno == ENOENT)
		{
			if (!redo)
				ereport(WARNING,
						(errcode_for_file_access(),
						 errmsg("could not open directory \"%s\": %m",
								linkloc_with_version_dir)));
			/* The symlink might still exist, so go try to remove it */
			goto remove_symlink;
		}
		else if (redo)
		{
			/* in redo, just log other types of error */
			ereport(LOG,
					(errcode_for_file_access(),
					 errmsg("could not open directory \"%s\": %m",
							linkloc_with_version_dir)));
			pfree(linkloc_with_version_dir);
			return false;
		}
		/* else let ReadDir report the error */
	}

	while ((de = ReadDir(dirdesc, linkloc_with_version_dir)) != NULL)
	{
		/* skip the self and parent entries */
		if (strcmp(de->d_name, ".") == 0 ||
			strcmp(de->d_name, "..") == 0)
			continue;

		subfile = psprintf("%s/%s", linkloc_with_version_dir, de->d_name);

		/* This check is just to deliver a friendlier error message */
		if (!redo && !directory_is_empty(subfile))
		{
			FreeDir(dirdesc);
			pfree(subfile);
			pfree(linkloc_with_version_dir);
			return false;
		}

		/* remove empty directory */
		if (rmdir(subfile) < 0)
			ereport(redo ? LOG : ERROR,
					(errcode_for_file_access(),
					 errmsg("could not remove directory \"%s\": %m",
							subfile)));

		pfree(subfile);
	}

	FreeDir(dirdesc);

	/* remove version directory */
	if (rmdir(linkloc_with_version_dir) < 0)
	{
		ereport(redo ? LOG : ERROR,
				(errcode_for_file_access(),
				 errmsg("could not remove directory \"%s\": %m",
						linkloc_with_version_dir)));
		pfree(linkloc_with_version_dir);
		return false;
	}

	/*
	 * Try to remove the symlink.  We must however deal with the possibility
	 * that it's a directory instead of a symlink --- this could happen during
	 * WAL replay (see TablespaceCreateDbspace), and it is also the case on
	 * Windows where junction points lstat() as directories.
	 *
	 * Note: in the redo case, we'll return true if this final step fails;
	 * there's no point in retrying it.  Also, ENOENT should provoke no more
	 * than a warning.
	 */
remove_symlink:
	linkloc = pstrdup(linkloc_with_version_dir);
	get_parent_directory(linkloc);
	if (lstat(linkloc, &st) < 0)
	{
		int			saved_errno = errno;

		ereport(redo ? LOG : (saved_errno == ENOENT ? WARNING : ERROR),
				(errcode_for_file_access(),
				 errmsg("could not stat file \"%s\": %m",
						linkloc)));
	}
	else if (S_ISDIR(st.st_mode))
	{
		if (rmdir(linkloc) < 0)
		{
			int			saved_errno = errno;

			ereport(redo ? LOG : (saved_errno == ENOENT ? WARNING : ERROR),
					(errcode_for_file_access(),
					 errmsg("could not remove directory \"%s\": %m",
							linkloc)));
		}
	}
#ifdef S_ISLNK
	else if (S_ISLNK(st.st_mode))
	{
		if (unlink(linkloc) < 0)
		{
			int			saved_errno = errno;

			ereport(redo ? LOG : (saved_errno == ENOENT ? WARNING : ERROR),
					(errcode_for_file_access(),
					 errmsg("could not remove symbolic link \"%s\": %m",
							linkloc)));
		}
	}
#endif
	else
	{
		/* Refuse to remove anything that's not a directory or symlink */
		ereport(redo ? LOG : ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("\"%s\" is not a directory or symbolic link",
						linkloc)));
	}

	pfree(linkloc_with_version_dir);
	pfree(linkloc);

	return true;
}


/*
 * Check if a directory is empty.
 *
 * This probably belongs somewhere else, but not sure where...
 */
bool
directory_is_empty(const char *path)
{
	DIR		   *dirdesc;
	struct dirent *de;

	dirdesc = AllocateDir(path);

	while ((de = ReadDir(dirdesc, path)) != NULL)
	{
		if (strcmp(de->d_name, ".") == 0 ||
			strcmp(de->d_name, "..") == 0)
			continue;
		/* found a real entry: not empty */
		FreeDir(dirdesc);
		return false;
	}

	FreeDir(dirdesc);
	return true;
}

/*
 * remove_tablespace_symlink
 *
 * This function removes symlinks in pg_tblspc.  On Windows, junction points
 * act like directories so we must be able to apply rmdir.
This function
 * works like the symlink removal code in destroy_tablespace_directories,
 * except that failure to remove is always an ERROR.  But if the file doesn't
 * exist at all, that's OK.
 */
void
remove_tablespace_symlink(const char *linkloc)
{
	struct stat st;

	if (lstat(linkloc, &st) < 0)
	{
		/* nonexistence is fine; anything else is an error */
		if (errno == ENOENT)
			return;
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not stat file \"%s\": %m", linkloc)));
	}

	if (S_ISDIR(st.st_mode))
	{
		/*
		 * This will fail if the directory isn't empty, but not if it's a
		 * junction point.
		 */
		if (rmdir(linkloc) < 0 && errno != ENOENT)
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("could not remove directory \"%s\": %m",
							linkloc)));
	}
#ifdef S_ISLNK
	else if (S_ISLNK(st.st_mode))
	{
		if (unlink(linkloc) < 0 && errno != ENOENT)
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("could not remove symbolic link \"%s\": %m",
							linkloc)));
	}
#endif
	else
	{
		/* Refuse to remove anything that's not a directory or symlink */
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("\"%s\" is not a directory or symbolic link",
						linkloc)));
	}
}

/*
 * Rename a tablespace
 *
 * Returns the ObjectAddress of the renamed tablespace.
 */
ObjectAddress
RenameTableSpace(const char *oldname, const char *newname)
{
	Oid			tspId;
	Relation	rel;
	ScanKeyData entry[1];
	TableScanDesc scan;
	HeapTuple	tup;
	HeapTuple	newtuple;
	Form_pg_tablespace newform;
	ObjectAddress address;

	/* Search pg_tablespace */
	rel = table_open(TableSpaceRelationId, RowExclusiveLock);

	ScanKeyInit(&entry[0],
				Anum_pg_tablespace_spcname,
				BTEqualStrategyNumber, F_NAMEEQ,
				CStringGetDatum(oldname));
	scan = table_beginscan_catalog(rel, 1, entry);
	tup = heap_getnext(scan, ForwardScanDirection);
	if (!HeapTupleIsValid(tup))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("tablespace \"%s\" does not exist",
						oldname)));

	/* work on a copy so we can modify spcname in place below */
	newtuple = heap_copytuple(tup);
	newform = (Form_pg_tablespace)
GETSTRUCT(newtuple);
	tspId = newform->oid;

	table_endscan(scan);

	/* Must be owner */
	if (!pg_tablespace_ownercheck(tspId, GetUserId()))
		aclcheck_error(ACLCHECK_NO_PRIV, OBJECT_TABLESPACE, oldname);

	/* Validate new name */
	if (!allowSystemTableMods && IsReservedName(newname))
		ereport(ERROR,
				(errcode(ERRCODE_RESERVED_NAME),
				 errmsg("unacceptable tablespace name \"%s\"", newname),
				 errdetail("The prefix \"pg_\" is reserved for system tablespaces.")));

	/*
	 * If built with appropriate switch, whine when regression-testing
	 * conventions for tablespace names are violated.
	 */
#ifdef ENFORCE_REGRESSION_TEST_NAME_RESTRICTIONS
	if (strncmp(newname, "regress_", 8) != 0)
		elog(WARNING, "tablespaces created by regression test cases should have names starting with \"regress_\"");
#endif

	/* Make sure the new name doesn't exist */
	ScanKeyInit(&entry[0],
				Anum_pg_tablespace_spcname,
				BTEqualStrategyNumber, F_NAMEEQ,
				CStringGetDatum(newname));
	scan = table_beginscan_catalog(rel, 1, entry);
	tup = heap_getnext(scan, ForwardScanDirection);
	if (HeapTupleIsValid(tup))
		ereport(ERROR,
				(errcode(ERRCODE_DUPLICATE_OBJECT),
				 errmsg("tablespace \"%s\" already exists",
						newname)));

	table_endscan(scan);

	/* OK, update the entry */
	namestrcpy(&(newform->spcname), newname);

	CatalogTupleUpdate(rel, &newtuple->t_self, newtuple);

	InvokeObjectPostAlterHook(TableSpaceRelationId, tspId, 0);

	ObjectAddressSet(address, TableSpaceRelationId, tspId);

	/* hold the lock until commit */
	table_close(rel, NoLock);

	return address;
}

/*
 * Alter table space options
 *
 * Returns the OID of the tablespace whose options were changed.
 */
Oid
AlterTableSpaceOptions(AlterTableSpaceOptionsStmt *stmt)
{
	Relation	rel;
	ScanKeyData entry[1];
	TableScanDesc scandesc;
	HeapTuple	tup;
	Oid			tablespaceoid;
	Datum		datum;
	Datum		newOptions;
	Datum		repl_val[Natts_pg_tablespace];
	bool		isnull;
	bool		repl_null[Natts_pg_tablespace];
	bool		repl_repl[Natts_pg_tablespace];
	HeapTuple	newtuple;

	/* Search pg_tablespace */
	rel = table_open(TableSpaceRelationId, RowExclusiveLock);

	ScanKeyInit(&entry[0],
				Anum_pg_tablespace_spcname,
				BTEqualStrategyNumber, F_NAMEEQ,
				CStringGetDatum(stmt->tablespacename));
	scandesc = table_beginscan_catalog(rel, 1, entry);
	tup = heap_getnext(scandesc, ForwardScanDirection);
	if (!HeapTupleIsValid(tup))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("tablespace \"%s\" does not exist",
						stmt->tablespacename)));

	tablespaceoid = ((Form_pg_tablespace) GETSTRUCT(tup))->oid;

	/* Must be owner of the existing object */
	if (!pg_tablespace_ownercheck(tablespaceoid, GetUserId()))
		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_TABLESPACE,
					   stmt->tablespacename);

	/* Generate new proposed spcoptions (text array) */
	datum = heap_getattr(tup, Anum_pg_tablespace_spcoptions,
						 RelationGetDescr(rel), &isnull);
	newOptions = transformRelOptions(isnull ? (Datum) 0 : datum,
									 stmt->options, NULL, NULL, false,
									 stmt->isReset);
	/* validate options, discarding the parsed result */
	(void) tablespace_reloptions(newOptions, true);

	/* Build new tuple. */
	memset(repl_null, false, sizeof(repl_null));
	memset(repl_repl, false, sizeof(repl_repl));
	if (newOptions != (Datum) 0)
		repl_val[Anum_pg_tablespace_spcoptions - 1] = newOptions;
	else
		repl_null[Anum_pg_tablespace_spcoptions - 1] = true;
	repl_repl[Anum_pg_tablespace_spcoptions - 1] = true;
	newtuple = heap_modify_tuple(tup, RelationGetDescr(rel), repl_val,
								 repl_null, repl_repl);

	/* Update system catalog. */
	CatalogTupleUpdate(rel, &newtuple->t_self, newtuple);

	InvokeObjectPostAlterHook(TableSpaceRelationId, tablespaceoid, 0);

	heap_freetuple(newtuple);

	/* Conclude heap scan. */
	table_endscan(scandesc);
	table_close(rel, NoLock);

	return tablespaceoid;
}

/*
 * Routines for handling the GUC variable 'default_tablespace'.
 */

/* check_hook: validate new default_tablespace */
bool
check_default_tablespace(char **newval, void **extra, GucSource source)
{
	/*
	 * If we aren't inside a transaction, or connected to a database, we
	 * cannot do the catalog accesses necessary to verify the name.  Must
	 * accept the value on faith.
	 */
	if (IsTransactionState() && MyDatabaseId != InvalidOid)
	{
		if (**newval != '\0' &&
			!OidIsValid(get_tablespace_oid(*newval, true)))
		{
			/*
			 * When source == PGC_S_TEST, don't throw a hard error for a
			 * nonexistent tablespace, only a NOTICE.  See comments in guc.h.
			 */
			if (source == PGC_S_TEST)
			{
				ereport(NOTICE,
						(errcode(ERRCODE_UNDEFINED_OBJECT),
						 errmsg("tablespace \"%s\" does not exist",
								*newval)));
			}
			else
			{
				GUC_check_errdetail("Tablespace \"%s\" does not exist.",
									*newval);
				return false;
			}
		}
	}

	return true;
}

/*
 * GetDefaultTablespace -- get the OID of the current default tablespace
 *
 * Temporary objects have different default tablespaces, hence the
 * relpersistence parameter must be specified.  Also, for partitioned tables,
 * we disallow specifying the database default, so that needs to be specified
 * too.
 *
 * May return InvalidOid to indicate "use the database's default tablespace".
 *
 * Note that caller is expected to check appropriate permissions for any
 * result other than InvalidOid.
 *
 * This exists to hide (and possibly optimize the use of) the
 * default_tablespace GUC variable.
 */
Oid
GetDefaultTablespace(char relpersistence, bool partitioned)
{
	Oid			result;

	/* The temp-table case is handled elsewhere */
	if (relpersistence == RELPERSISTENCE_TEMP)
	{
		PrepareTempTablespaces();
		return GetNextTempTableSpace();
	}

	/* Fast path for default_tablespace == "" */
	if (default_tablespace == NULL || default_tablespace[0] == '\0')
		return InvalidOid;

	/*
	 * It is tempting to cache this lookup for more speed, but then we would
	 * fail to detect the case where the tablespace was dropped since the GUC
	 * variable was set.  Note also that we don't complain if the value fails
	 * to refer to an existing tablespace; we just silently return InvalidOid,
	 * causing the new object to be created in the database's tablespace.
	 */
	result = get_tablespace_oid(default_tablespace, true);

	/*
	 * Allow explicit specification of database's default tablespace in
	 * default_tablespace without triggering permissions checks.  Don't allow
	 * specifying that when creating a partitioned table, however, since the
	 * result is confusing.
	 */
	if (result == MyDatabaseTableSpace)
	{
		if (partitioned)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("cannot specify default tablespace for partitioned relations")));
		result = InvalidOid;
	}
	return result;
}


/*
 * Routines for handling the GUC variable 'temp_tablespaces'.
 */

/* Workspace handed from check_temp_tablespaces to assign_temp_tablespaces */
typedef struct
{
	/* Array of OIDs to be passed to SetTempTablespaces() */
	int			numSpcs;
	Oid			tblSpcs[FLEXIBLE_ARRAY_MEMBER];
} temp_tablespaces_extra;

/* check_hook: validate new temp_tablespaces */
bool
check_temp_tablespaces(char **newval, void **extra, GucSource source)
{
	char	   *rawname;
	List	   *namelist;

	/* Need a modifiable copy of string */
	rawname = pstrdup(*newval);

	/* Parse string into list of identifiers */
	if (!SplitIdentifierString(rawname, ',', &namelist))
	{
		/* syntax error in name list */
		GUC_check_errdetail("List syntax is invalid.");
		pfree(rawname);
		list_free(namelist);
		return false;
	}

	/*
	 * If we aren't inside a transaction, or connected to a database, we
	 * cannot do the catalog accesses necessary to verify the name.  Must
	 * accept the value on faith.  Fortunately, there's then also no need to
	 * pass the data to fd.c.
	 */
	if (IsTransactionState() && MyDatabaseId != InvalidOid)
	{
		temp_tablespaces_extra *myextra;
		Oid		   *tblSpcs;
		int			numSpcs;
		ListCell   *l;

		/* temporary workspace until we are done verifying the list */
		tblSpcs = (Oid *) palloc(list_length(namelist) * sizeof(Oid));
		numSpcs = 0;
		foreach(l, namelist)
		{
			char	   *curname = (char *) lfirst(l);
			Oid			curoid;
			AclResult	aclresult;

			/* Allow an empty string (signifying database default) */
			if (curname[0] == '\0')
			{
				/* InvalidOid signifies database's default tablespace */
				tblSpcs[numSpcs++] = InvalidOid;
				continue;
			}

			/*
			 * In an interactive SET command, we ereport for bad info.  When
			 * source == PGC_S_TEST, don't throw a hard error for a
			 * nonexistent tablespace, only a NOTICE.  See comments in guc.h.
			 */
			/* missing_ok for non-interactive sources, so we can handle it here */
			curoid = get_tablespace_oid(curname, source <= PGC_S_TEST);
			if (curoid == InvalidOid)
			{
				if (source == PGC_S_TEST)
					ereport(NOTICE,
							(errcode(ERRCODE_UNDEFINED_OBJECT),
							 errmsg("tablespace \"%s\" does not exist",
									curname)));
				continue;
			}

			/*
			 * Allow explicit specification of database's default tablespace
			 * in temp_tablespaces without triggering permissions checks.
			 */
			if (curoid == MyDatabaseTableSpace)
			{
				/* InvalidOid signifies database's default tablespace */
				tblSpcs[numSpcs++] = InvalidOid;
				continue;
			}

			/* Check permissions, similarly complaining only if interactive */
			aclresult = pg_tablespace_aclcheck(curoid, GetUserId(),
											   ACL_CREATE);
			if (aclresult != ACLCHECK_OK)
			{
				if (source >= PGC_S_INTERACTIVE)
					aclcheck_error(aclresult, OBJECT_TABLESPACE, curname);
				continue;
			}

			tblSpcs[numSpcs++] = curoid;
		}

		/*
		 * Now prepare an "extra" struct for assign_temp_tablespaces.  Note
		 * this must be raw malloc, not palloc: the GUC machinery keeps it
		 * outside any memory context (see guc.h).
		 */
		myextra = malloc(offsetof(temp_tablespaces_extra, tblSpcs) +
						 numSpcs * sizeof(Oid));
		if (!myextra)
			return false;
		myextra->numSpcs = numSpcs;
		memcpy(myextra->tblSpcs, tblSpcs, numSpcs * sizeof(Oid));
		*extra = (void *) myextra;

		pfree(tblSpcs);
	}

	pfree(rawname);
	list_free(namelist);

	return true;
}

/* assign_hook: do extra actions as needed */
void
assign_temp_tablespaces(const char *newval, void *extra)
{
	temp_tablespaces_extra *myextra = (temp_tablespaces_extra *) extra;

	/*
	 * If check_temp_tablespaces was executed inside a transaction, then pass
	 * the list it made to fd.c.  Otherwise, clear fd.c's list; we must be
	 * still outside a transaction, or else restoring during transaction exit,
	 * and in either case we can just let the next PrepareTempTablespaces call
	 * make things sane.
	 */
	if (myextra)
		SetTempTablespaces(myextra->tblSpcs, myextra->numSpcs);
	else
		SetTempTablespaces(NULL, 0);
}

/*
 * PrepareTempTablespaces -- prepare to use temp tablespaces
 *
 * If we have not already done so in the current transaction, parse the
 * temp_tablespaces GUC variable and tell fd.c which tablespace(s) to use
 * for temp files.
 */
void
PrepareTempTablespaces(void)
{
	char	   *rawname;
	List	   *namelist;
	Oid		   *tblSpcs;
	int			numSpcs;
	ListCell   *l;

	/* No work if already done in current transaction */
	if (TempTablespacesAreSet())
		return;

	/*
	 * Can't do catalog access unless within a transaction.  This is just a
	 * safety check in case this function is called by low-level code that
	 * could conceivably execute outside a transaction.  Note that in such a
	 * scenario, fd.c will fall back to using the current database's default
	 * tablespace, which should always be OK.
	 */
	if (!IsTransactionState())
		return;

	/* Need a modifiable copy of string */
	rawname = pstrdup(temp_tablespaces);

	/* Parse string into list of identifiers */
	if (!SplitIdentifierString(rawname, ',', &namelist))
	{
		/* syntax error in name list; tell fd.c to use the default instead */
		SetTempTablespaces(NULL, 0);
		pfree(rawname);
		list_free(namelist);
		return;
	}

	/*
	 * Store tablespace OIDs in an array in TopTransactionContext, so that
	 * the array stays valid for as long as fd.c may reference it (the
	 * "already set" state above is per-transaction).
	 */
	tblSpcs = (Oid *) MemoryContextAlloc(TopTransactionContext,
										 list_length(namelist) * sizeof(Oid));
	numSpcs = 0;
	foreach(l, namelist)
	{
		char	   *curname = (char *) lfirst(l);
		Oid			curoid;
		AclResult	aclresult;

		/* Allow an empty string (signifying database default) */
		if (curname[0] == '\0')
		{
			/* InvalidOid signifies database's default tablespace */
			tblSpcs[numSpcs++] = InvalidOid;
			continue;
		}

		/* Else verify that name is a valid tablespace name */
		curoid = get_tablespace_oid(curname, true);
		if (curoid == InvalidOid)
		{
			/* Skip any bad list elements */
			continue;
		}

		/*
		 * Allow explicit specification of
database's default tablespace in + * temp_tablespaces without triggering permissions checks. + */ + if (curoid == MyDatabaseTableSpace) + { + /* InvalidOid signifies database's default tablespace */ + tblSpcs[numSpcs++] = InvalidOid; + continue; + } + + /* Check permissions similarly */ + aclresult = pg_tablespace_aclcheck(curoid, GetUserId(), + ACL_CREATE); + if (aclresult != ACLCHECK_OK) + continue; + + tblSpcs[numSpcs++] = curoid; + } + + SetTempTablespaces(tblSpcs, numSpcs); + + pfree(rawname); + list_free(namelist); +} + + +/* + * get_tablespace_oid - given a tablespace name, look up the OID + * + * If missing_ok is false, throw an error if tablespace name not found. If + * true, just return InvalidOid. + */ +Oid +get_tablespace_oid(const char *tablespacename, bool missing_ok) +{ + Oid result; + Relation rel; + TableScanDesc scandesc; + HeapTuple tuple; + ScanKeyData entry[1]; + + /* + * Search pg_tablespace. We use a heapscan here even though there is an + * index on name, on the theory that pg_tablespace will usually have just + * a few entries and so an indexed lookup is a waste of effort. + */ + rel = table_open(TableSpaceRelationId, AccessShareLock); + + ScanKeyInit(&entry[0], + Anum_pg_tablespace_spcname, + BTEqualStrategyNumber, F_NAMEEQ, + CStringGetDatum(tablespacename)); + scandesc = table_beginscan_catalog(rel, 1, entry); + tuple = heap_getnext(scandesc, ForwardScanDirection); + + /* We assume that there can be at most one matching tuple */ + if (HeapTupleIsValid(tuple)) + result = ((Form_pg_tablespace) GETSTRUCT(tuple))->oid; + else + result = InvalidOid; + + table_endscan(scandesc); + table_close(rel, AccessShareLock); + + if (!OidIsValid(result) && !missing_ok) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("tablespace \"%s\" does not exist", + tablespacename))); + + return result; +} + +/* + * get_tablespace_name - given a tablespace OID, look up the name + * + * Returns a palloc'd string, or NULL if no such tablespace. 
 */
char *
get_tablespace_name(Oid spc_oid)
{
	char	   *result;
	Relation	rel;
	TableScanDesc scandesc;
	HeapTuple	tuple;
	ScanKeyData entry[1];

	/*
	 * Search pg_tablespace.  We use a heapscan here even though there is an
	 * index on oid, on the theory that pg_tablespace will usually have just a
	 * few entries and so an indexed lookup is a waste of effort.
	 */
	rel = table_open(TableSpaceRelationId, AccessShareLock);

	ScanKeyInit(&entry[0],
				Anum_pg_tablespace_oid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(spc_oid));
	scandesc = table_beginscan_catalog(rel, 1, entry);
	tuple = heap_getnext(scandesc, ForwardScanDirection);

	/* We assume that there can be at most one matching tuple */
	if (HeapTupleIsValid(tuple))
		result = pstrdup(NameStr(((Form_pg_tablespace) GETSTRUCT(tuple))->spcname));
	else
		result = NULL;

	table_endscan(scandesc);
	table_close(rel, AccessShareLock);

	return result;
}


/*
 * TABLESPACE resource manager's routines
 *
 * Replays XLOG_TBLSPC_CREATE and XLOG_TBLSPC_DROP records during crash
 * recovery or on a standby.
 */
void
tblspc_redo(XLogReaderState *record)
{
	uint8		info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;

	/* Backup blocks are not used in tblspc records */
	Assert(!XLogRecHasAnyBlockRefs(record));

	if (info == XLOG_TBLSPC_CREATE)
	{
		xl_tblspc_create_rec *xlrec = (xl_tblspc_create_rec *) XLogRecGetData(record);
		char	   *location = xlrec->ts_path;

		/* re-create the on-disk directories and symlink for the tablespace */
		create_tablespace_directories(location, xlrec->ts_id);
	}
	else if (info == XLOG_TBLSPC_DROP)
	{
		xl_tblspc_drop_rec *xlrec = (xl_tblspc_drop_rec *) XLogRecGetData(record);

		/* Close all smgr fds in all backends. */
		WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));

		/*
		 * If we issued a WAL record for a drop tablespace it implies that
		 * there were no files in it at all when the DROP was done. That means
		 * that no permanent objects can exist in it at this point.
		 *
		 * It is possible for standby users to be using this tablespace as a
		 * location for their temporary files, so if we fail to remove all
		 * files then do conflict processing and try again, if currently
		 * enabled.
		 *
		 * Other possible reasons for failure include bollixed file
		 * permissions on a standby server when they were okay on the primary,
		 * etc etc. There's not much we can do about that, so just remove what
		 * we can and press on.
		 */
		if (!destroy_tablespace_directories(xlrec->ts_id, true))
		{
			ResolveRecoveryConflictWithTablespace(xlrec->ts_id);

			/*
			 * If we did recovery processing then hopefully the backends who
			 * wrote temp files should have cleaned up and exited by now.  So
			 * retry before complaining.  If we fail again, this is just a LOG
			 * condition, because it's not worth throwing an ERROR for (as
			 * that would crash the database and require manual intervention
			 * before we could get past this WAL record on restart).
			 */
			if (!destroy_tablespace_directories(xlrec->ts_id, true))
				ereport(LOG,
						(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
						 errmsg("directories for tablespace %u could not be removed",
								xlrec->ts_id),
						 errhint("You can remove the directories manually if necessary.")));
		}
	}
	else
		/* no other tblspc record types exist */
		elog(PANIC, "tblspc_redo: unknown op code %u", info);
}
diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c
new file mode 100644
index 0000000..0769ae3
--- /dev/null
+++ b/src/backend/commands/trigger.c
@@ -0,0 +1,6664 @@
/*-------------------------------------------------------------------------
 *
 * trigger.c
 *	  PostgreSQL TRIGGERs support code.
+ * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/commands/trigger.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/genam.h" +#include "access/htup_details.h" +#include "access/relation.h" +#include "access/sysattr.h" +#include "access/table.h" +#include "access/tableam.h" +#include "access/xact.h" +#include "catalog/catalog.h" +#include "catalog/dependency.h" +#include "catalog/index.h" +#include "catalog/indexing.h" +#include "catalog/objectaccess.h" +#include "catalog/partition.h" +#include "catalog/pg_constraint.h" +#include "catalog/pg_inherits.h" +#include "catalog/pg_proc.h" +#include "catalog/pg_trigger.h" +#include "catalog/pg_type.h" +#include "commands/dbcommands.h" +#include "commands/defrem.h" +#include "commands/trigger.h" +#include "executor/executor.h" +#include "executor/execPartition.h" +#include "miscadmin.h" +#include "nodes/bitmapset.h" +#include "nodes/makefuncs.h" +#include "optimizer/optimizer.h" +#include "parser/parse_clause.h" +#include "parser/parse_collate.h" +#include "parser/parse_func.h" +#include "parser/parse_relation.h" +#include "parser/parsetree.h" +#include "partitioning/partdesc.h" +#include "pgstat.h" +#include "rewrite/rewriteManip.h" +#include "storage/bufmgr.h" +#include "storage/lmgr.h" +#include "tcop/utility.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/bytea.h" +#include "utils/fmgroids.h" +#include "utils/inval.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/rel.h" +#include "utils/snapmgr.h" +#include "utils/syscache.h" +#include "utils/tuplestore.h" + + +/* GUC variables */ +int SessionReplicationRole = SESSION_REPLICATION_ROLE_ORIGIN; + +/* How many levels deep into trigger execution are we? 
*/ +static int MyTriggerDepth = 0; + +/* Local function prototypes */ +static void renametrig_internal(Relation tgrel, Relation targetrel, + HeapTuple trigtup, const char *newname, + const char *expected_name); +static void renametrig_partition(Relation tgrel, Oid partitionId, + Oid parentTriggerOid, const char *newname, + const char *expected_name); +static void SetTriggerFlags(TriggerDesc *trigdesc, Trigger *trigger); +static bool GetTupleForTrigger(EState *estate, + EPQState *epqstate, + ResultRelInfo *relinfo, + ItemPointer tid, + LockTupleMode lockmode, + TupleTableSlot *oldslot, + TupleTableSlot **epqslot, + TM_Result *tmresultp, + TM_FailureData *tmfdp); +static bool TriggerEnabled(EState *estate, ResultRelInfo *relinfo, + Trigger *trigger, TriggerEvent event, + Bitmapset *modifiedCols, + TupleTableSlot *oldslot, TupleTableSlot *newslot); +static HeapTuple ExecCallTriggerFunc(TriggerData *trigdata, + int tgindx, + FmgrInfo *finfo, + Instrumentation *instr, + MemoryContext per_tuple_context); +static void AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, + ResultRelInfo *src_partinfo, + ResultRelInfo *dst_partinfo, + int event, bool row_trigger, + TupleTableSlot *oldtup, TupleTableSlot *newtup, + List *recheckIndexes, Bitmapset *modifiedCols, + TransitionCaptureState *transition_capture, + bool is_crosspart_update); +static void AfterTriggerEnlargeQueryState(void); +static bool before_stmt_triggers_fired(Oid relid, CmdType cmdType); + + +/* + * Create a trigger. Returns the address of the created trigger. + * + * queryString is the source text of the CREATE TRIGGER command. + * This must be supplied if a whenClause is specified, else it can be NULL. + * + * relOid, if nonzero, is the relation on which the trigger should be + * created. If zero, the name provided in the statement will be looked up. + * + * refRelOid, if nonzero, is the relation to which the constraint trigger + * refers. 
If zero, the constraint relation name provided in the statement + * will be looked up as needed. + * + * constraintOid, if nonzero, says that this trigger is being created + * internally to implement that constraint. A suitable pg_depend entry will + * be made to link the trigger to that constraint. constraintOid is zero when + * executing a user-entered CREATE TRIGGER command. (For CREATE CONSTRAINT + * TRIGGER, we build a pg_constraint entry internally.) + * + * indexOid, if nonzero, is the OID of an index associated with the constraint. + * We do nothing with this except store it into pg_trigger.tgconstrindid; + * but when creating a trigger for a deferrable unique constraint on a + * partitioned table, its children are looked up. Note we don't cope with + * invalid indexes in that case. + * + * funcoid, if nonzero, is the OID of the function to invoke. When this is + * given, stmt->funcname is ignored. + * + * parentTriggerOid, if nonzero, is a trigger that begets this one; so that + * if that trigger is dropped, this one should be too. There are two cases + * when a nonzero value is passed for this: 1) when this function recurses to + * create the trigger on partitions, 2) when creating child foreign key + * triggers; see CreateFKCheckTrigger() and createForeignKeyActionTriggers(). + * + * If whenClause is passed, it is an already-transformed expression for + * WHEN. In this case, we ignore any that may come in stmt->whenClause. + * + * If isInternal is true then this is an internally-generated trigger. + * This argument sets the tgisinternal field of the pg_trigger entry, and + * if true causes us to modify the given trigger name to ensure uniqueness. + * + * When isInternal is not true we require ACL_TRIGGER permissions on the + * relation, as well as ACL_EXECUTE on the trigger function. For internal + * triggers the caller must apply any required permission checks. 
 *
 * When called on partitioned tables, this function recurses to create the
 * trigger on all the partitions, except if isInternal is true, in which
 * case caller is expected to execute recursion on its own.  in_partition
 * indicates such a recursive call; outside callers should pass "false"
 * (but see CloneRowTriggersToPartition).
 */
ObjectAddress
CreateTrigger(CreateTrigStmt *stmt, const char *queryString,
			  Oid relOid, Oid refRelOid, Oid constraintOid, Oid indexOid,
			  Oid funcoid, Oid parentTriggerOid, Node *whenClause,
			  bool isInternal, bool in_partition)
{
	/* Convenience wrapper: uses the default firing condition (origin) */
	return
		CreateTriggerFiringOn(stmt, queryString, relOid, refRelOid,
							  constraintOid, indexOid, funcoid,
							  parentTriggerOid, whenClause, isInternal,
							  in_partition, TRIGGER_FIRES_ON_ORIGIN);
}

/*
 * Like the above; additionally the firing condition
 * (always/origin/replica/disabled) can be specified.
 */
ObjectAddress
CreateTriggerFiringOn(CreateTrigStmt *stmt, const char *queryString,
					  Oid relOid, Oid refRelOid, Oid constraintOid,
					  Oid indexOid, Oid funcoid, Oid parentTriggerOid,
					  Node *whenClause, bool isInternal, bool in_partition,
					  char trigger_fires_when)
{
	int16		tgtype;
	int			ncolumns;
	int16	   *columns;
	int2vector *tgattr;
	List	   *whenRtable;
	char	   *qual;
	Datum		values[Natts_pg_trigger];
	bool		nulls[Natts_pg_trigger];
	Relation	rel;
	AclResult	aclresult;
	Relation	tgrel;
	Relation	pgrel;
	HeapTuple	tuple = NULL;
	Oid			funcrettype;
	Oid			trigoid = InvalidOid;
	char		internaltrigname[NAMEDATALEN];
	char	   *trigname;
	Oid			constrrelid = InvalidOid;
	ObjectAddress myself,
				referenced;
	char	   *oldtablename = NULL;
	char	   *newtablename = NULL;
	bool		partition_recurse;
	bool		trigger_exists = false;
	Oid			existing_constraint_oid = InvalidOid;
	bool		existing_isInternal = false;
	bool		existing_isClone = false;

	/*
	 * Open and lock the target relation.  ShareRowExclusiveLock keeps the
	 * relation's trigger set from changing underneath us (relied on by the
	 * duplicate-name scan further below).
	 */
	if (OidIsValid(relOid))
		rel = table_open(relOid, ShareRowExclusiveLock);
	else
		rel = table_openrv(stmt->relation, ShareRowExclusiveLock);

	/*
* Triggers must be on tables or views, and there are additional + * relation-type-specific restrictions. + */ + if (rel->rd_rel->relkind == RELKIND_RELATION) + { + /* Tables can't have INSTEAD OF triggers */ + if (stmt->timing != TRIGGER_TYPE_BEFORE && + stmt->timing != TRIGGER_TYPE_AFTER) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is a table", + RelationGetRelationName(rel)), + errdetail("Tables cannot have INSTEAD OF triggers."))); + } + else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + /* Partitioned tables can't have INSTEAD OF triggers */ + if (stmt->timing != TRIGGER_TYPE_BEFORE && + stmt->timing != TRIGGER_TYPE_AFTER) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is a table", + RelationGetRelationName(rel)), + errdetail("Tables cannot have INSTEAD OF triggers."))); + + /* + * FOR EACH ROW triggers have further restrictions + */ + if (stmt->row) + { + /* + * Disallow use of transition tables. + * + * Note that we have another restriction about transition tables + * in partitions; search for 'has_superclass' below for an + * explanation. The check here is just to protect from the fact + * that if we allowed it here, the creation would succeed for a + * partitioned table with no partitions, but would be blocked by + * the other restriction when the first partition was created, + * which is very unfriendly behavior. + */ + if (stmt->transitionRels != NIL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("\"%s\" is a partitioned table", + RelationGetRelationName(rel)), + errdetail("ROW triggers with transition tables are not supported on partitioned tables."))); + } + } + else if (rel->rd_rel->relkind == RELKIND_VIEW) + { + /* + * Views can have INSTEAD OF triggers (which we check below are + * row-level), or statement-level BEFORE/AFTER triggers. 
+ */ + if (stmt->timing != TRIGGER_TYPE_INSTEAD && stmt->row) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is a view", + RelationGetRelationName(rel)), + errdetail("Views cannot have row-level BEFORE or AFTER triggers."))); + /* Disallow TRUNCATE triggers on VIEWs */ + if (TRIGGER_FOR_TRUNCATE(stmt->events)) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is a view", + RelationGetRelationName(rel)), + errdetail("Views cannot have TRUNCATE triggers."))); + } + else if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE) + { + if (stmt->timing != TRIGGER_TYPE_BEFORE && + stmt->timing != TRIGGER_TYPE_AFTER) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is a foreign table", + RelationGetRelationName(rel)), + errdetail("Foreign tables cannot have INSTEAD OF triggers."))); + + if (TRIGGER_FOR_TRUNCATE(stmt->events)) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is a foreign table", + RelationGetRelationName(rel)), + errdetail("Foreign tables cannot have TRUNCATE triggers."))); + + /* + * We disallow constraint triggers to protect the assumption that + * triggers on FKs can't be deferred. See notes with AfterTriggers + * data structures, below. 
+ */ + if (stmt->isconstraint) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is a foreign table", + RelationGetRelationName(rel)), + errdetail("Foreign tables cannot have constraint triggers."))); + } + else + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("relation \"%s\" cannot have triggers", + RelationGetRelationName(rel)), + errdetail_relkind_not_supported(rel->rd_rel->relkind))); + + if (!allowSystemTableMods && IsSystemRelation(rel)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied: \"%s\" is a system catalog", + RelationGetRelationName(rel)))); + + if (stmt->isconstraint) + { + /* + * We must take a lock on the target relation to protect against + * concurrent drop. It's not clear that AccessShareLock is strong + * enough, but we certainly need at least that much... otherwise, we + * might end up creating a pg_constraint entry referencing a + * nonexistent table. + */ + if (OidIsValid(refRelOid)) + { + LockRelationOid(refRelOid, AccessShareLock); + constrrelid = refRelOid; + } + else if (stmt->constrrel != NULL) + constrrelid = RangeVarGetRelid(stmt->constrrel, AccessShareLock, + false); + } + + /* permission checks */ + if (!isInternal) + { + aclresult = pg_class_aclcheck(RelationGetRelid(rel), GetUserId(), + ACL_TRIGGER); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, get_relkind_objtype(rel->rd_rel->relkind), + RelationGetRelationName(rel)); + + if (OidIsValid(constrrelid)) + { + aclresult = pg_class_aclcheck(constrrelid, GetUserId(), + ACL_TRIGGER); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, get_relkind_objtype(get_rel_relkind(constrrelid)), + get_rel_name(constrrelid)); + } + } + + /* + * When called on a partitioned table to create a FOR EACH ROW trigger + * that's not internal, we create one trigger for each partition, too. + * + * For that, we'd better hold lock on all of them ahead of time. 
+ */ + partition_recurse = !isInternal && stmt->row && + rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE; + if (partition_recurse) + list_free(find_all_inheritors(RelationGetRelid(rel), + ShareRowExclusiveLock, NULL)); + + /* Compute tgtype */ + TRIGGER_CLEAR_TYPE(tgtype); + if (stmt->row) + TRIGGER_SETT_ROW(tgtype); + tgtype |= stmt->timing; + tgtype |= stmt->events; + + /* Disallow ROW-level TRUNCATE triggers */ + if (TRIGGER_FOR_ROW(tgtype) && TRIGGER_FOR_TRUNCATE(tgtype)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("TRUNCATE FOR EACH ROW triggers are not supported"))); + + /* INSTEAD triggers must be row-level, and can't have WHEN or columns */ + if (TRIGGER_FOR_INSTEAD(tgtype)) + { + if (!TRIGGER_FOR_ROW(tgtype)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("INSTEAD OF triggers must be FOR EACH ROW"))); + if (stmt->whenClause) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("INSTEAD OF triggers cannot have WHEN conditions"))); + if (stmt->columns != NIL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("INSTEAD OF triggers cannot have column lists"))); + } + + /* + * We don't yet support naming ROW transition variables, but the parser + * recognizes the syntax so we can give a nicer message here. + * + * Per standard, REFERENCING TABLE names are only allowed on AFTER + * triggers. Per standard, REFERENCING ROW names are not allowed with FOR + * EACH STATEMENT. Per standard, each OLD/NEW, ROW/TABLE permutation is + * only allowed once. Per standard, OLD may not be specified when + * creating a trigger only for INSERT, and NEW may not be specified when + * creating a trigger only for DELETE. + * + * Notice that the standard allows an AFTER ... FOR EACH ROW trigger to + * reference both ROW and TABLE transition data. 
+ */ + if (stmt->transitionRels != NIL) + { + List *varList = stmt->transitionRels; + ListCell *lc; + + foreach(lc, varList) + { + TriggerTransition *tt = lfirst_node(TriggerTransition, lc); + + if (!(tt->isTable)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("ROW variable naming in the REFERENCING clause is not supported"), + errhint("Use OLD TABLE or NEW TABLE for naming transition tables."))); + + /* + * Because of the above test, we omit further ROW-related testing + * below. If we later allow naming OLD and NEW ROW variables, + * adjustments will be needed below. + */ + + if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is a foreign table", + RelationGetRelationName(rel)), + errdetail("Triggers on foreign tables cannot have transition tables."))); + + if (rel->rd_rel->relkind == RELKIND_VIEW) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is a view", + RelationGetRelationName(rel)), + errdetail("Triggers on views cannot have transition tables."))); + + /* + * We currently don't allow row-level triggers with transition + * tables on partition or inheritance children. Such triggers + * would somehow need to see tuples converted to the format of the + * table they're attached to, and it's not clear which subset of + * tuples each child should see. See also the prohibitions in + * ATExecAttachPartition() and ATExecAddInherit(). + */ + if (TRIGGER_FOR_ROW(tgtype) && has_superclass(rel->rd_id)) + { + /* Use appropriate error message. 
*/ + if (rel->rd_rel->relispartition) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("ROW triggers with transition tables are not supported on partitions"))); + else + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("ROW triggers with transition tables are not supported on inheritance children"))); + } + + if (stmt->timing != TRIGGER_TYPE_AFTER) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("transition table name can only be specified for an AFTER trigger"))); + + if (TRIGGER_FOR_TRUNCATE(tgtype)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("TRUNCATE triggers with transition tables are not supported"))); + + /* + * We currently don't allow multi-event triggers ("INSERT OR + * UPDATE") with transition tables, because it's not clear how to + * handle INSERT ... ON CONFLICT statements which can fire both + * INSERT and UPDATE triggers. We show the inserted tuples to + * INSERT triggers and the updated tuples to UPDATE triggers, but + * it's not yet clear what INSERT OR UPDATE trigger should see. + * This restriction could be lifted if we can decide on the right + * semantics in a later release. + */ + if (((TRIGGER_FOR_INSERT(tgtype) ? 1 : 0) + + (TRIGGER_FOR_UPDATE(tgtype) ? 1 : 0) + + (TRIGGER_FOR_DELETE(tgtype) ? 1 : 0)) != 1) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("transition tables cannot be specified for triggers with more than one event"))); + + /* + * We currently don't allow column-specific triggers with + * transition tables. Per spec, that seems to require + * accumulating separate transition tables for each combination of + * columns, which is a lot of work for a rather marginal feature. 
+ */ + if (stmt->columns != NIL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("transition tables cannot be specified for triggers with column lists"))); + + /* + * We disallow constraint triggers with transition tables, to + * protect the assumption that such triggers can't be deferred. + * See notes with AfterTriggers data structures, below. + * + * Currently this is enforced by the grammar, so just Assert here. + */ + Assert(!stmt->isconstraint); + + if (tt->isNew) + { + if (!(TRIGGER_FOR_INSERT(tgtype) || + TRIGGER_FOR_UPDATE(tgtype))) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("NEW TABLE can only be specified for an INSERT or UPDATE trigger"))); + + if (newtablename != NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("NEW TABLE cannot be specified multiple times"))); + + newtablename = tt->name; + } + else + { + if (!(TRIGGER_FOR_DELETE(tgtype) || + TRIGGER_FOR_UPDATE(tgtype))) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("OLD TABLE can only be specified for a DELETE or UPDATE trigger"))); + + if (oldtablename != NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("OLD TABLE cannot be specified multiple times"))); + + oldtablename = tt->name; + } + } + + if (newtablename != NULL && oldtablename != NULL && + strcmp(newtablename, oldtablename) == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("OLD TABLE name and NEW TABLE name cannot be the same"))); + } + + /* + * Parse the WHEN clause, if any and we weren't passed an already + * transformed one. + * + * Note that as a side effect, we fill whenRtable when parsing. If we got + * an already parsed clause, this does not occur, which is what we want -- + * no point in adding redundant dependencies below. 
+ */ + if (!whenClause && stmt->whenClause) + { + ParseState *pstate; + ParseNamespaceItem *nsitem; + List *varList; + ListCell *lc; + + /* Set up a pstate to parse with */ + pstate = make_parsestate(NULL); + pstate->p_sourcetext = queryString; + + /* + * Set up nsitems for OLD and NEW references. + * + * 'OLD' must always have varno equal to 1 and 'NEW' equal to 2. + */ + nsitem = addRangeTableEntryForRelation(pstate, rel, + AccessShareLock, + makeAlias("old", NIL), + false, false); + addNSItemToQuery(pstate, nsitem, false, true, true); + nsitem = addRangeTableEntryForRelation(pstate, rel, + AccessShareLock, + makeAlias("new", NIL), + false, false); + addNSItemToQuery(pstate, nsitem, false, true, true); + + /* Transform expression. Copy to be sure we don't modify original */ + whenClause = transformWhereClause(pstate, + copyObject(stmt->whenClause), + EXPR_KIND_TRIGGER_WHEN, + "WHEN"); + /* we have to fix its collations too */ + assign_expr_collations(pstate, whenClause); + + /* + * Check for disallowed references to OLD/NEW. + * + * NB: pull_var_clause is okay here only because we don't allow + * subselects in WHEN clauses; it would fail to examine the contents + * of subselects. 
+ */ + varList = pull_var_clause(whenClause, 0); + foreach(lc, varList) + { + Var *var = (Var *) lfirst(lc); + + switch (var->varno) + { + case PRS2_OLD_VARNO: + if (!TRIGGER_FOR_ROW(tgtype)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("statement trigger's WHEN condition cannot reference column values"), + parser_errposition(pstate, var->location))); + if (TRIGGER_FOR_INSERT(tgtype)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("INSERT trigger's WHEN condition cannot reference OLD values"), + parser_errposition(pstate, var->location))); + /* system columns are okay here */ + break; + case PRS2_NEW_VARNO: + if (!TRIGGER_FOR_ROW(tgtype)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("statement trigger's WHEN condition cannot reference column values"), + parser_errposition(pstate, var->location))); + if (TRIGGER_FOR_DELETE(tgtype)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("DELETE trigger's WHEN condition cannot reference NEW values"), + parser_errposition(pstate, var->location))); + if (var->varattno < 0 && TRIGGER_FOR_BEFORE(tgtype)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("BEFORE trigger's WHEN condition cannot reference NEW system columns"), + parser_errposition(pstate, var->location))); + if (TRIGGER_FOR_BEFORE(tgtype) && + var->varattno == 0 && + RelationGetDescr(rel)->constr && + RelationGetDescr(rel)->constr->has_generated_stored) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("BEFORE trigger's WHEN condition cannot reference NEW generated columns"), + errdetail("A whole-row reference is used and the table contains generated columns."), + parser_errposition(pstate, var->location))); + if (TRIGGER_FOR_BEFORE(tgtype) && + var->varattno > 0 && + TupleDescAttr(RelationGetDescr(rel), var->varattno - 1)->attgenerated) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("BEFORE 
trigger's WHEN condition cannot reference NEW generated columns"), + errdetail("Column \"%s\" is a generated column.", + NameStr(TupleDescAttr(RelationGetDescr(rel), var->varattno - 1)->attname)), + parser_errposition(pstate, var->location))); + break; + default: + /* can't happen without add_missing_from, so just elog */ + elog(ERROR, "trigger WHEN condition cannot contain references to other relations"); + break; + } + } + + /* we'll need the rtable for recordDependencyOnExpr */ + whenRtable = pstate->p_rtable; + + qual = nodeToString(whenClause); + + free_parsestate(pstate); + } + else if (!whenClause) + { + whenClause = NULL; + whenRtable = NIL; + qual = NULL; + } + else + { + qual = nodeToString(whenClause); + whenRtable = NIL; + } + + /* + * Find and validate the trigger function. + */ + if (!OidIsValid(funcoid)) + funcoid = LookupFuncName(stmt->funcname, 0, NULL, false); + if (!isInternal) + { + aclresult = pg_proc_aclcheck(funcoid, GetUserId(), ACL_EXECUTE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_FUNCTION, + NameListToString(stmt->funcname)); + } + funcrettype = get_func_rettype(funcoid); + if (funcrettype != TRIGGEROID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("function %s must return type %s", + NameListToString(stmt->funcname), "trigger"))); + + /* + * Scan pg_trigger to see if there is already a trigger of the same name. + * Skip this for internally generated triggers, since we'll modify the + * name to be unique below. + * + * NOTE that this is cool only because we have ShareRowExclusiveLock on + * the relation, so the trigger set won't be changing underneath us. 
+ */ + tgrel = table_open(TriggerRelationId, RowExclusiveLock); + if (!isInternal) + { + ScanKeyData skeys[2]; + SysScanDesc tgscan; + + ScanKeyInit(&skeys[0], + Anum_pg_trigger_tgrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(rel))); + + ScanKeyInit(&skeys[1], + Anum_pg_trigger_tgname, + BTEqualStrategyNumber, F_NAMEEQ, + CStringGetDatum(stmt->trigname)); + + tgscan = systable_beginscan(tgrel, TriggerRelidNameIndexId, true, + NULL, 2, skeys); + + /* There should be at most one matching tuple */ + if (HeapTupleIsValid(tuple = systable_getnext(tgscan))) + { + Form_pg_trigger oldtrigger = (Form_pg_trigger) GETSTRUCT(tuple); + + trigoid = oldtrigger->oid; + existing_constraint_oid = oldtrigger->tgconstraint; + existing_isInternal = oldtrigger->tgisinternal; + existing_isClone = OidIsValid(oldtrigger->tgparentid); + trigger_exists = true; + /* copy the tuple to use in CatalogTupleUpdate() */ + tuple = heap_copytuple(tuple); + } + systable_endscan(tgscan); + } + + if (!trigger_exists) + { + /* Generate the OID for the new trigger. */ + trigoid = GetNewOidWithIndex(tgrel, TriggerOidIndexId, + Anum_pg_trigger_oid); + } + else + { + /* + * If OR REPLACE was specified, we'll replace the old trigger; + * otherwise complain about the duplicate name. + */ + if (!stmt->replace) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("trigger \"%s\" for relation \"%s\" already exists", + stmt->trigname, RelationGetRelationName(rel)))); + + /* + * An internal trigger or a child trigger (isClone) cannot be replaced + * by a user-defined trigger. However, skip this test when + * in_partition, because then we're recursing from a partitioned table + * and the check was made at the parent level. 
+ */ + if ((existing_isInternal || existing_isClone) && + !isInternal && !in_partition) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("trigger \"%s\" for relation \"%s\" is an internal or a child trigger", + stmt->trigname, RelationGetRelationName(rel)))); + + /* + * It is not allowed to replace with a constraint trigger; gram.y + * should have enforced this already. + */ + Assert(!stmt->isconstraint); + + /* + * It is not allowed to replace an existing constraint trigger, + * either. (The reason for these restrictions is partly that it seems + * difficult to deal with pending trigger events in such cases, and + * partly that the command might imply changing the constraint's + * properties as well, which doesn't seem nice.) + */ + if (OidIsValid(existing_constraint_oid)) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("trigger \"%s\" for relation \"%s\" is a constraint trigger", + stmt->trigname, RelationGetRelationName(rel)))); + } + + /* + * If it's a user-entered CREATE CONSTRAINT TRIGGER command, make a + * corresponding pg_constraint entry. + */ + if (stmt->isconstraint && !OidIsValid(constraintOid)) + { + /* Internal callers should have made their own constraints */ + Assert(!isInternal); + constraintOid = CreateConstraintEntry(stmt->trigname, + RelationGetNamespace(rel), + CONSTRAINT_TRIGGER, + stmt->deferrable, + stmt->initdeferred, + true, + InvalidOid, /* no parent */ + RelationGetRelid(rel), + NULL, /* no conkey */ + 0, + 0, + InvalidOid, /* no domain */ + InvalidOid, /* no index */ + InvalidOid, /* no foreign key */ + NULL, + NULL, + NULL, + NULL, + 0, + ' ', + ' ', + NULL, + 0, + ' ', + NULL, /* no exclusion */ + NULL, /* no check constraint */ + NULL, + true, /* islocal */ + 0, /* inhcount */ + true, /* noinherit */ + isInternal); /* is_internal */ + } + + /* + * If trigger is internally generated, modify the provided trigger name to + * ensure uniqueness by appending the trigger OID. 
(Callers will usually + * supply a simple constant trigger name in these cases.) + */ + if (isInternal) + { + snprintf(internaltrigname, sizeof(internaltrigname), + "%s_%u", stmt->trigname, trigoid); + trigname = internaltrigname; + } + else + { + /* user-defined trigger; use the specified trigger name as-is */ + trigname = stmt->trigname; + } + + /* + * Build the new pg_trigger tuple. + */ + memset(nulls, false, sizeof(nulls)); + + values[Anum_pg_trigger_oid - 1] = ObjectIdGetDatum(trigoid); + values[Anum_pg_trigger_tgrelid - 1] = ObjectIdGetDatum(RelationGetRelid(rel)); + values[Anum_pg_trigger_tgparentid - 1] = ObjectIdGetDatum(parentTriggerOid); + values[Anum_pg_trigger_tgname - 1] = DirectFunctionCall1(namein, + CStringGetDatum(trigname)); + values[Anum_pg_trigger_tgfoid - 1] = ObjectIdGetDatum(funcoid); + values[Anum_pg_trigger_tgtype - 1] = Int16GetDatum(tgtype); + values[Anum_pg_trigger_tgenabled - 1] = trigger_fires_when; + values[Anum_pg_trigger_tgisinternal - 1] = BoolGetDatum(isInternal); + values[Anum_pg_trigger_tgconstrrelid - 1] = ObjectIdGetDatum(constrrelid); + values[Anum_pg_trigger_tgconstrindid - 1] = ObjectIdGetDatum(indexOid); + values[Anum_pg_trigger_tgconstraint - 1] = ObjectIdGetDatum(constraintOid); + values[Anum_pg_trigger_tgdeferrable - 1] = BoolGetDatum(stmt->deferrable); + values[Anum_pg_trigger_tginitdeferred - 1] = BoolGetDatum(stmt->initdeferred); + + if (stmt->args) + { + ListCell *le; + char *args; + int16 nargs = list_length(stmt->args); + int len = 0; + + foreach(le, stmt->args) + { + char *ar = strVal(lfirst(le)); + + len += strlen(ar) + 4; + for (; *ar; ar++) + { + if (*ar == '\\') + len++; + } + } + args = (char *) palloc(len + 1); + args[0] = '\0'; + foreach(le, stmt->args) + { + char *s = strVal(lfirst(le)); + char *d = args + strlen(args); + + while (*s) + { + if (*s == '\\') + *d++ = '\\'; + *d++ = *s++; + } + strcpy(d, "\\000"); + } + values[Anum_pg_trigger_tgnargs - 1] = Int16GetDatum(nargs); + 
values[Anum_pg_trigger_tgargs - 1] = DirectFunctionCall1(byteain, + CStringGetDatum(args)); + } + else + { + values[Anum_pg_trigger_tgnargs - 1] = Int16GetDatum(0); + values[Anum_pg_trigger_tgargs - 1] = DirectFunctionCall1(byteain, + CStringGetDatum("")); + } + + /* build column number array if it's a column-specific trigger */ + ncolumns = list_length(stmt->columns); + if (ncolumns == 0) + columns = NULL; + else + { + ListCell *cell; + int i = 0; + + columns = (int16 *) palloc(ncolumns * sizeof(int16)); + foreach(cell, stmt->columns) + { + char *name = strVal(lfirst(cell)); + int16 attnum; + int j; + + /* Lookup column name. System columns are not allowed */ + attnum = attnameAttNum(rel, name, false); + if (attnum == InvalidAttrNumber) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" of relation \"%s\" does not exist", + name, RelationGetRelationName(rel)))); + + /* Check for duplicates */ + for (j = i - 1; j >= 0; j--) + { + if (columns[j] == attnum) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_COLUMN), + errmsg("column \"%s\" specified more than once", + name))); + } + + columns[i++] = attnum; + } + } + tgattr = buildint2vector(columns, ncolumns); + values[Anum_pg_trigger_tgattr - 1] = PointerGetDatum(tgattr); + + /* set tgqual if trigger has WHEN clause */ + if (qual) + values[Anum_pg_trigger_tgqual - 1] = CStringGetTextDatum(qual); + else + nulls[Anum_pg_trigger_tgqual - 1] = true; + + if (oldtablename) + values[Anum_pg_trigger_tgoldtable - 1] = DirectFunctionCall1(namein, + CStringGetDatum(oldtablename)); + else + nulls[Anum_pg_trigger_tgoldtable - 1] = true; + if (newtablename) + values[Anum_pg_trigger_tgnewtable - 1] = DirectFunctionCall1(namein, + CStringGetDatum(newtablename)); + else + nulls[Anum_pg_trigger_tgnewtable - 1] = true; + + /* + * Insert or replace tuple in pg_trigger. 
+ */ + if (!trigger_exists) + { + tuple = heap_form_tuple(tgrel->rd_att, values, nulls); + CatalogTupleInsert(tgrel, tuple); + } + else + { + HeapTuple newtup; + + newtup = heap_form_tuple(tgrel->rd_att, values, nulls); + CatalogTupleUpdate(tgrel, &tuple->t_self, newtup); + heap_freetuple(newtup); + } + + heap_freetuple(tuple); /* free either original or new tuple */ + table_close(tgrel, RowExclusiveLock); + + pfree(DatumGetPointer(values[Anum_pg_trigger_tgname - 1])); + pfree(DatumGetPointer(values[Anum_pg_trigger_tgargs - 1])); + pfree(DatumGetPointer(values[Anum_pg_trigger_tgattr - 1])); + if (oldtablename) + pfree(DatumGetPointer(values[Anum_pg_trigger_tgoldtable - 1])); + if (newtablename) + pfree(DatumGetPointer(values[Anum_pg_trigger_tgnewtable - 1])); + + /* + * Update relation's pg_class entry; if necessary; and if not, send an SI + * message to make other backends (and this one) rebuild relcache entries. + */ + pgrel = table_open(RelationRelationId, RowExclusiveLock); + tuple = SearchSysCacheCopy1(RELOID, + ObjectIdGetDatum(RelationGetRelid(rel))); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for relation %u", + RelationGetRelid(rel)); + if (!((Form_pg_class) GETSTRUCT(tuple))->relhastriggers) + { + ((Form_pg_class) GETSTRUCT(tuple))->relhastriggers = true; + + CatalogTupleUpdate(pgrel, &tuple->t_self, tuple); + + CommandCounterIncrement(); + } + else + CacheInvalidateRelcacheByTuple(tuple); + + heap_freetuple(tuple); + table_close(pgrel, RowExclusiveLock); + + /* + * If we're replacing a trigger, flush all the old dependencies before + * recording new ones. + */ + if (trigger_exists) + deleteDependencyRecordsFor(TriggerRelationId, trigoid, true); + + /* + * Record dependencies for trigger. Always place a normal dependency on + * the function. 
+ */ + myself.classId = TriggerRelationId; + myself.objectId = trigoid; + myself.objectSubId = 0; + + referenced.classId = ProcedureRelationId; + referenced.objectId = funcoid; + referenced.objectSubId = 0; + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + + if (isInternal && OidIsValid(constraintOid)) + { + /* + * Internally-generated trigger for a constraint, so make it an + * internal dependency of the constraint. We can skip depending on + * the relation(s), as there'll be an indirect dependency via the + * constraint. + */ + referenced.classId = ConstraintRelationId; + referenced.objectId = constraintOid; + referenced.objectSubId = 0; + recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL); + } + else + { + /* + * User CREATE TRIGGER, so place dependencies. We make trigger be + * auto-dropped if its relation is dropped or if the FK relation is + * dropped. (Auto drop is compatible with our pre-7.3 behavior.) + */ + referenced.classId = RelationRelationId; + referenced.objectId = RelationGetRelid(rel); + referenced.objectSubId = 0; + recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO); + + if (OidIsValid(constrrelid)) + { + referenced.classId = RelationRelationId; + referenced.objectId = constrrelid; + referenced.objectSubId = 0; + recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO); + } + /* Not possible to have an index dependency in this case */ + Assert(!OidIsValid(indexOid)); + + /* + * If it's a user-specified constraint trigger, make the constraint + * internally dependent on the trigger instead of vice versa. + */ + if (OidIsValid(constraintOid)) + { + referenced.classId = ConstraintRelationId; + referenced.objectId = constraintOid; + referenced.objectSubId = 0; + recordDependencyOn(&referenced, &myself, DEPENDENCY_INTERNAL); + } + + /* + * If it's a partition trigger, create the partition dependencies. 
+ */ + if (OidIsValid(parentTriggerOid)) + { + ObjectAddressSet(referenced, TriggerRelationId, parentTriggerOid); + recordDependencyOn(&myself, &referenced, DEPENDENCY_PARTITION_PRI); + ObjectAddressSet(referenced, RelationRelationId, RelationGetRelid(rel)); + recordDependencyOn(&myself, &referenced, DEPENDENCY_PARTITION_SEC); + } + } + + /* If column-specific trigger, add normal dependencies on columns */ + if (columns != NULL) + { + int i; + + referenced.classId = RelationRelationId; + referenced.objectId = RelationGetRelid(rel); + for (i = 0; i < ncolumns; i++) + { + referenced.objectSubId = columns[i]; + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + } + } + + /* + * If it has a WHEN clause, add dependencies on objects mentioned in the + * expression (eg, functions, as well as any columns used). + */ + if (whenRtable != NIL) + recordDependencyOnExpr(&myself, whenClause, whenRtable, + DEPENDENCY_NORMAL); + + /* Post creation hook for new trigger */ + InvokeObjectPostCreateHookArg(TriggerRelationId, trigoid, 0, + isInternal); + + /* + * Lastly, create the trigger on child relations, if needed. + */ + if (partition_recurse) + { + PartitionDesc partdesc = RelationGetPartitionDesc(rel, true); + List *idxs = NIL; + List *childTbls = NIL; + ListCell *l; + int i; + MemoryContext oldcxt, + perChildCxt; + + perChildCxt = AllocSetContextCreate(CurrentMemoryContext, + "part trig clone", + ALLOCSET_SMALL_SIZES); + + /* + * When a trigger is being created associated with an index, we'll + * need to associate the trigger in each child partition with the + * corresponding index on it. 
+ */ + if (OidIsValid(indexOid)) + { + ListCell *l; + List *idxs = NIL; + + idxs = find_inheritance_children(indexOid, ShareRowExclusiveLock); + foreach(l, idxs) + childTbls = lappend_oid(childTbls, + IndexGetRelation(lfirst_oid(l), + false)); + } + + oldcxt = MemoryContextSwitchTo(perChildCxt); + + /* Iterate to create the trigger on each existing partition */ + for (i = 0; i < partdesc->nparts; i++) + { + Oid indexOnChild = InvalidOid; + ListCell *l2; + CreateTrigStmt *childStmt; + Relation childTbl; + Node *qual; + + childTbl = table_open(partdesc->oids[i], ShareRowExclusiveLock); + + /* Find which of the child indexes is the one on this partition */ + if (OidIsValid(indexOid)) + { + forboth(l, idxs, l2, childTbls) + { + if (lfirst_oid(l2) == partdesc->oids[i]) + { + indexOnChild = lfirst_oid(l); + break; + } + } + if (!OidIsValid(indexOnChild)) + elog(ERROR, "failed to find index matching index \"%s\" in partition \"%s\"", + get_rel_name(indexOid), + get_rel_name(partdesc->oids[i])); + } + + /* + * Initialize our fabricated parse node by copying the original + * one, then resetting fields that we pass separately. 
+ */ + childStmt = (CreateTrigStmt *) copyObject(stmt); + childStmt->funcname = NIL; + childStmt->whenClause = NULL; + + /* If there is a WHEN clause, create a modified copy of it */ + qual = copyObject(whenClause); + qual = (Node *) + map_partition_varattnos((List *) qual, PRS2_OLD_VARNO, + childTbl, rel); + qual = (Node *) + map_partition_varattnos((List *) qual, PRS2_NEW_VARNO, + childTbl, rel); + + CreateTriggerFiringOn(childStmt, queryString, + partdesc->oids[i], refRelOid, + InvalidOid, indexOnChild, + funcoid, trigoid, qual, + isInternal, true, trigger_fires_when); + + table_close(childTbl, NoLock); + + MemoryContextReset(perChildCxt); + } + + MemoryContextSwitchTo(oldcxt); + MemoryContextDelete(perChildCxt); + list_free(idxs); + list_free(childTbls); + } + + /* Keep lock on target rel until end of xact */ + table_close(rel, NoLock); + + return myself; +} + +/* + * TriggerSetParentTrigger + * Set a partition's trigger as child of its parent trigger, + * or remove the linkage if parentTrigId is InvalidOid. + * + * This updates the constraint's pg_trigger row to show it as inherited, and + * adds PARTITION dependencies to prevent the trigger from being deleted + * on its own. Alternatively, reverse that. + */ +void +TriggerSetParentTrigger(Relation trigRel, + Oid childTrigId, + Oid parentTrigId, + Oid childTableId) +{ + SysScanDesc tgscan; + ScanKeyData skey[1]; + Form_pg_trigger trigForm; + HeapTuple tuple, + newtup; + ObjectAddress depender; + ObjectAddress referenced; + + /* + * Find the trigger to delete. 
+ */ + ScanKeyInit(&skey[0], + Anum_pg_trigger_oid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(childTrigId)); + + tgscan = systable_beginscan(trigRel, TriggerOidIndexId, true, + NULL, 1, skey); + + tuple = systable_getnext(tgscan); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "could not find tuple for trigger %u", childTrigId); + newtup = heap_copytuple(tuple); + trigForm = (Form_pg_trigger) GETSTRUCT(newtup); + if (OidIsValid(parentTrigId)) + { + /* don't allow setting parent for a constraint that already has one */ + if (OidIsValid(trigForm->tgparentid)) + elog(ERROR, "trigger %u already has a parent trigger", + childTrigId); + + trigForm->tgparentid = parentTrigId; + + CatalogTupleUpdate(trigRel, &tuple->t_self, newtup); + + ObjectAddressSet(depender, TriggerRelationId, childTrigId); + + ObjectAddressSet(referenced, TriggerRelationId, parentTrigId); + recordDependencyOn(&depender, &referenced, DEPENDENCY_PARTITION_PRI); + + ObjectAddressSet(referenced, RelationRelationId, childTableId); + recordDependencyOn(&depender, &referenced, DEPENDENCY_PARTITION_SEC); + } + else + { + trigForm->tgparentid = InvalidOid; + + CatalogTupleUpdate(trigRel, &tuple->t_self, newtup); + + deleteDependencyRecordsForClass(TriggerRelationId, childTrigId, + TriggerRelationId, + DEPENDENCY_PARTITION_PRI); + deleteDependencyRecordsForClass(TriggerRelationId, childTrigId, + RelationRelationId, + DEPENDENCY_PARTITION_SEC); + } + + heap_freetuple(newtup); + systable_endscan(tgscan); +} + + +/* + * Guts of trigger deletion. + */ +void +RemoveTriggerById(Oid trigOid) +{ + Relation tgrel; + SysScanDesc tgscan; + ScanKeyData skey[1]; + HeapTuple tup; + Oid relid; + Relation rel; + + tgrel = table_open(TriggerRelationId, RowExclusiveLock); + + /* + * Find the trigger to delete. 
+ */ + ScanKeyInit(&skey[0], + Anum_pg_trigger_oid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(trigOid)); + + tgscan = systable_beginscan(tgrel, TriggerOidIndexId, true, + NULL, 1, skey); + + tup = systable_getnext(tgscan); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "could not find tuple for trigger %u", trigOid); + + /* + * Open and exclusive-lock the relation the trigger belongs to. + */ + relid = ((Form_pg_trigger) GETSTRUCT(tup))->tgrelid; + + rel = table_open(relid, AccessExclusiveLock); + + if (rel->rd_rel->relkind != RELKIND_RELATION && + rel->rd_rel->relkind != RELKIND_VIEW && + rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE && + rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("relation \"%s\" cannot have triggers", + RelationGetRelationName(rel)), + errdetail_relkind_not_supported(rel->rd_rel->relkind))); + + if (!allowSystemTableMods && IsSystemRelation(rel)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied: \"%s\" is a system catalog", + RelationGetRelationName(rel)))); + + /* + * Delete the pg_trigger tuple. + */ + CatalogTupleDelete(tgrel, &tup->t_self); + + systable_endscan(tgscan); + table_close(tgrel, RowExclusiveLock); + + /* + * We do not bother to try to determine whether any other triggers remain, + * which would be needed in order to decide whether it's safe to clear the + * relation's relhastriggers. (In any case, there might be a concurrent + * process adding new triggers.) Instead, just force a relcache inval to + * make other backends (and this one too!) rebuild their relcache entries. + * There's no great harm in leaving relhastriggers true even if there are + * no triggers left. + */ + CacheInvalidateRelcache(rel); + + /* Keep lock on trigger's rel until end of xact */ + table_close(rel, NoLock); +} + +/* + * get_trigger_oid - Look up a trigger by name to find its OID. 
+ * + * If missing_ok is false, throw an error if trigger not found. If + * true, just return InvalidOid. + */ +Oid +get_trigger_oid(Oid relid, const char *trigname, bool missing_ok) +{ + Relation tgrel; + ScanKeyData skey[2]; + SysScanDesc tgscan; + HeapTuple tup; + Oid oid; + + /* + * Find the trigger, verify permissions, set up object address + */ + tgrel = table_open(TriggerRelationId, AccessShareLock); + + ScanKeyInit(&skey[0], + Anum_pg_trigger_tgrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(relid)); + ScanKeyInit(&skey[1], + Anum_pg_trigger_tgname, + BTEqualStrategyNumber, F_NAMEEQ, + CStringGetDatum(trigname)); + + tgscan = systable_beginscan(tgrel, TriggerRelidNameIndexId, true, + NULL, 2, skey); + + tup = systable_getnext(tgscan); + + if (!HeapTupleIsValid(tup)) + { + if (!missing_ok) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("trigger \"%s\" for table \"%s\" does not exist", + trigname, get_rel_name(relid)))); + oid = InvalidOid; + } + else + { + oid = ((Form_pg_trigger) GETSTRUCT(tup))->oid; + } + + systable_endscan(tgscan); + table_close(tgrel, AccessShareLock); + return oid; +} + +/* + * Perform permissions and integrity checks before acquiring a relation lock. 
+ */ +static void +RangeVarCallbackForRenameTrigger(const RangeVar *rv, Oid relid, Oid oldrelid, + void *arg) +{ + HeapTuple tuple; + Form_pg_class form; + + tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid)); + if (!HeapTupleIsValid(tuple)) + return; /* concurrently dropped */ + form = (Form_pg_class) GETSTRUCT(tuple); + + /* only tables and views can have triggers */ + if (form->relkind != RELKIND_RELATION && form->relkind != RELKIND_VIEW && + form->relkind != RELKIND_FOREIGN_TABLE && + form->relkind != RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("relation \"%s\" cannot have triggers", + rv->relname), + errdetail_relkind_not_supported(form->relkind))); + + /* you must own the table to rename one of its triggers */ + if (!pg_class_ownercheck(relid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(get_rel_relkind(relid)), rv->relname); + if (!allowSystemTableMods && IsSystemClass(relid, form)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied: \"%s\" is a system catalog", + rv->relname))); + + ReleaseSysCache(tuple); +} + +/* + * renametrig - changes the name of a trigger on a relation + * + * trigger name is changed in trigger catalog. + * No record of the previous name is kept. + * + * get proper relrelation from relation catalog (if not arg) + * scan trigger catalog + * for name conflict (within rel) + * for original trigger (if not arg) + * modify tgname in trigger tuple + * update row in catalog + */ +ObjectAddress +renametrig(RenameStmt *stmt) +{ + Oid tgoid; + Relation targetrel; + Relation tgrel; + HeapTuple tuple; + SysScanDesc tgscan; + ScanKeyData key[2]; + Oid relid; + ObjectAddress address; + + /* + * Look up name, check permissions, and acquire lock (which we will NOT + * release until end of transaction). 
+ */ + relid = RangeVarGetRelidExtended(stmt->relation, AccessExclusiveLock, + 0, + RangeVarCallbackForRenameTrigger, + NULL); + + /* Have lock already, so just need to build relcache entry. */ + targetrel = relation_open(relid, NoLock); + + /* + * On partitioned tables, this operation recurses to partitions. Lock all + * tables upfront. + */ + if (targetrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + (void) find_all_inheritors(relid, AccessExclusiveLock, NULL); + + tgrel = table_open(TriggerRelationId, RowExclusiveLock); + + /* + * Search for the trigger to modify. + */ + ScanKeyInit(&key[0], + Anum_pg_trigger_tgrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(relid)); + ScanKeyInit(&key[1], + Anum_pg_trigger_tgname, + BTEqualStrategyNumber, F_NAMEEQ, + PointerGetDatum(stmt->subname)); + tgscan = systable_beginscan(tgrel, TriggerRelidNameIndexId, true, + NULL, 2, key); + if (HeapTupleIsValid(tuple = systable_getnext(tgscan))) + { + Form_pg_trigger trigform; + + trigform = (Form_pg_trigger) GETSTRUCT(tuple); + tgoid = trigform->oid; + + /* + * If the trigger descends from a trigger on a parent partitioned + * table, reject the rename. We don't allow a trigger in a partition + * to differ in name from that of its parent: that would lead to an + * inconsistency that pg_dump would not reproduce. + */ + if (OidIsValid(trigform->tgparentid)) + ereport(ERROR, + errmsg("cannot rename trigger \"%s\" on table \"%s\"", + stmt->subname, RelationGetRelationName(targetrel)), + errhint("Rename the trigger on the partitioned table \"%s\" instead.", + get_rel_name(get_partition_parent(relid, false)))); + + + /* Rename the trigger on this relation ... */ + renametrig_internal(tgrel, targetrel, tuple, stmt->newname, + stmt->subname); + + /* ... 
and if it is partitioned, recurse to its partitions */ + if (targetrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + PartitionDesc partdesc = RelationGetPartitionDesc(targetrel, true); + + for (int i = 0; i < partdesc->nparts; i++) + { + Oid partitionId = partdesc->oids[i]; + + renametrig_partition(tgrel, partitionId, trigform->oid, + stmt->newname, stmt->subname); + } + } + } + else + { + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("trigger \"%s\" for table \"%s\" does not exist", + stmt->subname, RelationGetRelationName(targetrel)))); + } + + ObjectAddressSet(address, TriggerRelationId, tgoid); + + systable_endscan(tgscan); + + table_close(tgrel, RowExclusiveLock); + + /* + * Close rel, but keep exclusive lock! + */ + relation_close(targetrel, NoLock); + + return address; +} + +/* + * Subroutine for renametrig -- perform the actual work of renaming one + * trigger on one table. + * + * If the trigger has a name different from the expected one, raise a + * NOTICE about it. + */ +static void +renametrig_internal(Relation tgrel, Relation targetrel, HeapTuple trigtup, + const char *newname, const char *expected_name) +{ + HeapTuple tuple; + Form_pg_trigger tgform; + ScanKeyData key[2]; + SysScanDesc tgscan; + + /* If the trigger already has the new name, nothing to do. */ + tgform = (Form_pg_trigger) GETSTRUCT(trigtup); + if (strcmp(NameStr(tgform->tgname), newname) == 0) + return; + + /* + * Before actually trying the rename, search for triggers with the same + * name. The update would fail with an ugly message in that case, and it + * is better to throw a nicer error. 
+ */ + ScanKeyInit(&key[0], + Anum_pg_trigger_tgrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(targetrel))); + ScanKeyInit(&key[1], + Anum_pg_trigger_tgname, + BTEqualStrategyNumber, F_NAMEEQ, + PointerGetDatum(newname)); + tgscan = systable_beginscan(tgrel, TriggerRelidNameIndexId, true, + NULL, 2, key); + if (HeapTupleIsValid(tuple = systable_getnext(tgscan))) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("trigger \"%s\" for relation \"%s\" already exists", + newname, RelationGetRelationName(targetrel)))); + systable_endscan(tgscan); + + /* + * The target name is free; update the existing pg_trigger tuple with it. + */ + tuple = heap_copytuple(trigtup); /* need a modifiable copy */ + tgform = (Form_pg_trigger) GETSTRUCT(tuple); + + /* + * If the trigger has a name different from what we expected, let the user + * know. (We can proceed anyway, since we must have reached here following + * a tgparentid link.) + */ + if (strcmp(NameStr(tgform->tgname), expected_name) != 0) + ereport(NOTICE, + errmsg("renamed trigger \"%s\" on relation \"%s\"", + NameStr(tgform->tgname), + RelationGetRelationName(targetrel))); + + namestrcpy(&tgform->tgname, newname); + + CatalogTupleUpdate(tgrel, &tuple->t_self, tuple); + + InvokeObjectPostAlterHook(TriggerRelationId, tgform->oid, 0); + + /* + * Invalidate relation's relcache entry so that other backends (and this + * one too!) are sent SI message to make them rebuild relcache entries. + * (Ideally this should happen automatically...) + */ + CacheInvalidateRelcache(targetrel); +} + +/* + * Subroutine for renametrig -- Helper for recursing to partitions when + * renaming triggers on a partitioned table. 
+ */ +static void +renametrig_partition(Relation tgrel, Oid partitionId, Oid parentTriggerOid, + const char *newname, const char *expected_name) +{ + SysScanDesc tgscan; + ScanKeyData key; + HeapTuple tuple; + + /* + * Given a relation and the OID of a trigger on parent relation, find the + * corresponding trigger in the child and rename that trigger to the given + * name. + */ + ScanKeyInit(&key, + Anum_pg_trigger_tgrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(partitionId)); + tgscan = systable_beginscan(tgrel, TriggerRelidNameIndexId, true, + NULL, 1, &key); + while (HeapTupleIsValid(tuple = systable_getnext(tgscan))) + { + Form_pg_trigger tgform = (Form_pg_trigger) GETSTRUCT(tuple); + Relation partitionRel; + + if (tgform->tgparentid != parentTriggerOid) + continue; /* not our trigger */ + + partitionRel = table_open(partitionId, NoLock); + + /* Rename the trigger on this partition */ + renametrig_internal(tgrel, partitionRel, tuple, newname, expected_name); + + /* And if this relation is partitioned, recurse to its partitions */ + if (partitionRel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + PartitionDesc partdesc = RelationGetPartitionDesc(partitionRel, + true); + + for (int i = 0; i < partdesc->nparts; i++) + { + Oid partitionId = partdesc->oids[i]; + + renametrig_partition(tgrel, partitionId, tgform->oid, newname, + NameStr(tgform->tgname)); + } + } + table_close(partitionRel, NoLock); + + /* There should be at most one matching tuple */ + break; + } + systable_endscan(tgscan); +} + +/* + * EnableDisableTrigger() + * + * Called by ALTER TABLE ENABLE/DISABLE [ REPLICA | ALWAYS ] TRIGGER + * to change 'tgenabled' field for the specified trigger(s) + * + * rel: relation to process (caller must hold suitable lock on it) + * tgname: name of trigger to process, or NULL to scan all triggers + * tgparent: if not zero, process only triggers with this tgparentid + * fires_when: new value for tgenabled field. 
In addition to generic + * enablement/disablement, this also defines when the trigger + * should be fired in session replication roles. + * skip_system: if true, skip "system" triggers (constraint triggers) + * recurse: if true, recurse to partitions + * + * Caller should have checked permissions for the table; here we also + * enforce that superuser privilege is required to alter the state of + * system triggers + */ +void +EnableDisableTriggerNew2(Relation rel, const char *tgname, Oid tgparent, + char fires_when, bool skip_system, bool recurse, + LOCKMODE lockmode) +{ + Relation tgrel; + int nkeys; + ScanKeyData keys[2]; + SysScanDesc tgscan; + HeapTuple tuple; + bool found; + bool changed; + + /* Scan the relevant entries in pg_triggers */ + tgrel = table_open(TriggerRelationId, RowExclusiveLock); + + ScanKeyInit(&keys[0], + Anum_pg_trigger_tgrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(rel))); + if (tgname) + { + ScanKeyInit(&keys[1], + Anum_pg_trigger_tgname, + BTEqualStrategyNumber, F_NAMEEQ, + CStringGetDatum(tgname)); + nkeys = 2; + } + else + nkeys = 1; + + tgscan = systable_beginscan(tgrel, TriggerRelidNameIndexId, true, + NULL, nkeys, keys); + + found = changed = false; + + while (HeapTupleIsValid(tuple = systable_getnext(tgscan))) + { + Form_pg_trigger oldtrig = (Form_pg_trigger) GETSTRUCT(tuple); + + if (OidIsValid(tgparent) && tgparent != oldtrig->tgparentid) + continue; + + if (oldtrig->tgisinternal) + { + /* system trigger ... ok to process? */ + if (skip_system) + continue; + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied: \"%s\" is a system trigger", + NameStr(oldtrig->tgname)))); + } + + found = true; + + if (oldtrig->tgenabled != fires_when) + { + /* need to change this one ... 
make a copy to scribble on */ + HeapTuple newtup = heap_copytuple(tuple); + Form_pg_trigger newtrig = (Form_pg_trigger) GETSTRUCT(newtup); + + newtrig->tgenabled = fires_when; + + CatalogTupleUpdate(tgrel, &newtup->t_self, newtup); + + heap_freetuple(newtup); + + changed = true; + } + + /* + * When altering FOR EACH ROW triggers on a partitioned table, do the + * same on the partitions as well, unless ONLY is specified. + * + * Note that we recurse even if we didn't change the trigger above, + * because the partitions' copy of the trigger may have a different + * value of tgenabled than the parent's trigger and thus might need to + * be changed. + */ + if (recurse && + rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE && + (TRIGGER_FOR_ROW(oldtrig->tgtype))) + { + PartitionDesc partdesc = RelationGetPartitionDesc(rel, true); + int i; + + for (i = 0; i < partdesc->nparts; i++) + { + Relation part; + + part = relation_open(partdesc->oids[i], lockmode); + /* Match on child triggers' tgparentid, not their name */ + EnableDisableTriggerNew2(part, NULL, oldtrig->oid, + fires_when, skip_system, recurse, + lockmode); + table_close(part, NoLock); /* keep lock till commit */ + } + } + + InvokeObjectPostAlterHook(TriggerRelationId, + oldtrig->oid, 0); + } + + systable_endscan(tgscan); + + table_close(tgrel, RowExclusiveLock); + + if (tgname && !found) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("trigger \"%s\" for table \"%s\" does not exist", + tgname, RelationGetRelationName(rel)))); + + /* + * If we changed anything, broadcast a SI inval message to force each + * backend (including our own!) to rebuild relation's relcache entry. + * Otherwise they will fail to apply the change promptly. + */ + if (changed) + CacheInvalidateRelcache(rel); +} + +/* + * ABI-compatible wrappers to emulate old versions of the above function. + * Do not call these versions in new code. 
+ */ +void +EnableDisableTriggerNew(Relation rel, const char *tgname, + char fires_when, bool skip_system, bool recurse, + LOCKMODE lockmode) +{ + EnableDisableTriggerNew2(rel, tgname, InvalidOid, + fires_when, skip_system, + recurse, lockmode); +} + +void +EnableDisableTrigger(Relation rel, const char *tgname, + char fires_when, bool skip_system, + LOCKMODE lockmode) +{ + EnableDisableTriggerNew2(rel, tgname, InvalidOid, + fires_when, skip_system, + true, lockmode); +} + + +/* + * Build trigger data to attach to the given relcache entry. + * + * Note that trigger data attached to a relcache entry must be stored in + * CacheMemoryContext to ensure it survives as long as the relcache entry. + * But we should be running in a less long-lived working context. To avoid + * leaking cache memory if this routine fails partway through, we build a + * temporary TriggerDesc in working memory and then copy the completed + * structure into cache memory. + */ +void +RelationBuildTriggers(Relation relation) +{ + TriggerDesc *trigdesc; + int numtrigs; + int maxtrigs; + Trigger *triggers; + Relation tgrel; + ScanKeyData skey; + SysScanDesc tgscan; + HeapTuple htup; + MemoryContext oldContext; + int i; + + /* + * Allocate a working array to hold the triggers (the array is extended if + * necessary) + */ + maxtrigs = 16; + triggers = (Trigger *) palloc(maxtrigs * sizeof(Trigger)); + numtrigs = 0; + + /* + * Note: since we scan the triggers using TriggerRelidNameIndexId, we will + * be reading the triggers in name order, except possibly during + * emergency-recovery operations (ie, IgnoreSystemIndexes). This in turn + * ensures that triggers will be fired in name order. 
+ */ + ScanKeyInit(&skey, + Anum_pg_trigger_tgrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(relation))); + + tgrel = table_open(TriggerRelationId, AccessShareLock); + tgscan = systable_beginscan(tgrel, TriggerRelidNameIndexId, true, + NULL, 1, &skey); + + while (HeapTupleIsValid(htup = systable_getnext(tgscan))) + { + Form_pg_trigger pg_trigger = (Form_pg_trigger) GETSTRUCT(htup); + Trigger *build; + Datum datum; + bool isnull; + + if (numtrigs >= maxtrigs) + { + maxtrigs *= 2; + triggers = (Trigger *) repalloc(triggers, maxtrigs * sizeof(Trigger)); + } + build = &(triggers[numtrigs]); + + build->tgoid = pg_trigger->oid; + build->tgname = DatumGetCString(DirectFunctionCall1(nameout, + NameGetDatum(&pg_trigger->tgname))); + build->tgfoid = pg_trigger->tgfoid; + build->tgtype = pg_trigger->tgtype; + build->tgenabled = pg_trigger->tgenabled; + build->tgisinternal = pg_trigger->tgisinternal; + build->tgisclone = OidIsValid(pg_trigger->tgparentid); + build->tgconstrrelid = pg_trigger->tgconstrrelid; + build->tgconstrindid = pg_trigger->tgconstrindid; + build->tgconstraint = pg_trigger->tgconstraint; + build->tgdeferrable = pg_trigger->tgdeferrable; + build->tginitdeferred = pg_trigger->tginitdeferred; + build->tgnargs = pg_trigger->tgnargs; + /* tgattr is first var-width field, so OK to access directly */ + build->tgnattr = pg_trigger->tgattr.dim1; + if (build->tgnattr > 0) + { + build->tgattr = (int16 *) palloc(build->tgnattr * sizeof(int16)); + memcpy(build->tgattr, &(pg_trigger->tgattr.values), + build->tgnattr * sizeof(int16)); + } + else + build->tgattr = NULL; + if (build->tgnargs > 0) + { + bytea *val; + char *p; + + val = DatumGetByteaPP(fastgetattr(htup, + Anum_pg_trigger_tgargs, + tgrel->rd_att, &isnull)); + if (isnull) + elog(ERROR, "tgargs is null in trigger for relation \"%s\"", + RelationGetRelationName(relation)); + p = (char *) VARDATA_ANY(val); + build->tgargs = (char **) palloc(build->tgnargs * sizeof(char *)); + for (i = 
0; i < build->tgnargs; i++) + { + build->tgargs[i] = pstrdup(p); + p += strlen(p) + 1; + } + } + else + build->tgargs = NULL; + + datum = fastgetattr(htup, Anum_pg_trigger_tgoldtable, + tgrel->rd_att, &isnull); + if (!isnull) + build->tgoldtable = + DatumGetCString(DirectFunctionCall1(nameout, datum)); + else + build->tgoldtable = NULL; + + datum = fastgetattr(htup, Anum_pg_trigger_tgnewtable, + tgrel->rd_att, &isnull); + if (!isnull) + build->tgnewtable = + DatumGetCString(DirectFunctionCall1(nameout, datum)); + else + build->tgnewtable = NULL; + + datum = fastgetattr(htup, Anum_pg_trigger_tgqual, + tgrel->rd_att, &isnull); + if (!isnull) + build->tgqual = TextDatumGetCString(datum); + else + build->tgqual = NULL; + + numtrigs++; + } + + systable_endscan(tgscan); + table_close(tgrel, AccessShareLock); + + /* There might not be any triggers */ + if (numtrigs == 0) + { + pfree(triggers); + return; + } + + /* Build trigdesc */ + trigdesc = (TriggerDesc *) palloc0(sizeof(TriggerDesc)); + trigdesc->triggers = triggers; + trigdesc->numtriggers = numtrigs; + for (i = 0; i < numtrigs; i++) + SetTriggerFlags(trigdesc, &(triggers[i])); + + /* Copy completed trigdesc into cache storage */ + oldContext = MemoryContextSwitchTo(CacheMemoryContext); + relation->trigdesc = CopyTriggerDesc(trigdesc); + MemoryContextSwitchTo(oldContext); + + /* Release working memory */ + FreeTriggerDesc(trigdesc); +} + +/* + * Update the TriggerDesc's hint flags to include the specified trigger + */ +static void +SetTriggerFlags(TriggerDesc *trigdesc, Trigger *trigger) +{ + int16 tgtype = trigger->tgtype; + + trigdesc->trig_insert_before_row |= + TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_ROW, + TRIGGER_TYPE_BEFORE, TRIGGER_TYPE_INSERT); + trigdesc->trig_insert_after_row |= + TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_ROW, + TRIGGER_TYPE_AFTER, TRIGGER_TYPE_INSERT); + trigdesc->trig_insert_instead_row |= + TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_ROW, + TRIGGER_TYPE_INSTEAD, TRIGGER_TYPE_INSERT); + 
trigdesc->trig_insert_before_statement |= + TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_STATEMENT, + TRIGGER_TYPE_BEFORE, TRIGGER_TYPE_INSERT); + trigdesc->trig_insert_after_statement |= + TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_STATEMENT, + TRIGGER_TYPE_AFTER, TRIGGER_TYPE_INSERT); + trigdesc->trig_update_before_row |= + TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_ROW, + TRIGGER_TYPE_BEFORE, TRIGGER_TYPE_UPDATE); + trigdesc->trig_update_after_row |= + TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_ROW, + TRIGGER_TYPE_AFTER, TRIGGER_TYPE_UPDATE); + trigdesc->trig_update_instead_row |= + TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_ROW, + TRIGGER_TYPE_INSTEAD, TRIGGER_TYPE_UPDATE); + trigdesc->trig_update_before_statement |= + TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_STATEMENT, + TRIGGER_TYPE_BEFORE, TRIGGER_TYPE_UPDATE); + trigdesc->trig_update_after_statement |= + TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_STATEMENT, + TRIGGER_TYPE_AFTER, TRIGGER_TYPE_UPDATE); + trigdesc->trig_delete_before_row |= + TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_ROW, + TRIGGER_TYPE_BEFORE, TRIGGER_TYPE_DELETE); + trigdesc->trig_delete_after_row |= + TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_ROW, + TRIGGER_TYPE_AFTER, TRIGGER_TYPE_DELETE); + trigdesc->trig_delete_instead_row |= + TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_ROW, + TRIGGER_TYPE_INSTEAD, TRIGGER_TYPE_DELETE); + trigdesc->trig_delete_before_statement |= + TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_STATEMENT, + TRIGGER_TYPE_BEFORE, TRIGGER_TYPE_DELETE); + trigdesc->trig_delete_after_statement |= + TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_STATEMENT, + TRIGGER_TYPE_AFTER, TRIGGER_TYPE_DELETE); + /* there are no row-level truncate triggers */ + trigdesc->trig_truncate_before_statement |= + TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_STATEMENT, + TRIGGER_TYPE_BEFORE, TRIGGER_TYPE_TRUNCATE); + trigdesc->trig_truncate_after_statement |= + TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_STATEMENT, + TRIGGER_TYPE_AFTER, TRIGGER_TYPE_TRUNCATE); + + 
trigdesc->trig_insert_new_table |= + (TRIGGER_FOR_INSERT(tgtype) && + TRIGGER_USES_TRANSITION_TABLE(trigger->tgnewtable)); + trigdesc->trig_update_old_table |= + (TRIGGER_FOR_UPDATE(tgtype) && + TRIGGER_USES_TRANSITION_TABLE(trigger->tgoldtable)); + trigdesc->trig_update_new_table |= + (TRIGGER_FOR_UPDATE(tgtype) && + TRIGGER_USES_TRANSITION_TABLE(trigger->tgnewtable)); + trigdesc->trig_delete_old_table |= + (TRIGGER_FOR_DELETE(tgtype) && + TRIGGER_USES_TRANSITION_TABLE(trigger->tgoldtable)); +} + +/* + * Copy a TriggerDesc data structure. + * + * The copy is allocated in the current memory context. + */ +TriggerDesc * +CopyTriggerDesc(TriggerDesc *trigdesc) +{ + TriggerDesc *newdesc; + Trigger *trigger; + int i; + + if (trigdesc == NULL || trigdesc->numtriggers <= 0) + return NULL; + + newdesc = (TriggerDesc *) palloc(sizeof(TriggerDesc)); + memcpy(newdesc, trigdesc, sizeof(TriggerDesc)); + + trigger = (Trigger *) palloc(trigdesc->numtriggers * sizeof(Trigger)); + memcpy(trigger, trigdesc->triggers, + trigdesc->numtriggers * sizeof(Trigger)); + newdesc->triggers = trigger; + + for (i = 0; i < trigdesc->numtriggers; i++) + { + trigger->tgname = pstrdup(trigger->tgname); + if (trigger->tgnattr > 0) + { + int16 *newattr; + + newattr = (int16 *) palloc(trigger->tgnattr * sizeof(int16)); + memcpy(newattr, trigger->tgattr, + trigger->tgnattr * sizeof(int16)); + trigger->tgattr = newattr; + } + if (trigger->tgnargs > 0) + { + char **newargs; + int16 j; + + newargs = (char **) palloc(trigger->tgnargs * sizeof(char *)); + for (j = 0; j < trigger->tgnargs; j++) + newargs[j] = pstrdup(trigger->tgargs[j]); + trigger->tgargs = newargs; + } + if (trigger->tgqual) + trigger->tgqual = pstrdup(trigger->tgqual); + if (trigger->tgoldtable) + trigger->tgoldtable = pstrdup(trigger->tgoldtable); + if (trigger->tgnewtable) + trigger->tgnewtable = pstrdup(trigger->tgnewtable); + trigger++; + } + + return newdesc; +} + +/* + * Free a TriggerDesc data structure. 
+ */ +void +FreeTriggerDesc(TriggerDesc *trigdesc) +{ + Trigger *trigger; + int i; + + if (trigdesc == NULL) + return; + + trigger = trigdesc->triggers; + for (i = 0; i < trigdesc->numtriggers; i++) + { + pfree(trigger->tgname); + if (trigger->tgnattr > 0) + pfree(trigger->tgattr); + if (trigger->tgnargs > 0) + { + while (--(trigger->tgnargs) >= 0) + pfree(trigger->tgargs[trigger->tgnargs]); + pfree(trigger->tgargs); + } + if (trigger->tgqual) + pfree(trigger->tgqual); + if (trigger->tgoldtable) + pfree(trigger->tgoldtable); + if (trigger->tgnewtable) + pfree(trigger->tgnewtable); + trigger++; + } + pfree(trigdesc->triggers); + pfree(trigdesc); +} + +/* + * Compare two TriggerDesc structures for logical equality. + */ +#ifdef NOT_USED +bool +equalTriggerDescs(TriggerDesc *trigdesc1, TriggerDesc *trigdesc2) +{ + int i, + j; + + /* + * We need not examine the hint flags, just the trigger array itself; if + * we have the same triggers with the same types, the flags should match. + * + * As of 7.3 we assume trigger set ordering is significant in the + * comparison; so we just compare corresponding slots of the two sets. + * + * Note: comparing the stringToNode forms of the WHEN clauses means that + * parse column locations will affect the result. This is okay as long as + * this function is only used for detecting exact equality, as for example + * in checking for staleness of a cache entry. 
+ */ + if (trigdesc1 != NULL) + { + if (trigdesc2 == NULL) + return false; + if (trigdesc1->numtriggers != trigdesc2->numtriggers) + return false; + for (i = 0; i < trigdesc1->numtriggers; i++) + { + Trigger *trig1 = trigdesc1->triggers + i; + Trigger *trig2 = trigdesc2->triggers + i; + + if (trig1->tgoid != trig2->tgoid) + return false; + if (strcmp(trig1->tgname, trig2->tgname) != 0) + return false; + if (trig1->tgfoid != trig2->tgfoid) + return false; + if (trig1->tgtype != trig2->tgtype) + return false; + if (trig1->tgenabled != trig2->tgenabled) + return false; + if (trig1->tgisinternal != trig2->tgisinternal) + return false; + if (trig1->tgisclone != trig2->tgisclone) + return false; + if (trig1->tgconstrrelid != trig2->tgconstrrelid) + return false; + if (trig1->tgconstrindid != trig2->tgconstrindid) + return false; + if (trig1->tgconstraint != trig2->tgconstraint) + return false; + if (trig1->tgdeferrable != trig2->tgdeferrable) + return false; + if (trig1->tginitdeferred != trig2->tginitdeferred) + return false; + if (trig1->tgnargs != trig2->tgnargs) + return false; + if (trig1->tgnattr != trig2->tgnattr) + return false; + if (trig1->tgnattr > 0 && + memcmp(trig1->tgattr, trig2->tgattr, + trig1->tgnattr * sizeof(int16)) != 0) + return false; + for (j = 0; j < trig1->tgnargs; j++) + if (strcmp(trig1->tgargs[j], trig2->tgargs[j]) != 0) + return false; + if (trig1->tgqual == NULL && trig2->tgqual == NULL) + /* ok */ ; + else if (trig1->tgqual == NULL || trig2->tgqual == NULL) + return false; + else if (strcmp(trig1->tgqual, trig2->tgqual) != 0) + return false; + if (trig1->tgoldtable == NULL && trig2->tgoldtable == NULL) + /* ok */ ; + else if (trig1->tgoldtable == NULL || trig2->tgoldtable == NULL) + return false; + else if (strcmp(trig1->tgoldtable, trig2->tgoldtable) != 0) + return false; + if (trig1->tgnewtable == NULL && trig2->tgnewtable == NULL) + /* ok */ ; + else if (trig1->tgnewtable == NULL || trig2->tgnewtable == NULL) + return false; + else if 
(strcmp(trig1->tgnewtable, trig2->tgnewtable) != 0) + return false; + } + } + else if (trigdesc2 != NULL) + return false; + return true; +} +#endif /* NOT_USED */ + +/* + * Check if there is a row-level trigger with transition tables that prevents + * a table from becoming an inheritance child or partition. Return the name + * of the first such incompatible trigger, or NULL if there is none. + */ +const char * +FindTriggerIncompatibleWithInheritance(TriggerDesc *trigdesc) +{ + if (trigdesc != NULL) + { + int i; + + for (i = 0; i < trigdesc->numtriggers; ++i) + { + Trigger *trigger = &trigdesc->triggers[i]; + + if (trigger->tgoldtable != NULL || trigger->tgnewtable != NULL) + return trigger->tgname; + } + } + + return NULL; +} + +/* + * Call a trigger function. + * + * trigdata: trigger descriptor. + * tgindx: trigger's index in finfo and instr arrays. + * finfo: array of cached trigger function call information. + * instr: optional array of EXPLAIN ANALYZE instrumentation state. + * per_tuple_context: memory context to execute the function in. + * + * Returns the tuple (or NULL) as returned by the function. + */ +static HeapTuple +ExecCallTriggerFunc(TriggerData *trigdata, + int tgindx, + FmgrInfo *finfo, + Instrumentation *instr, + MemoryContext per_tuple_context) +{ + LOCAL_FCINFO(fcinfo, 0); + PgStat_FunctionCallUsage fcusage; + Datum result; + MemoryContext oldContext; + + /* + * Protect against code paths that may fail to initialize transition table + * info. + */ + Assert(((TRIGGER_FIRED_BY_INSERT(trigdata->tg_event) || + TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event) || + TRIGGER_FIRED_BY_DELETE(trigdata->tg_event)) && + TRIGGER_FIRED_AFTER(trigdata->tg_event) && + !(trigdata->tg_event & AFTER_TRIGGER_DEFERRABLE) && + !(trigdata->tg_event & AFTER_TRIGGER_INITDEFERRED)) || + (trigdata->tg_oldtable == NULL && trigdata->tg_newtable == NULL)); + + finfo += tgindx; + + /* + * We cache fmgr lookup info, to avoid making the lookup again on each + * call. 
+ */ + if (finfo->fn_oid == InvalidOid) + fmgr_info(trigdata->tg_trigger->tgfoid, finfo); + + Assert(finfo->fn_oid == trigdata->tg_trigger->tgfoid); + + /* + * If doing EXPLAIN ANALYZE, start charging time to this trigger. + */ + if (instr) + InstrStartNode(instr + tgindx); + + /* + * Do the function evaluation in the per-tuple memory context, so that + * leaked memory will be reclaimed once per tuple. Note in particular that + * any new tuple created by the trigger function will live till the end of + * the tuple cycle. + */ + oldContext = MemoryContextSwitchTo(per_tuple_context); + + /* + * Call the function, passing no arguments but setting a context. + */ + InitFunctionCallInfoData(*fcinfo, finfo, 0, + InvalidOid, (Node *) trigdata, NULL); + + pgstat_init_function_usage(fcinfo, &fcusage); + + MyTriggerDepth++; + PG_TRY(); + { + result = FunctionCallInvoke(fcinfo); + } + PG_FINALLY(); + { + MyTriggerDepth--; + } + PG_END_TRY(); + + pgstat_end_function_usage(&fcusage, true); + + MemoryContextSwitchTo(oldContext); + + /* + * Trigger protocol allows function to return a null pointer, but NOT to + * set the isnull result flag. + */ + if (fcinfo->isnull) + ereport(ERROR, + (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), + errmsg("trigger function %u returned null value", + fcinfo->flinfo->fn_oid))); + + /* + * If doing EXPLAIN ANALYZE, stop charging time to this trigger, and count + * one "tuple returned" (really the number of firings). 
+ */ + if (instr) + InstrStopNode(instr + tgindx, 1); + + return (HeapTuple) DatumGetPointer(result); +} + +void +ExecBSInsertTriggers(EState *estate, ResultRelInfo *relinfo) +{ + TriggerDesc *trigdesc; + int i; + TriggerData LocTriggerData = {0}; + + trigdesc = relinfo->ri_TrigDesc; + + if (trigdesc == NULL) + return; + if (!trigdesc->trig_insert_before_statement) + return; + + /* no-op if we already fired BS triggers in this context */ + if (before_stmt_triggers_fired(RelationGetRelid(relinfo->ri_RelationDesc), + CMD_INSERT)) + return; + + LocTriggerData.type = T_TriggerData; + LocTriggerData.tg_event = TRIGGER_EVENT_INSERT | + TRIGGER_EVENT_BEFORE; + LocTriggerData.tg_relation = relinfo->ri_RelationDesc; + for (i = 0; i < trigdesc->numtriggers; i++) + { + Trigger *trigger = &trigdesc->triggers[i]; + HeapTuple newtuple; + + if (!TRIGGER_TYPE_MATCHES(trigger->tgtype, + TRIGGER_TYPE_STATEMENT, + TRIGGER_TYPE_BEFORE, + TRIGGER_TYPE_INSERT)) + continue; + if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event, + NULL, NULL, NULL)) + continue; + + LocTriggerData.tg_trigger = trigger; + newtuple = ExecCallTriggerFunc(&LocTriggerData, + i, + relinfo->ri_TrigFunctions, + relinfo->ri_TrigInstrument, + GetPerTupleMemoryContext(estate)); + + if (newtuple) + ereport(ERROR, + (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), + errmsg("BEFORE STATEMENT trigger cannot return a value"))); + } +} + +void +ExecASInsertTriggers(EState *estate, ResultRelInfo *relinfo, + TransitionCaptureState *transition_capture) +{ + TriggerDesc *trigdesc = relinfo->ri_TrigDesc; + + if (trigdesc && trigdesc->trig_insert_after_statement) + AfterTriggerSaveEvent(estate, relinfo, NULL, NULL, + TRIGGER_EVENT_INSERT, + false, NULL, NULL, NIL, NULL, transition_capture, + false); +} + +bool +ExecBRInsertTriggers(EState *estate, ResultRelInfo *relinfo, + TupleTableSlot *slot) +{ + TriggerDesc *trigdesc = relinfo->ri_TrigDesc; + HeapTuple newtuple = NULL; + bool should_free; + TriggerData 
LocTriggerData = {0}; + int i; + + LocTriggerData.type = T_TriggerData; + LocTriggerData.tg_event = TRIGGER_EVENT_INSERT | + TRIGGER_EVENT_ROW | + TRIGGER_EVENT_BEFORE; + LocTriggerData.tg_relation = relinfo->ri_RelationDesc; + for (i = 0; i < trigdesc->numtriggers; i++) + { + Trigger *trigger = &trigdesc->triggers[i]; + HeapTuple oldtuple; + + if (!TRIGGER_TYPE_MATCHES(trigger->tgtype, + TRIGGER_TYPE_ROW, + TRIGGER_TYPE_BEFORE, + TRIGGER_TYPE_INSERT)) + continue; + if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event, + NULL, NULL, slot)) + continue; + + if (!newtuple) + newtuple = ExecFetchSlotHeapTuple(slot, true, &should_free); + + LocTriggerData.tg_trigslot = slot; + LocTriggerData.tg_trigtuple = oldtuple = newtuple; + LocTriggerData.tg_trigger = trigger; + newtuple = ExecCallTriggerFunc(&LocTriggerData, + i, + relinfo->ri_TrigFunctions, + relinfo->ri_TrigInstrument, + GetPerTupleMemoryContext(estate)); + if (newtuple == NULL) + { + if (should_free) + heap_freetuple(oldtuple); + return false; /* "do nothing" */ + } + else if (newtuple != oldtuple) + { + ExecForceStoreHeapTuple(newtuple, slot, false); + + /* + * After a tuple in a partition goes through a trigger, the user + * could have changed the partition key enough that the tuple no + * longer fits the partition. Verify that. 
+ */ + if (trigger->tgisclone && + !ExecPartitionCheck(relinfo, slot, estate, false)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("moving row to another partition during a BEFORE FOR EACH ROW trigger is not supported"), + errdetail("Before executing trigger \"%s\", the row was to be in partition \"%s.%s\".", + trigger->tgname, + get_namespace_name(RelationGetNamespace(relinfo->ri_RelationDesc)), + RelationGetRelationName(relinfo->ri_RelationDesc)))); + + if (should_free) + heap_freetuple(oldtuple); + + /* signal tuple should be re-fetched if used */ + newtuple = NULL; + } + } + + return true; +} + +void +ExecARInsertTriggers(EState *estate, ResultRelInfo *relinfo, + TupleTableSlot *slot, List *recheckIndexes, + TransitionCaptureState *transition_capture) +{ + TriggerDesc *trigdesc = relinfo->ri_TrigDesc; + + if ((trigdesc && trigdesc->trig_insert_after_row) || + (transition_capture && transition_capture->tcs_insert_new_table)) + AfterTriggerSaveEvent(estate, relinfo, NULL, NULL, + TRIGGER_EVENT_INSERT, + true, NULL, slot, + recheckIndexes, NULL, + transition_capture, + false); +} + +bool +ExecIRInsertTriggers(EState *estate, ResultRelInfo *relinfo, + TupleTableSlot *slot) +{ + TriggerDesc *trigdesc = relinfo->ri_TrigDesc; + HeapTuple newtuple = NULL; + bool should_free; + TriggerData LocTriggerData = {0}; + int i; + + LocTriggerData.type = T_TriggerData; + LocTriggerData.tg_event = TRIGGER_EVENT_INSERT | + TRIGGER_EVENT_ROW | + TRIGGER_EVENT_INSTEAD; + LocTriggerData.tg_relation = relinfo->ri_RelationDesc; + for (i = 0; i < trigdesc->numtriggers; i++) + { + Trigger *trigger = &trigdesc->triggers[i]; + HeapTuple oldtuple; + + if (!TRIGGER_TYPE_MATCHES(trigger->tgtype, + TRIGGER_TYPE_ROW, + TRIGGER_TYPE_INSTEAD, + TRIGGER_TYPE_INSERT)) + continue; + if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event, + NULL, NULL, slot)) + continue; + + if (!newtuple) + newtuple = ExecFetchSlotHeapTuple(slot, true, &should_free); + + 
LocTriggerData.tg_trigslot = slot; + LocTriggerData.tg_trigtuple = oldtuple = newtuple; + LocTriggerData.tg_trigger = trigger; + newtuple = ExecCallTriggerFunc(&LocTriggerData, + i, + relinfo->ri_TrigFunctions, + relinfo->ri_TrigInstrument, + GetPerTupleMemoryContext(estate)); + if (newtuple == NULL) + { + if (should_free) + heap_freetuple(oldtuple); + return false; /* "do nothing" */ + } + else if (newtuple != oldtuple) + { + ExecForceStoreHeapTuple(newtuple, slot, false); + + if (should_free) + heap_freetuple(oldtuple); + + /* signal tuple should be re-fetched if used */ + newtuple = NULL; + } + } + + return true; +} + +void +ExecBSDeleteTriggers(EState *estate, ResultRelInfo *relinfo) +{ + TriggerDesc *trigdesc; + int i; + TriggerData LocTriggerData = {0}; + + trigdesc = relinfo->ri_TrigDesc; + + if (trigdesc == NULL) + return; + if (!trigdesc->trig_delete_before_statement) + return; + + /* no-op if we already fired BS triggers in this context */ + if (before_stmt_triggers_fired(RelationGetRelid(relinfo->ri_RelationDesc), + CMD_DELETE)) + return; + + LocTriggerData.type = T_TriggerData; + LocTriggerData.tg_event = TRIGGER_EVENT_DELETE | + TRIGGER_EVENT_BEFORE; + LocTriggerData.tg_relation = relinfo->ri_RelationDesc; + for (i = 0; i < trigdesc->numtriggers; i++) + { + Trigger *trigger = &trigdesc->triggers[i]; + HeapTuple newtuple; + + if (!TRIGGER_TYPE_MATCHES(trigger->tgtype, + TRIGGER_TYPE_STATEMENT, + TRIGGER_TYPE_BEFORE, + TRIGGER_TYPE_DELETE)) + continue; + if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event, + NULL, NULL, NULL)) + continue; + + LocTriggerData.tg_trigger = trigger; + newtuple = ExecCallTriggerFunc(&LocTriggerData, + i, + relinfo->ri_TrigFunctions, + relinfo->ri_TrigInstrument, + GetPerTupleMemoryContext(estate)); + + if (newtuple) + ereport(ERROR, + (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), + errmsg("BEFORE STATEMENT trigger cannot return a value"))); + } +} + +void +ExecASDeleteTriggers(EState *estate, 
ResultRelInfo *relinfo, + TransitionCaptureState *transition_capture) +{ + TriggerDesc *trigdesc = relinfo->ri_TrigDesc; + + if (trigdesc && trigdesc->trig_delete_after_statement) + AfterTriggerSaveEvent(estate, relinfo, NULL, NULL, + TRIGGER_EVENT_DELETE, + false, NULL, NULL, NIL, NULL, transition_capture, + false); +} + +/* + * Execute BEFORE ROW DELETE triggers. + * + * True indicates caller can proceed with the delete. False indicates caller + * need to suppress the delete and additionally if requested, we need to pass + * back the concurrently updated tuple if any. + */ +bool +ExecBRDeleteTriggersNew(EState *estate, EPQState *epqstate, + ResultRelInfo *relinfo, + ItemPointer tupleid, + HeapTuple fdw_trigtuple, + TupleTableSlot **epqslot, + TM_Result *tmresult, + TM_FailureData *tmfd) +{ + TupleTableSlot *slot = ExecGetTriggerOldSlot(estate, relinfo); + TriggerDesc *trigdesc = relinfo->ri_TrigDesc; + bool result = true; + TriggerData LocTriggerData = {0}; + HeapTuple trigtuple; + bool should_free = false; + int i; + + Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid)); + if (fdw_trigtuple == NULL) + { + TupleTableSlot *epqslot_candidate = NULL; + + if (!GetTupleForTrigger(estate, epqstate, relinfo, tupleid, + LockTupleExclusive, slot, &epqslot_candidate, + tmresult, tmfd)) + return false; + + /* + * If the tuple was concurrently updated and the caller of this + * function requested for the updated tuple, skip the trigger + * execution. 
+ */ + if (epqslot_candidate != NULL && epqslot != NULL) + { + *epqslot = epqslot_candidate; + return false; + } + + trigtuple = ExecFetchSlotHeapTuple(slot, true, &should_free); + } + else + { + trigtuple = fdw_trigtuple; + ExecForceStoreHeapTuple(trigtuple, slot, false); + } + + LocTriggerData.type = T_TriggerData; + LocTriggerData.tg_event = TRIGGER_EVENT_DELETE | + TRIGGER_EVENT_ROW | + TRIGGER_EVENT_BEFORE; + LocTriggerData.tg_relation = relinfo->ri_RelationDesc; + for (i = 0; i < trigdesc->numtriggers; i++) + { + HeapTuple newtuple; + Trigger *trigger = &trigdesc->triggers[i]; + + if (!TRIGGER_TYPE_MATCHES(trigger->tgtype, + TRIGGER_TYPE_ROW, + TRIGGER_TYPE_BEFORE, + TRIGGER_TYPE_DELETE)) + continue; + if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event, + NULL, slot, NULL)) + continue; + + LocTriggerData.tg_trigslot = slot; + LocTriggerData.tg_trigtuple = trigtuple; + LocTriggerData.tg_trigger = trigger; + newtuple = ExecCallTriggerFunc(&LocTriggerData, + i, + relinfo->ri_TrigFunctions, + relinfo->ri_TrigInstrument, + GetPerTupleMemoryContext(estate)); + if (newtuple == NULL) + { + result = false; /* tell caller to suppress delete */ + break; + } + if (newtuple != trigtuple) + heap_freetuple(newtuple); + } + if (should_free) + heap_freetuple(trigtuple); + + return result; +} + +/* + * ABI-compatible wrapper to emulate old version of the above function. + * Do not call this version in new code. + */ +bool +ExecBRDeleteTriggers(EState *estate, EPQState *epqstate, + ResultRelInfo *relinfo, + ItemPointer tupleid, + HeapTuple fdw_trigtuple, + TupleTableSlot **epqslot) +{ + return ExecBRDeleteTriggersNew(estate, epqstate, relinfo, tupleid, + fdw_trigtuple, epqslot, NULL, NULL); +} + +/* + * Note: is_crosspart_update must be true if the DELETE is being performed + * as part of a cross-partition update. 
+ */ +void +ExecARDeleteTriggers(EState *estate, + ResultRelInfo *relinfo, + ItemPointer tupleid, + HeapTuple fdw_trigtuple, + TransitionCaptureState *transition_capture, + bool is_crosspart_update) +{ + TriggerDesc *trigdesc = relinfo->ri_TrigDesc; + + if ((trigdesc && trigdesc->trig_delete_after_row) || + (transition_capture && transition_capture->tcs_delete_old_table)) + { + TupleTableSlot *slot = ExecGetTriggerOldSlot(estate, relinfo); + + Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid)); + if (fdw_trigtuple == NULL) + GetTupleForTrigger(estate, + NULL, + relinfo, + tupleid, + LockTupleExclusive, + slot, + NULL, + NULL, + NULL); + else + ExecForceStoreHeapTuple(fdw_trigtuple, slot, false); + + AfterTriggerSaveEvent(estate, relinfo, NULL, NULL, + TRIGGER_EVENT_DELETE, + true, slot, NULL, NIL, NULL, + transition_capture, + is_crosspart_update); + } +} + +bool +ExecIRDeleteTriggers(EState *estate, ResultRelInfo *relinfo, + HeapTuple trigtuple) +{ + TriggerDesc *trigdesc = relinfo->ri_TrigDesc; + TupleTableSlot *slot = ExecGetTriggerOldSlot(estate, relinfo); + TriggerData LocTriggerData = {0}; + int i; + + LocTriggerData.type = T_TriggerData; + LocTriggerData.tg_event = TRIGGER_EVENT_DELETE | + TRIGGER_EVENT_ROW | + TRIGGER_EVENT_INSTEAD; + LocTriggerData.tg_relation = relinfo->ri_RelationDesc; + + ExecForceStoreHeapTuple(trigtuple, slot, false); + + for (i = 0; i < trigdesc->numtriggers; i++) + { + HeapTuple rettuple; + Trigger *trigger = &trigdesc->triggers[i]; + + if (!TRIGGER_TYPE_MATCHES(trigger->tgtype, + TRIGGER_TYPE_ROW, + TRIGGER_TYPE_INSTEAD, + TRIGGER_TYPE_DELETE)) + continue; + if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event, + NULL, slot, NULL)) + continue; + + LocTriggerData.tg_trigslot = slot; + LocTriggerData.tg_trigtuple = trigtuple; + LocTriggerData.tg_trigger = trigger; + rettuple = ExecCallTriggerFunc(&LocTriggerData, + i, + relinfo->ri_TrigFunctions, + relinfo->ri_TrigInstrument, + 
GetPerTupleMemoryContext(estate)); + if (rettuple == NULL) + return false; /* Delete was suppressed */ + if (rettuple != trigtuple) + heap_freetuple(rettuple); + } + return true; +} + +void +ExecBSUpdateTriggers(EState *estate, ResultRelInfo *relinfo) +{ + TriggerDesc *trigdesc; + int i; + TriggerData LocTriggerData = {0}; + Bitmapset *updatedCols; + + trigdesc = relinfo->ri_TrigDesc; + + if (trigdesc == NULL) + return; + if (!trigdesc->trig_update_before_statement) + return; + + /* no-op if we already fired BS triggers in this context */ + if (before_stmt_triggers_fired(RelationGetRelid(relinfo->ri_RelationDesc), + CMD_UPDATE)) + return; + + /* statement-level triggers operate on the parent table */ + Assert(relinfo->ri_RootResultRelInfo == NULL); + + updatedCols = ExecGetAllUpdatedCols(relinfo, estate); + + LocTriggerData.type = T_TriggerData; + LocTriggerData.tg_event = TRIGGER_EVENT_UPDATE | + TRIGGER_EVENT_BEFORE; + LocTriggerData.tg_relation = relinfo->ri_RelationDesc; + LocTriggerData.tg_updatedcols = updatedCols; + for (i = 0; i < trigdesc->numtriggers; i++) + { + Trigger *trigger = &trigdesc->triggers[i]; + HeapTuple newtuple; + + if (!TRIGGER_TYPE_MATCHES(trigger->tgtype, + TRIGGER_TYPE_STATEMENT, + TRIGGER_TYPE_BEFORE, + TRIGGER_TYPE_UPDATE)) + continue; + if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event, + updatedCols, NULL, NULL)) + continue; + + LocTriggerData.tg_trigger = trigger; + newtuple = ExecCallTriggerFunc(&LocTriggerData, + i, + relinfo->ri_TrigFunctions, + relinfo->ri_TrigInstrument, + GetPerTupleMemoryContext(estate)); + + if (newtuple) + ereport(ERROR, + (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), + errmsg("BEFORE STATEMENT trigger cannot return a value"))); + } +} + +void +ExecASUpdateTriggers(EState *estate, ResultRelInfo *relinfo, + TransitionCaptureState *transition_capture) +{ + TriggerDesc *trigdesc = relinfo->ri_TrigDesc; + + /* statement-level triggers operate on the parent table */ + 
Assert(relinfo->ri_RootResultRelInfo == NULL); + + if (trigdesc && trigdesc->trig_update_after_statement) + AfterTriggerSaveEvent(estate, relinfo, NULL, NULL, + TRIGGER_EVENT_UPDATE, + false, NULL, NULL, NIL, + ExecGetAllUpdatedCols(relinfo, estate), + transition_capture, + false); +} + +bool +ExecBRUpdateTriggersNew(EState *estate, EPQState *epqstate, + ResultRelInfo *relinfo, + ItemPointer tupleid, + HeapTuple fdw_trigtuple, + TupleTableSlot *newslot, + TM_Result *tmresult, + TM_FailureData *tmfd) +{ + TriggerDesc *trigdesc = relinfo->ri_TrigDesc; + TupleTableSlot *oldslot = ExecGetTriggerOldSlot(estate, relinfo); + HeapTuple newtuple = NULL; + HeapTuple trigtuple; + bool should_free_trig = false; + bool should_free_new = false; + TriggerData LocTriggerData = {0}; + int i; + Bitmapset *updatedCols; + LockTupleMode lockmode; + + /* Determine lock mode to use */ + lockmode = ExecUpdateLockMode(estate, relinfo); + + Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid)); + if (fdw_trigtuple == NULL) + { + TupleTableSlot *epqslot_candidate = NULL; + + /* get a copy of the on-disk tuple we are planning to update */ + if (!GetTupleForTrigger(estate, epqstate, relinfo, tupleid, + lockmode, oldslot, &epqslot_candidate, + tmresult, tmfd)) + return false; /* cancel the update action */ + + /* + * In READ COMMITTED isolation level it's possible that target tuple + * was changed due to concurrent update. In that case we have a raw + * subplan output tuple in epqslot_candidate, and need to form a new + * insertable tuple using ExecGetUpdateNewTuple to replace the one we + * received in newslot. Neither we nor our callers have any further + * interest in the passed-in tuple, so it's okay to overwrite newslot + * with the newer data. + * + * (Typically, newslot was also generated by ExecGetUpdateNewTuple, so + * that epqslot_clean will be that same slot and the copy step below + * is not needed.) 
+ */ + if (epqslot_candidate != NULL) + { + TupleTableSlot *epqslot_clean; + + epqslot_clean = ExecGetUpdateNewTuple(relinfo, epqslot_candidate, + oldslot); + + if (newslot != epqslot_clean) + ExecCopySlot(newslot, epqslot_clean); + } + + trigtuple = ExecFetchSlotHeapTuple(oldslot, true, &should_free_trig); + } + else + { + ExecForceStoreHeapTuple(fdw_trigtuple, oldslot, false); + trigtuple = fdw_trigtuple; + } + + LocTriggerData.type = T_TriggerData; + LocTriggerData.tg_event = TRIGGER_EVENT_UPDATE | + TRIGGER_EVENT_ROW | + TRIGGER_EVENT_BEFORE; + LocTriggerData.tg_relation = relinfo->ri_RelationDesc; + updatedCols = ExecGetAllUpdatedCols(relinfo, estate); + LocTriggerData.tg_updatedcols = updatedCols; + for (i = 0; i < trigdesc->numtriggers; i++) + { + Trigger *trigger = &trigdesc->triggers[i]; + HeapTuple oldtuple; + + if (!TRIGGER_TYPE_MATCHES(trigger->tgtype, + TRIGGER_TYPE_ROW, + TRIGGER_TYPE_BEFORE, + TRIGGER_TYPE_UPDATE)) + continue; + if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event, + updatedCols, oldslot, newslot)) + continue; + + if (!newtuple) + newtuple = ExecFetchSlotHeapTuple(newslot, true, &should_free_new); + + LocTriggerData.tg_trigslot = oldslot; + LocTriggerData.tg_trigtuple = trigtuple; + LocTriggerData.tg_newtuple = oldtuple = newtuple; + LocTriggerData.tg_newslot = newslot; + LocTriggerData.tg_trigger = trigger; + newtuple = ExecCallTriggerFunc(&LocTriggerData, + i, + relinfo->ri_TrigFunctions, + relinfo->ri_TrigInstrument, + GetPerTupleMemoryContext(estate)); + + if (newtuple == NULL) + { + if (should_free_trig) + heap_freetuple(trigtuple); + if (should_free_new) + heap_freetuple(oldtuple); + return false; /* "do nothing" */ + } + else if (newtuple != oldtuple) + { + ExecForceStoreHeapTuple(newtuple, newslot, false); + + /* + * If the tuple returned by the trigger / being stored, is the old + * row version, and the heap tuple passed to the trigger was + * allocated locally, materialize the slot. 
Otherwise we might + * free it while still referenced by the slot. + */ + if (should_free_trig && newtuple == trigtuple) + ExecMaterializeSlot(newslot); + + if (should_free_new) + heap_freetuple(oldtuple); + + /* signal tuple should be re-fetched if used */ + newtuple = NULL; + } + } + if (should_free_trig) + heap_freetuple(trigtuple); + + return true; +} + +/* + * ABI-compatible wrapper to emulate old version of the above function. + * Do not call this version in new code. + */ +bool +ExecBRUpdateTriggers(EState *estate, EPQState *epqstate, + ResultRelInfo *relinfo, + ItemPointer tupleid, + HeapTuple fdw_trigtuple, + TupleTableSlot *newslot, + TM_FailureData *tmfd) +{ + return ExecBRUpdateTriggersNew(estate, epqstate, relinfo, tupleid, + fdw_trigtuple, newslot, NULL, tmfd); +} + +/* + * Note: 'src_partinfo' and 'dst_partinfo', when non-NULL, refer to the source + * and destination partitions, respectively, of a cross-partition update of + * the root partitioned table mentioned in the query, given by 'relinfo'. + * 'tupleid' in that case refers to the ctid of the "old" tuple in the source + * partition, and 'newslot' contains the "new" tuple in the destination + * partition. This interface allows to support the requirements of + * ExecCrossPartitionUpdateForeignKey(); is_crosspart_update must be true in + * that case. 
+ */ +void +ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo, + ResultRelInfo *src_partinfo, + ResultRelInfo *dst_partinfo, + ItemPointer tupleid, + HeapTuple fdw_trigtuple, + TupleTableSlot *newslot, + List *recheckIndexes, + TransitionCaptureState *transition_capture, + bool is_crosspart_update) +{ + TriggerDesc *trigdesc = relinfo->ri_TrigDesc; + + if ((trigdesc && trigdesc->trig_update_after_row) || + (transition_capture && + (transition_capture->tcs_update_old_table || + transition_capture->tcs_update_new_table))) + { + /* + * Note: if the UPDATE is converted into a DELETE+INSERT as part of + * update-partition-key operation, then this function is also called + * separately for DELETE and INSERT to capture transition table rows. + * In such case, either old tuple or new tuple can be NULL. + */ + TupleTableSlot *oldslot; + ResultRelInfo *tupsrc; + + Assert((src_partinfo != NULL && dst_partinfo != NULL) || + !is_crosspart_update); + + tupsrc = src_partinfo ? src_partinfo : relinfo; + oldslot = ExecGetTriggerOldSlot(estate, tupsrc); + + if (fdw_trigtuple == NULL && ItemPointerIsValid(tupleid)) + GetTupleForTrigger(estate, + NULL, + tupsrc, + tupleid, + LockTupleExclusive, + oldslot, + NULL, + NULL, + NULL); + else if (fdw_trigtuple != NULL) + ExecForceStoreHeapTuple(fdw_trigtuple, oldslot, false); + else + ExecClearTuple(oldslot); + + AfterTriggerSaveEvent(estate, relinfo, + src_partinfo, dst_partinfo, + TRIGGER_EVENT_UPDATE, + true, + oldslot, newslot, recheckIndexes, + ExecGetAllUpdatedCols(relinfo, estate), + transition_capture, + is_crosspart_update); + } +} + +bool +ExecIRUpdateTriggers(EState *estate, ResultRelInfo *relinfo, + HeapTuple trigtuple, TupleTableSlot *newslot) +{ + TriggerDesc *trigdesc = relinfo->ri_TrigDesc; + TupleTableSlot *oldslot = ExecGetTriggerOldSlot(estate, relinfo); + HeapTuple newtuple = NULL; + bool should_free; + TriggerData LocTriggerData = {0}; + int i; + + LocTriggerData.type = T_TriggerData; + LocTriggerData.tg_event 
= TRIGGER_EVENT_UPDATE | + TRIGGER_EVENT_ROW | + TRIGGER_EVENT_INSTEAD; + LocTriggerData.tg_relation = relinfo->ri_RelationDesc; + + ExecForceStoreHeapTuple(trigtuple, oldslot, false); + + for (i = 0; i < trigdesc->numtriggers; i++) + { + Trigger *trigger = &trigdesc->triggers[i]; + HeapTuple oldtuple; + + if (!TRIGGER_TYPE_MATCHES(trigger->tgtype, + TRIGGER_TYPE_ROW, + TRIGGER_TYPE_INSTEAD, + TRIGGER_TYPE_UPDATE)) + continue; + if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event, + NULL, oldslot, newslot)) + continue; + + if (!newtuple) + newtuple = ExecFetchSlotHeapTuple(newslot, true, &should_free); + + LocTriggerData.tg_trigslot = oldslot; + LocTriggerData.tg_trigtuple = trigtuple; + LocTriggerData.tg_newslot = newslot; + LocTriggerData.tg_newtuple = oldtuple = newtuple; + + LocTriggerData.tg_trigger = trigger; + newtuple = ExecCallTriggerFunc(&LocTriggerData, + i, + relinfo->ri_TrigFunctions, + relinfo->ri_TrigInstrument, + GetPerTupleMemoryContext(estate)); + if (newtuple == NULL) + { + return false; /* "do nothing" */ + } + else if (newtuple != oldtuple) + { + ExecForceStoreHeapTuple(newtuple, newslot, false); + + if (should_free) + heap_freetuple(oldtuple); + + /* signal tuple should be re-fetched if used */ + newtuple = NULL; + } + } + + return true; +} + +void +ExecBSTruncateTriggers(EState *estate, ResultRelInfo *relinfo) +{ + TriggerDesc *trigdesc; + int i; + TriggerData LocTriggerData = {0}; + + trigdesc = relinfo->ri_TrigDesc; + + if (trigdesc == NULL) + return; + if (!trigdesc->trig_truncate_before_statement) + return; + + LocTriggerData.type = T_TriggerData; + LocTriggerData.tg_event = TRIGGER_EVENT_TRUNCATE | + TRIGGER_EVENT_BEFORE; + LocTriggerData.tg_relation = relinfo->ri_RelationDesc; + + for (i = 0; i < trigdesc->numtriggers; i++) + { + Trigger *trigger = &trigdesc->triggers[i]; + HeapTuple newtuple; + + if (!TRIGGER_TYPE_MATCHES(trigger->tgtype, + TRIGGER_TYPE_STATEMENT, + TRIGGER_TYPE_BEFORE, + TRIGGER_TYPE_TRUNCATE)) + 
continue; + if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event, + NULL, NULL, NULL)) + continue; + + LocTriggerData.tg_trigger = trigger; + newtuple = ExecCallTriggerFunc(&LocTriggerData, + i, + relinfo->ri_TrigFunctions, + relinfo->ri_TrigInstrument, + GetPerTupleMemoryContext(estate)); + + if (newtuple) + ereport(ERROR, + (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), + errmsg("BEFORE STATEMENT trigger cannot return a value"))); + } +} + +void +ExecASTruncateTriggers(EState *estate, ResultRelInfo *relinfo) +{ + TriggerDesc *trigdesc = relinfo->ri_TrigDesc; + + if (trigdesc && trigdesc->trig_truncate_after_statement) + AfterTriggerSaveEvent(estate, relinfo, + NULL, NULL, + TRIGGER_EVENT_TRUNCATE, + false, NULL, NULL, NIL, NULL, NULL, + false); +} + + +/* + * Fetch tuple into "oldslot", dealing with locking and EPQ if necessary + */ +static bool +GetTupleForTrigger(EState *estate, + EPQState *epqstate, + ResultRelInfo *relinfo, + ItemPointer tid, + LockTupleMode lockmode, + TupleTableSlot *oldslot, + TupleTableSlot **epqslot, + TM_Result *tmresultp, + TM_FailureData *tmfdp) +{ + Relation relation = relinfo->ri_RelationDesc; + + if (epqslot != NULL) + { + TM_Result test; + TM_FailureData tmfd; + int lockflags = 0; + + *epqslot = NULL; + + /* caller must pass an epqstate if EvalPlanQual is possible */ + Assert(epqstate != NULL); + + /* + * lock tuple for update + */ + if (!IsolationUsesXactSnapshot()) + lockflags |= TUPLE_LOCK_FLAG_FIND_LAST_VERSION; + test = table_tuple_lock(relation, tid, estate->es_snapshot, oldslot, + estate->es_output_cid, + lockmode, LockWaitBlock, + lockflags, + &tmfd); + + /* Let the caller know about the status of this operation */ + if (tmresultp) + *tmresultp = test; + if (tmfdp) + *tmfdp = tmfd; + + switch (test) + { + case TM_SelfModified: + + /* + * The target tuple was already updated or deleted by the + * current command, or by a later command in the current + * transaction. 
We ignore the tuple in the former case, and + * throw error in the latter case, for the same reasons + * enumerated in ExecUpdate and ExecDelete in + * nodeModifyTable.c. + */ + if (tmfd.cmax != estate->es_output_cid) + ereport(ERROR, + (errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION), + errmsg("tuple to be updated was already modified by an operation triggered by the current command"), + errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows."))); + + /* treat it as deleted; do not process */ + return false; + + case TM_Ok: + if (tmfd.traversed) + { + /* + * Recheck the tuple using EPQ. For MERGE, we leave this + * to the caller (it must do additional rechecking, and + * might end up executing a different action entirely). + */ + if (estate->es_plannedstmt->commandType == CMD_MERGE) + { + if (tmresultp) + *tmresultp = TM_Updated; + return false; + } + + *epqslot = EvalPlanQual(epqstate, + relation, + relinfo->ri_RangeTableIndex, + oldslot); + + /* + * If PlanQual failed for updated tuple - we must not + * process this tuple! + */ + if (TupIsNull(*epqslot)) + { + *epqslot = NULL; + return false; + } + } + break; + + case TM_Updated: + if (IsolationUsesXactSnapshot()) + ereport(ERROR, + (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), + errmsg("could not serialize access due to concurrent update"))); + elog(ERROR, "unexpected table_tuple_lock status: %u", test); + break; + + case TM_Deleted: + if (IsolationUsesXactSnapshot()) + ereport(ERROR, + (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), + errmsg("could not serialize access due to concurrent delete"))); + /* tuple was deleted */ + return false; + + case TM_Invisible: + elog(ERROR, "attempted to lock invisible tuple"); + break; + + default: + elog(ERROR, "unrecognized table_tuple_lock status: %u", test); + return false; /* keep compiler quiet */ + } + } + else + { + /* + * We expect the tuple to be present, thus very simple error handling + * suffices. 
+ */ + if (!table_tuple_fetch_row_version(relation, tid, SnapshotAny, + oldslot)) + elog(ERROR, "failed to fetch tuple for trigger"); + } + + return true; +} + +/* + * Is trigger enabled to fire? + */ +static bool +TriggerEnabled(EState *estate, ResultRelInfo *relinfo, + Trigger *trigger, TriggerEvent event, + Bitmapset *modifiedCols, + TupleTableSlot *oldslot, TupleTableSlot *newslot) +{ + /* Check replication-role-dependent enable state */ + if (SessionReplicationRole == SESSION_REPLICATION_ROLE_REPLICA) + { + if (trigger->tgenabled == TRIGGER_FIRES_ON_ORIGIN || + trigger->tgenabled == TRIGGER_DISABLED) + return false; + } + else /* ORIGIN or LOCAL role */ + { + if (trigger->tgenabled == TRIGGER_FIRES_ON_REPLICA || + trigger->tgenabled == TRIGGER_DISABLED) + return false; + } + + /* + * Check for column-specific trigger (only possible for UPDATE, and in + * fact we *must* ignore tgattr for other event types) + */ + if (trigger->tgnattr > 0 && TRIGGER_FIRED_BY_UPDATE(event)) + { + int i; + bool modified; + + modified = false; + for (i = 0; i < trigger->tgnattr; i++) + { + if (bms_is_member(trigger->tgattr[i] - FirstLowInvalidHeapAttributeNumber, + modifiedCols)) + { + modified = true; + break; + } + } + if (!modified) + return false; + } + + /* Check for WHEN clause */ + if (trigger->tgqual) + { + ExprState **predicate; + ExprContext *econtext; + MemoryContext oldContext; + int i; + + Assert(estate != NULL); + + /* + * trigger is an element of relinfo->ri_TrigDesc->triggers[]; find the + * matching element of relinfo->ri_TrigWhenExprs[] + */ + i = trigger - relinfo->ri_TrigDesc->triggers; + predicate = &relinfo->ri_TrigWhenExprs[i]; + + /* + * If first time through for this WHEN expression, build expression + * nodetrees for it. Keep them in the per-query memory context so + * they'll survive throughout the query. 
+ */ + if (*predicate == NULL) + { + Node *tgqual; + + oldContext = MemoryContextSwitchTo(estate->es_query_cxt); + tgqual = stringToNode(trigger->tgqual); + /* Change references to OLD and NEW to INNER_VAR and OUTER_VAR */ + ChangeVarNodes(tgqual, PRS2_OLD_VARNO, INNER_VAR, 0); + ChangeVarNodes(tgqual, PRS2_NEW_VARNO, OUTER_VAR, 0); + /* ExecPrepareQual wants implicit-AND form */ + tgqual = (Node *) make_ands_implicit((Expr *) tgqual); + *predicate = ExecPrepareQual((List *) tgqual, estate); + MemoryContextSwitchTo(oldContext); + } + + /* + * We will use the EState's per-tuple context for evaluating WHEN + * expressions (creating it if it's not already there). + */ + econtext = GetPerTupleExprContext(estate); + + /* + * Finally evaluate the expression, making the old and/or new tuples + * available as INNER_VAR/OUTER_VAR respectively. + */ + econtext->ecxt_innertuple = oldslot; + econtext->ecxt_outertuple = newslot; + if (!ExecQual(*predicate, econtext)) + return false; + } + + return true; +} + + +/* ---------- + * After-trigger stuff + * + * The AfterTriggersData struct holds data about pending AFTER trigger events + * during the current transaction tree. (BEFORE triggers are fired + * immediately so we don't need any persistent state about them.) The struct + * and most of its subsidiary data are kept in TopTransactionContext; however + * some data that can be discarded sooner appears in the CurTransactionContext + * of the relevant subtransaction. Also, the individual event records are + * kept in a separate sub-context of TopTransactionContext. This is done + * mainly so that it's easy to tell from a memory context dump how much space + * is being eaten by trigger events. + * + * Because the list of pending events can grow large, we go to some + * considerable effort to minimize per-event memory consumption. The event + * records are grouped into chunks and common data for similar events in the + * same chunk is only stored once. 
 *
 * XXX We need to be able to save the per-event data in a file if it grows too
 * large.
 * ----------
 */

/* Per-trigger SET CONSTRAINT status */
typedef struct SetConstraintTriggerData
{
	Oid			sct_tgoid;		/* constraint trigger's OID */
	bool		sct_tgisdeferred;	/* is it currently set DEFERRED? */
} SetConstraintTriggerData;

typedef struct SetConstraintTriggerData *SetConstraintTrigger;

/*
 * SET CONSTRAINT intra-transaction status.
 *
 * We make this a single palloc'd object so it can be copied and freed easily.
 *
 * all_isset and all_isdeferred are used to keep track
 * of SET CONSTRAINTS ALL {DEFERRED, IMMEDIATE}.
 *
 * trigstates[] stores per-trigger tgisdeferred settings.
 */
typedef struct SetConstraintStateData
{
	bool		all_isset;		/* has SET CONSTRAINTS ALL been done? */
	bool		all_isdeferred; /* if so, was it ALL DEFERRED? */
	int			numstates;		/* number of trigstates[] entries in use */
	int			numalloc;		/* allocated size of trigstates[] */
	SetConstraintTriggerData trigstates[FLEXIBLE_ARRAY_MEMBER];
} SetConstraintStateData;

typedef SetConstraintStateData *SetConstraintState;


/*
 * Per-trigger-event data
 *
 * The actual per-event data, AfterTriggerEventData, includes DONE/IN_PROGRESS
 * status bits, up to two tuple CTIDs, and optionally two OIDs of partitions.
 * Each event record also has an associated AfterTriggerSharedData that is
 * shared across all instances of similar events within a "chunk".
 *
 * For row-level triggers, we arrange not to waste storage on unneeded ctid
 * fields.  Updates of regular tables use two; inserts and deletes of regular
 * tables use one; foreign tables always use zero and save the tuple(s) to a
 * tuplestore.  AFTER_TRIGGER_FDW_FETCH directs AfterTriggerExecute() to
 * retrieve a fresh tuple or pair of tuples from that tuplestore, while
 * AFTER_TRIGGER_FDW_REUSE directs it to use the most-recently-retrieved
 * tuple(s).  This permits storing tuples once regardless of the number of
 * row-level triggers on a foreign table.
 *
 * When updates on partitioned tables cause rows to move between partitions,
 * the OIDs of both partitions are stored too, so that the tuples can be
 * fetched; such entries are marked AFTER_TRIGGER_CP_UPDATE (for "cross-
 * partition update").
 *
 * Note that we need triggers on foreign tables to be fired in exactly the
 * order they were queued, so that the tuples come out of the tuplestore in
 * the right order.  To ensure that, we forbid deferrable (constraint)
 * triggers on foreign tables.  This also ensures that such triggers do not
 * get deferred into outer trigger query levels, meaning that it's okay to
 * destroy the tuplestore at the end of the query level.
 *
 * Statement-level triggers always bear AFTER_TRIGGER_1CTID, though they
 * require no ctid field.  We lack the flag bit space to neatly represent that
 * distinct case, and it seems unlikely to be worth much trouble.
 *
 * Note: ats_firing_id is initially zero and is set to something else when
 * AFTER_TRIGGER_IN_PROGRESS is set.  It indicates which trigger firing
 * cycle the trigger will be fired in (or was fired in, if DONE is set).
 * Although this is mutable state, we can keep it in AfterTriggerSharedData
 * because all instances of the same type of event in a given event list will
 * be fired at the same time, if they were queued between the same firing
 * cycles.  So we need only ensure that ats_firing_id is zero when attaching
 * a new event to an existing AfterTriggerSharedData record.
 */
typedef uint32 TriggerFlags;

#define AFTER_TRIGGER_OFFSET			0x07FFFFFF	/* must be low-order bits */
#define AFTER_TRIGGER_DONE				0x80000000	/* status bit: event was fired */
#define AFTER_TRIGGER_IN_PROGRESS		0x40000000	/* status bit: scheduled in a firing cycle */
/* bits describing the size and tuple sources of this event */
#define AFTER_TRIGGER_FDW_REUSE			0x00000000
#define AFTER_TRIGGER_FDW_FETCH			0x20000000
#define AFTER_TRIGGER_1CTID				0x10000000
#define AFTER_TRIGGER_2CTID				0x30000000
#define AFTER_TRIGGER_CP_UPDATE			0x08000000
#define AFTER_TRIGGER_TUP_BITS			0x38000000	/* mask of the FDW/CTID bits above */
typedef struct AfterTriggerSharedData *AfterTriggerShared;

typedef struct AfterTriggerSharedData
{
	TriggerEvent ats_event;		/* event type indicator, see trigger.h */
	Oid			ats_tgoid;		/* the trigger's ID */
	Oid			ats_relid;		/* the relation it's on */
	CommandId	ats_firing_id;	/* ID for firing cycle */
	struct AfterTriggersTableData *ats_table;	/* transition table access */
	Bitmapset  *ats_modifiedcols;	/* modified columns */
} AfterTriggerSharedData;

typedef struct AfterTriggerEventData *AfterTriggerEvent;

typedef struct AfterTriggerEventData
{
	TriggerFlags ate_flags;		/* status bits and offset to shared data */
	ItemPointerData ate_ctid1;	/* inserted, deleted, or old updated tuple */
	ItemPointerData ate_ctid2;	/* new updated tuple */

	/*
	 * During a cross-partition update of a partitioned table, we also store
	 * the OIDs of source and destination partitions that are needed to fetch
	 * the old (ctid1) and the new tuple (ctid2) from, respectively.
	 */
	Oid			ate_src_part;
	Oid			ate_dst_part;
} AfterTriggerEventData;

/* AfterTriggerEventData, minus ate_src_part, ate_dst_part */
typedef struct AfterTriggerEventDataNoOids
{
	TriggerFlags ate_flags;
	ItemPointerData ate_ctid1;
	ItemPointerData ate_ctid2;
} AfterTriggerEventDataNoOids;

/* AfterTriggerEventData, minus ate_*_part and ate_ctid2 */
typedef struct AfterTriggerEventDataOneCtid
{
	TriggerFlags ate_flags;		/* status bits and offset to shared data */
	ItemPointerData ate_ctid1;	/* inserted, deleted, or old updated tuple */
} AfterTriggerEventDataOneCtid;

/* AfterTriggerEventData, minus ate_*_part, ate_ctid1 and ate_ctid2 */
typedef struct AfterTriggerEventDataZeroCtids
{
	TriggerFlags ate_flags;		/* status bits and offset to shared data */
} AfterTriggerEventDataZeroCtids;

/* Actual size of an event record, as selected by its TUP_BITS flags */
#define SizeofTriggerEvent(evt) \
	(((evt)->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_CP_UPDATE ? \
	 sizeof(AfterTriggerEventData) : \
	 (((evt)->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_2CTID ? \
	  sizeof(AfterTriggerEventDataNoOids) : \
	  (((evt)->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_1CTID ? \
	   sizeof(AfterTriggerEventDataOneCtid) : \
	   sizeof(AfterTriggerEventDataZeroCtids))))

/* Locate the shared record via the offset stored in the event's flags */
#define GetTriggerSharedData(evt) \
	((AfterTriggerShared) ((char *) (evt) + ((evt)->ate_flags & AFTER_TRIGGER_OFFSET)))

/*
 * To avoid palloc overhead, we keep trigger events in arrays in successively-
 * larger chunks (a slightly more sophisticated version of an expansible
 * array).  The space between CHUNK_DATA_START and freeptr is occupied by
 * AfterTriggerEventData records; the space between endfree and endptr is
 * occupied by AfterTriggerSharedData records.
 */
typedef struct AfterTriggerEventChunk
{
	struct AfterTriggerEventChunk *next;	/* list link */
	char	   *freeptr;		/* start of free space in chunk */
	char	   *endfree;		/* end of free space in chunk */
	char	   *endptr;			/* end of chunk */
	/* event data follows here */
} AfterTriggerEventChunk;

/* Events begin at the first MAXALIGN'd offset past the chunk header */
#define CHUNK_DATA_START(cptr) ((char *) (cptr) + MAXALIGN(sizeof(AfterTriggerEventChunk)))

/* A list of events */
typedef struct AfterTriggerEventList
{
	AfterTriggerEventChunk *head;	/* first chunk, or NULL if empty */
	AfterTriggerEventChunk *tail;	/* last chunk */
	char	   *tailfree;		/* freeptr of tail chunk */
} AfterTriggerEventList;

/* Macros to help in iterating over a list of events */
#define for_each_chunk(cptr, evtlist) \
	for (cptr = (evtlist).head; cptr != NULL; cptr = cptr->next)
#define for_each_event(eptr, cptr) \
	for (eptr = (AfterTriggerEvent) CHUNK_DATA_START(cptr); \
		 (char *) eptr < (cptr)->freeptr; \
		 eptr = (AfterTriggerEvent) (((char *) eptr) + SizeofTriggerEvent(eptr)))
/* Use this if no special per-chunk processing is needed */
#define for_each_event_chunk(eptr, cptr, evtlist) \
	for_each_chunk(cptr, evtlist) for_each_event(eptr, cptr)

/* Macros for iterating from a start point that might not be list start */
#define for_each_chunk_from(cptr) \
	for (; cptr != NULL; cptr = cptr->next)
#define for_each_event_from(eptr, cptr) \
	for (; \
		 (char *) eptr < (cptr)->freeptr; \
		 eptr = (AfterTriggerEvent) (((char *) eptr) + SizeofTriggerEvent(eptr)))


/*
 * All per-transaction data for the AFTER TRIGGERS module.
 *
 * AfterTriggersData has the following fields:
 *
 * firing_counter is incremented for each call of afterTriggerInvokeEvents.
 * We mark firable events with the current firing cycle's ID so that we can
 * tell which ones to work on.  This ensures sane behavior if a trigger
 * function chooses to do SET CONSTRAINTS: the inner SET CONSTRAINTS will
 * only fire those events that weren't already scheduled for firing.
+ * + * state keeps track of the transaction-local effects of SET CONSTRAINTS. + * This is saved and restored across failed subtransactions. + * + * events is the current list of deferred events. This is global across + * all subtransactions of the current transaction. In a subtransaction + * abort, we know that the events added by the subtransaction are at the + * end of the list, so it is relatively easy to discard them. The event + * list chunks themselves are stored in event_cxt. + * + * query_depth is the current depth of nested AfterTriggerBeginQuery calls + * (-1 when the stack is empty). + * + * query_stack[query_depth] is the per-query-level data, including these fields: + * + * events is a list of AFTER trigger events queued by the current query. + * None of these are valid until the matching AfterTriggerEndQuery call + * occurs. At that point we fire immediate-mode triggers, and append any + * deferred events to the main events list. + * + * fdw_tuplestore is a tuplestore containing the foreign-table tuples + * needed by events queued by the current query. (Note: we use just one + * tuplestore even though more than one foreign table might be involved. + * This is okay because tuplestores don't really care what's in the tuples + * they store; but it's possible that someday it'd break.) + * + * tables is a List of AfterTriggersTableData structs for target tables + * of the current query (see below). + * + * maxquerydepth is just the allocated length of query_stack. + * + * trans_stack holds per-subtransaction data, including these fields: + * + * state is NULL or a pointer to a saved copy of the SET CONSTRAINTS + * state data. Each subtransaction level that modifies that state first + * saves a copy, which we use to restore the state if we abort. + * + * events is a copy of the events head/tail pointers, + * which we use to restore those values during subtransaction abort. 
+ * + * query_depth is the subtransaction-start-time value of query_depth, + * which we similarly use to clean up at subtransaction abort. + * + * firing_counter is the subtransaction-start-time value of firing_counter. + * We use this to recognize which deferred triggers were fired (or marked + * for firing) within an aborted subtransaction. + * + * We use GetCurrentTransactionNestLevel() to determine the correct array + * index in trans_stack. maxtransdepth is the number of allocated entries in + * trans_stack. (By not keeping our own stack pointer, we can avoid trouble + * in cases where errors during subxact abort cause multiple invocations + * of AfterTriggerEndSubXact() at the same nesting depth.) + * + * We create an AfterTriggersTableData struct for each target table of the + * current query, and each operation mode (INSERT/UPDATE/DELETE), that has + * either transition tables or statement-level triggers. This is used to + * hold the relevant transition tables, as well as info tracking whether + * we already queued the statement triggers. (We use that info to prevent + * firing the same statement triggers more than once per statement, or really + * once per transition table set.) These structs, along with the transition + * table tuplestores, live in the (sub)transaction's CurTransactionContext. + * That's sufficient lifespan because we don't allow transition tables to be + * used by deferrable triggers, so they only need to survive until + * AfterTriggerEndQuery. 
 */
typedef struct AfterTriggersQueryData AfterTriggersQueryData;
typedef struct AfterTriggersTransData AfterTriggersTransData;
typedef struct AfterTriggersTableData AfterTriggersTableData;

typedef struct AfterTriggersData
{
	CommandId	firing_counter; /* next firing ID to assign */
	SetConstraintState state;	/* the active S C state */
	AfterTriggerEventList events;	/* deferred-event list */
	MemoryContext event_cxt;	/* memory context for events, if any */

	/* per-query-level data: */
	AfterTriggersQueryData *query_stack;	/* array of structs shown below */
	int			query_depth;	/* current index in above array */
	int			maxquerydepth;	/* allocated len of above array */

	/* per-subtransaction-level data: */
	AfterTriggersTransData *trans_stack;	/* array of structs shown below */
	int			maxtransdepth;	/* allocated len of above array */
} AfterTriggersData;

struct AfterTriggersQueryData
{
	AfterTriggerEventList events;	/* events pending from this query */
	Tuplestorestate *fdw_tuplestore;	/* foreign tuples for said events */
	List	   *tables;			/* list of AfterTriggersTableData, see below */
};

struct AfterTriggersTransData
{
	/* these fields are just for resetting at subtrans abort: */
	SetConstraintState state;	/* saved S C state, or NULL if not yet saved */
	AfterTriggerEventList events;	/* saved list pointer */
	int			query_depth;	/* saved query_depth */
	CommandId	firing_counter; /* saved firing_counter */
};

struct AfterTriggersTableData
{
	/* relid + cmdType form the lookup key for these structs: */
	Oid			relid;			/* target table's OID */
	CmdType		cmdType;		/* event type, CMD_INSERT/UPDATE/DELETE */
	bool		closed;			/* true when no longer OK to add tuples */
	bool		before_trig_done;	/* did we already queue BS triggers? */
	bool		after_trig_done;	/* did we already queue AS triggers? */
	AfterTriggerEventList after_trig_events;	/* if so, saved list pointer */

	/*
	 * We maintain separate transition tables for UPDATE/INSERT/DELETE since
	 * MERGE can run all three actions in a single statement.  Note that
	 * UPDATE needs both old and new transition tables whereas INSERT needs
	 * only new, and DELETE needs only old.
	 */

	/* "old" transition table for UPDATE, if any */
	Tuplestorestate *old_upd_tuplestore;
	/* "new" transition table for UPDATE, if any */
	Tuplestorestate *new_upd_tuplestore;
	/* "old" transition table for DELETE, if any */
	Tuplestorestate *old_del_tuplestore;
	/* "new" transition table for INSERT, if any */
	Tuplestorestate *new_ins_tuplestore;

	TupleTableSlot *storeslot;	/* for converting to tuplestore's format */
};

/* Module-global AFTER-trigger state for the current transaction */
static AfterTriggersData afterTriggers;

/* Forward declarations of local routines */
static void AfterTriggerExecute(EState *estate,
								AfterTriggerEvent event,
								ResultRelInfo *relInfo,
								ResultRelInfo *src_relInfo,
								ResultRelInfo *dst_relInfo,
								TriggerDesc *trigdesc,
								FmgrInfo *finfo,
								Instrumentation *instr,
								MemoryContext per_tuple_context,
								TupleTableSlot *trig_tuple_slot1,
								TupleTableSlot *trig_tuple_slot2);
static AfterTriggersTableData *GetAfterTriggersTableData(Oid relid,
														 CmdType cmdType);
static TupleTableSlot *GetAfterTriggersStoreSlot(AfterTriggersTableData *table,
												 TupleDesc tupdesc);
static Tuplestorestate *GetAfterTriggersTransitionTable(int event,
														TupleTableSlot *oldslot,
														TupleTableSlot *newslot,
														TransitionCaptureState *transition_capture);
static void TransitionTableAddTuple(EState *estate,
									TransitionCaptureState *transition_capture,
									ResultRelInfo *relinfo,
									TupleTableSlot *slot,
									TupleTableSlot *original_insert_tuple,
									Tuplestorestate *tuplestore);
static void AfterTriggerFreeQuery(AfterTriggersQueryData *qs);
static SetConstraintState SetConstraintStateCreate(int numalloc);
static SetConstraintState SetConstraintStateCopy(SetConstraintState state);
static SetConstraintState
SetConstraintStateAddItem(SetConstraintState state,
                          Oid tgoid, bool tgisdeferred);
static void cancel_prior_stmt_triggers(Oid relid, CmdType cmdType, int tgevent);


/*
 * Get the FDW tuplestore for the current trigger query level, creating it
 * if necessary.
 *
 * This tuplestore holds foreign-table tuples queued for AFTER triggers;
 * AfterTriggerExecute reads it back in the AFTER_TRIGGER_FDW_FETCH case.
 */
static Tuplestorestate *
GetCurrentFDWTuplestore(void)
{
    Tuplestorestate *ret;

    ret = afterTriggers.query_stack[afterTriggers.query_depth].fdw_tuplestore;
    if (ret == NULL)
    {
        MemoryContext oldcxt;
        ResourceOwner saveResourceOwner;

        /*
         * Make the tuplestore valid until end of subtransaction.  We really
         * only need it until AfterTriggerEndQuery().
         */
        oldcxt = MemoryContextSwitchTo(CurTransactionContext);
        saveResourceOwner = CurrentResourceOwner;
        CurrentResourceOwner = CurTransactionResourceOwner;

        ret = tuplestore_begin_heap(false, false, work_mem);

        CurrentResourceOwner = saveResourceOwner;
        MemoryContextSwitchTo(oldcxt);

        afterTriggers.query_stack[afterTriggers.query_depth].fdw_tuplestore = ret;
    }

    return ret;
}

/* ----------
 * afterTriggerCheckState()
 *
 *  Returns true if the trigger event is actually in state DEFERRED.
 * ----------
 */
static bool
afterTriggerCheckState(AfterTriggerShared evtshared)
{
    Oid         tgoid = evtshared->ats_tgoid;
    SetConstraintState state = afterTriggers.state;
    int         i;

    /*
     * For not-deferrable triggers (i.e. normal AFTER ROW triggers and
     * constraints declared NOT DEFERRABLE), the state is always false.
     */
    if ((evtshared->ats_event & AFTER_TRIGGER_DEFERRABLE) == 0)
        return false;

    /*
     * If constraint state exists, SET CONSTRAINTS might have been executed
     * either for this trigger or for all triggers.
     */
    if (state != NULL)
    {
        /* Check for SET CONSTRAINTS for this specific trigger. */
        for (i = 0; i < state->numstates; i++)
        {
            if (state->trigstates[i].sct_tgoid == tgoid)
                return state->trigstates[i].sct_tgisdeferred;
        }

        /* Check for SET CONSTRAINTS ALL. */
        if (state->all_isset)
            return state->all_isdeferred;
    }

    /*
     * Otherwise return the default state for the trigger.
     */
    return ((evtshared->ats_event & AFTER_TRIGGER_INITDEFERRED) != 0);
}

/* ----------
 * afterTriggerCopyBitmap()
 *
 *  Copy bitmap into AfterTriggerEvents memory context, which is where the after
 *  trigger events are kept.
 * ----------
 */
static Bitmapset *
afterTriggerCopyBitmap(Bitmapset *src)
{
    Bitmapset  *dst;
    MemoryContext oldcxt;

    /* An empty bitmap needs no copy. */
    if (src == NULL)
        return NULL;

    /* Create event context if we didn't already */
    if (afterTriggers.event_cxt == NULL)
        afterTriggers.event_cxt =
            AllocSetContextCreate(TopTransactionContext,
                                  "AfterTriggerEvents",
                                  ALLOCSET_DEFAULT_SIZES);

    oldcxt = MemoryContextSwitchTo(afterTriggers.event_cxt);

    dst = bms_copy(src);

    MemoryContextSwitchTo(oldcxt);

    return dst;
}

/* ----------
 * afterTriggerAddEvent()
 *
 *  Add a new trigger event to the specified queue.
 *  The passed-in event data is copied.
 * ----------
 */
static void
afterTriggerAddEvent(AfterTriggerEventList *events,
                     AfterTriggerEvent event, AfterTriggerShared evtshared)
{
    Size        eventsize = SizeofTriggerEvent(event);
    Size        needed = eventsize + sizeof(AfterTriggerSharedData);
    AfterTriggerEventChunk *chunk;
    AfterTriggerShared newshared;
    AfterTriggerEvent newevent;

    /*
     * If empty list or not enough room in the tail chunk, make a new chunk.
     * We assume here that a new shared record will always be needed.
     */
    chunk = events->tail;
    if (chunk == NULL ||
        chunk->endfree - chunk->freeptr < needed)
    {
        Size        chunksize;

        /* Create event context if we didn't already */
        if (afterTriggers.event_cxt == NULL)
            afterTriggers.event_cxt =
                AllocSetContextCreate(TopTransactionContext,
                                      "AfterTriggerEvents",
                                      ALLOCSET_DEFAULT_SIZES);

        /*
         * Chunk size starts at 1KB and is allowed to increase up to 1MB.
         * These numbers are fairly arbitrary, though there is a hard limit at
         * AFTER_TRIGGER_OFFSET; else we couldn't link event records to their
         * shared records using the available space in ate_flags.  Another
         * constraint is that if the chunk size gets too huge, the search loop
         * below would get slow given a (not too common) usage pattern with
         * many distinct event types in a chunk.  Therefore, we double the
         * preceding chunk size only if there weren't too many shared records
         * in the preceding chunk; otherwise we halve it.  This gives us some
         * ability to adapt to the actual usage pattern of the current query
         * while still having large chunk sizes in typical usage.  All chunk
         * sizes used should be MAXALIGN multiples, to ensure that the shared
         * records will be aligned safely.
         */
#define MIN_CHUNK_SIZE 1024
#define MAX_CHUNK_SIZE (1024*1024)

#if MAX_CHUNK_SIZE > (AFTER_TRIGGER_OFFSET+1)
#error MAX_CHUNK_SIZE must not exceed AFTER_TRIGGER_OFFSET
#endif

        if (chunk == NULL)
            chunksize = MIN_CHUNK_SIZE;
        else
        {
            /* preceding chunk size... */
            chunksize = chunk->endptr - (char *) chunk;
            /* check number of shared records in preceding chunk */
            if ((chunk->endptr - chunk->endfree) <=
                (100 * sizeof(AfterTriggerSharedData)))
                chunksize *= 2;     /* okay, double it */
            else
                chunksize /= 2;     /* too many shared records */
            chunksize = Min(chunksize, MAX_CHUNK_SIZE);
        }
        chunk = MemoryContextAlloc(afterTriggers.event_cxt, chunksize);
        chunk->next = NULL;
        chunk->freeptr = CHUNK_DATA_START(chunk);
        chunk->endptr = chunk->endfree = (char *) chunk + chunksize;
        Assert(chunk->endfree - chunk->freeptr >= needed);

        if (events->head == NULL)
            events->head = chunk;
        else
            events->tail->next = chunk;
        events->tail = chunk;
        /* events->tailfree is now out of sync, but we'll fix it below */
    }

    /*
     * Try to locate a matching shared-data record already in the chunk.  If
     * none, make a new one.  (Shared records are allocated backwards from the
     * chunk's end, so the search scans from endptr down to endfree.)
     */
    for (newshared = ((AfterTriggerShared) chunk->endptr) - 1;
         (char *) newshared >= chunk->endfree;
         newshared--)
    {
        if (newshared->ats_tgoid == evtshared->ats_tgoid &&
            newshared->ats_relid == evtshared->ats_relid &&
            newshared->ats_event == evtshared->ats_event &&
            newshared->ats_table == evtshared->ats_table &&
            newshared->ats_firing_id == 0)
            break;
    }
    if ((char *) newshared < chunk->endfree)
    {
        *newshared = *evtshared;
        newshared->ats_firing_id = 0;   /* just to be sure */
        chunk->endfree = (char *) newshared;
    }

    /* Insert the data */
    newevent = (AfterTriggerEvent) chunk->freeptr;
    memcpy(newevent, event, eventsize);
    /* ... and link the new event to its shared record */
    newevent->ate_flags &= ~AFTER_TRIGGER_OFFSET;
    newevent->ate_flags |= (char *) newshared - (char *) newevent;

    chunk->freeptr += eventsize;
    events->tailfree = chunk->freeptr;
}

/* ----------
 * afterTriggerFreeEventList()
 *
 *  Free all the event storage in the given list.
 * ----------
 */
static void
afterTriggerFreeEventList(AfterTriggerEventList *events)
{
    AfterTriggerEventChunk *chunk;

    while ((chunk = events->head) != NULL)
    {
        events->head = chunk->next;
        pfree(chunk);
    }
    events->tail = NULL;
    events->tailfree = NULL;
}

/* ----------
 * afterTriggerRestoreEventList()
 *
 *  Restore an event list to its prior length, removing all the events
 *  added since it had the value old_events.
 * ----------
 */
static void
afterTriggerRestoreEventList(AfterTriggerEventList *events,
                             const AfterTriggerEventList *old_events)
{
    AfterTriggerEventChunk *chunk;
    AfterTriggerEventChunk *next_chunk;

    if (old_events->tail == NULL)
    {
        /* restoring to a completely empty state, so free everything */
        afterTriggerFreeEventList(events);
    }
    else
    {
        *events = *old_events;
        /* free any chunks after the last one we want to keep */
        for (chunk = events->tail->next; chunk != NULL; chunk = next_chunk)
        {
            next_chunk = chunk->next;
            pfree(chunk);
        }
        /* and clean up the tail chunk to be the right length */
        events->tail->next = NULL;
        events->tail->freeptr = events->tailfree;

        /*
         * We don't make any effort to remove now-unused shared data records.
         * They might still be useful, anyway.
         */
    }
}

/* ----------
 * afterTriggerDeleteHeadEventChunk()
 *
 *  Remove the first chunk of events from the query level's event list.
 *  Keep any event list pointers elsewhere in the query level's data
 *  structures in sync.
 * ----------
 */
static void
afterTriggerDeleteHeadEventChunk(AfterTriggersQueryData *qs)
{
    AfterTriggerEventChunk *target = qs->events.head;
    ListCell   *lc;

    /* Caller must only invoke this when at least two chunks exist. */
    Assert(target && target->next);

    /*
     * First, update any pointers in the per-table data, so that they won't be
     * dangling.  Resetting obsoleted pointers to NULL will make
     * cancel_prior_stmt_triggers start from the list head, which is fine.
     */
    foreach(lc, qs->tables)
    {
        AfterTriggersTableData *table = (AfterTriggersTableData *) lfirst(lc);

        if (table->after_trig_done &&
            table->after_trig_events.tail == target)
        {
            table->after_trig_events.head = NULL;
            table->after_trig_events.tail = NULL;
            table->after_trig_events.tailfree = NULL;
        }
    }

    /* Now we can flush the head chunk */
    qs->events.head = target->next;
    pfree(target);
}


/* ----------
 * AfterTriggerExecute()
 *
 *  Fetch the required tuples back from the heap and fire one
 *  single trigger function.
 *
 *  Frequently, this will be fired many times in a row for triggers of
 *  a single relation.  Therefore, we cache the open relation and provide
 *  fmgr lookup cache space at the caller level.  (For triggers fired at
 *  the end of a query, we can even piggyback on the executor's state.)
 *
 *  When fired for a cross-partition update of a partitioned table, the old
 *  tuple is fetched using 'src_relInfo' (the source leaf partition) and
 *  the new tuple using 'dst_relInfo' (the destination leaf partition), though
 *  both are converted into the root partitioned table's format before passing
 *  to the trigger function.
 *
 *  event: event currently being fired.
 *  relInfo: result relation for event.
 *  src_relInfo: source partition of a cross-partition update
 *  dst_relInfo: its destination partition
 *  trigdesc: working copy of rel's trigger info.
 *  finfo: array of fmgr lookup cache entries (one per trigger in trigdesc).
 *  instr: array of EXPLAIN ANALYZE instrumentation nodes (one per trigger),
 *      or NULL if no instrumentation is wanted.
 *  per_tuple_context: memory context to call trigger function in.
 *  trig_tuple_slot1: scratch slot for tg_trigtuple (foreign tables only)
 *  trig_tuple_slot2: scratch slot for tg_newtuple (foreign tables only)
 * ----------
 */
static void
AfterTriggerExecute(EState *estate,
                    AfterTriggerEvent event,
                    ResultRelInfo *relInfo,
                    ResultRelInfo *src_relInfo,
                    ResultRelInfo *dst_relInfo,
                    TriggerDesc *trigdesc,
                    FmgrInfo *finfo, Instrumentation *instr,
                    MemoryContext per_tuple_context,
                    TupleTableSlot *trig_tuple_slot1,
                    TupleTableSlot *trig_tuple_slot2)
{
    Relation    rel = relInfo->ri_RelationDesc;
    Relation    src_rel = src_relInfo->ri_RelationDesc;
    Relation    dst_rel = dst_relInfo->ri_RelationDesc;
    AfterTriggerShared evtshared = GetTriggerSharedData(event);
    Oid         tgoid = evtshared->ats_tgoid;
    TriggerData LocTriggerData = {0};
    HeapTuple   rettuple;
    int         tgindx;
    bool        should_free_trig = false;
    bool        should_free_new = false;

    /*
     * Locate trigger in trigdesc.  (tgindx also indexes the parallel finfo
     * and instr arrays.)
     */
    for (tgindx = 0; tgindx < trigdesc->numtriggers; tgindx++)
    {
        if (trigdesc->triggers[tgindx].tgoid == tgoid)
        {
            LocTriggerData.tg_trigger = &(trigdesc->triggers[tgindx]);
            break;
        }
    }
    if (LocTriggerData.tg_trigger == NULL)
        elog(ERROR, "could not find trigger %u", tgoid);

    /*
     * If doing EXPLAIN ANALYZE, start charging time to this trigger.  We want
     * to include time spent re-fetching tuples in the trigger cost.
     */
    if (instr)
        InstrStartNode(instr + tgindx);

    /*
     * Fetch the required tuple(s).
     */
    switch (event->ate_flags & AFTER_TRIGGER_TUP_BITS)
    {
        case AFTER_TRIGGER_FDW_FETCH:
            {
                Tuplestorestate *fdw_tuplestore = GetCurrentFDWTuplestore();

                if (!tuplestore_gettupleslot(fdw_tuplestore, true, false,
                                             trig_tuple_slot1))
                    elog(ERROR, "failed to fetch tuple1 for AFTER trigger");

                if ((evtshared->ats_event & TRIGGER_EVENT_OPMASK) ==
                    TRIGGER_EVENT_UPDATE &&
                    !tuplestore_gettupleslot(fdw_tuplestore, true, false,
                                             trig_tuple_slot2))
                    elog(ERROR, "failed to fetch tuple2 for AFTER trigger");
            }
            /* fall through */
        case AFTER_TRIGGER_FDW_REUSE:

            /*
             * Store tuple in the slot so that tg_trigtuple does not reference
             * tuplestore memory.  (It is formally possible for the trigger
             * function to queue trigger events that add to the same
             * tuplestore, which can push other tuples out of memory.)  The
             * distinction is academic, because we start with a minimal tuple
             * that is stored as a heap tuple, constructed in different memory
             * context, in the slot anyway.
             */
            LocTriggerData.tg_trigslot = trig_tuple_slot1;
            LocTriggerData.tg_trigtuple =
                ExecFetchSlotHeapTuple(trig_tuple_slot1, true, &should_free_trig);

            if ((evtshared->ats_event & TRIGGER_EVENT_OPMASK) ==
                TRIGGER_EVENT_UPDATE)
            {
                LocTriggerData.tg_newslot = trig_tuple_slot2;
                LocTriggerData.tg_newtuple =
                    ExecFetchSlotHeapTuple(trig_tuple_slot2, true, &should_free_new);
            }
            else
            {
                LocTriggerData.tg_newtuple = NULL;
            }
            break;

        default:
            if (ItemPointerIsValid(&(event->ate_ctid1)))
            {
                TupleTableSlot *src_slot = ExecGetTriggerOldSlot(estate,
                                                                 src_relInfo);

                if (!table_tuple_fetch_row_version(src_rel,
                                                   &(event->ate_ctid1),
                                                   SnapshotAny,
                                                   src_slot))
                    elog(ERROR, "failed to fetch tuple1 for AFTER trigger");

                /*
                 * Store the tuple fetched from the source partition into the
                 * target (root partitioned) table slot, converting if needed.
                 */
                if (src_relInfo != relInfo)
                {
                    TupleConversionMap *map = ExecGetChildToRootMap(src_relInfo);

                    LocTriggerData.tg_trigslot = ExecGetTriggerOldSlot(estate, relInfo);
                    if (map)
                    {
                        execute_attr_map_slot(map->attrMap,
                                              src_slot,
                                              LocTriggerData.tg_trigslot);
                    }
                    else
                        ExecCopySlot(LocTriggerData.tg_trigslot, src_slot);
                }
                else
                    LocTriggerData.tg_trigslot = src_slot;
                LocTriggerData.tg_trigtuple =
                    ExecFetchSlotHeapTuple(LocTriggerData.tg_trigslot, false, &should_free_trig);
            }
            else
            {
                LocTriggerData.tg_trigtuple = NULL;
            }

            /* don't touch ctid2 if not there */
            if (((event->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_2CTID ||
                 (event->ate_flags & AFTER_TRIGGER_CP_UPDATE)) &&
                ItemPointerIsValid(&(event->ate_ctid2)))
            {
                TupleTableSlot *dst_slot = ExecGetTriggerNewSlot(estate,
                                                                 dst_relInfo);

                if (!table_tuple_fetch_row_version(dst_rel,
                                                   &(event->ate_ctid2),
                                                   SnapshotAny,
                                                   dst_slot))
                    elog(ERROR, "failed to fetch tuple2 for AFTER trigger");

                /*
                 * Store the tuple fetched from the destination partition into
                 * the target (root partitioned) table slot, converting if
                 * needed.
                 */
                if (dst_relInfo != relInfo)
                {
                    TupleConversionMap *map = ExecGetChildToRootMap(dst_relInfo);

                    LocTriggerData.tg_newslot = ExecGetTriggerNewSlot(estate, relInfo);
                    if (map)
                    {
                        execute_attr_map_slot(map->attrMap,
                                              dst_slot,
                                              LocTriggerData.tg_newslot);
                    }
                    else
                        ExecCopySlot(LocTriggerData.tg_newslot, dst_slot);
                }
                else
                    LocTriggerData.tg_newslot = dst_slot;
                LocTriggerData.tg_newtuple =
                    ExecFetchSlotHeapTuple(LocTriggerData.tg_newslot, false, &should_free_new);
            }
            else
            {
                LocTriggerData.tg_newtuple = NULL;
            }
    }

    /*
     * Set up the tuplestore information to let the trigger have access to
     * transition tables.  When we first make a transition table available to
     * a trigger, mark it "closed" so that it cannot change anymore.  If any
     * additional events of the same type get queued in the current trigger
     * query level, they'll go into new transition tables.
     */
    LocTriggerData.tg_oldtable = LocTriggerData.tg_newtable = NULL;
    if (evtshared->ats_table)
    {
        if (LocTriggerData.tg_trigger->tgoldtable)
        {
            if (TRIGGER_FIRED_BY_UPDATE(evtshared->ats_event))
                LocTriggerData.tg_oldtable = evtshared->ats_table->old_upd_tuplestore;
            else
                LocTriggerData.tg_oldtable = evtshared->ats_table->old_del_tuplestore;
            evtshared->ats_table->closed = true;
        }

        if (LocTriggerData.tg_trigger->tgnewtable)
        {
            if (TRIGGER_FIRED_BY_INSERT(evtshared->ats_event))
                LocTriggerData.tg_newtable = evtshared->ats_table->new_ins_tuplestore;
            else
                LocTriggerData.tg_newtable = evtshared->ats_table->new_upd_tuplestore;
            evtshared->ats_table->closed = true;
        }
    }

    /*
     * Setup the remaining trigger information
     */
    LocTriggerData.type = T_TriggerData;
    LocTriggerData.tg_event =
        evtshared->ats_event & (TRIGGER_EVENT_OPMASK | TRIGGER_EVENT_ROW);
    LocTriggerData.tg_relation = rel;
    if (TRIGGER_FOR_UPDATE(LocTriggerData.tg_trigger->tgtype))
        LocTriggerData.tg_updatedcols = evtshared->ats_modifiedcols;

    MemoryContextReset(per_tuple_context);

    /*
     * Call the trigger and throw away any possibly returned updated tuple.
     * (Don't let ExecCallTriggerFunc measure EXPLAIN time.)
     */
    rettuple = ExecCallTriggerFunc(&LocTriggerData,
                                   tgindx,
                                   finfo,
                                   NULL,
                                   per_tuple_context);
    if (rettuple != NULL &&
        rettuple != LocTriggerData.tg_trigtuple &&
        rettuple != LocTriggerData.tg_newtuple)
        heap_freetuple(rettuple);

    /*
     * Release resources
     */
    if (should_free_trig)
        heap_freetuple(LocTriggerData.tg_trigtuple);
    if (should_free_new)
        heap_freetuple(LocTriggerData.tg_newtuple);

    /* don't clear slots' contents if foreign table */
    if (trig_tuple_slot1 == NULL)
    {
        if (LocTriggerData.tg_trigslot)
            ExecClearTuple(LocTriggerData.tg_trigslot);
        if (LocTriggerData.tg_newslot)
            ExecClearTuple(LocTriggerData.tg_newslot);
    }

    /*
     * If doing EXPLAIN ANALYZE, stop charging time to this trigger, and count
     * one "tuple returned" (really the number of firings).
     */
    if (instr)
        InstrStopNode(instr + tgindx, 1);
}


/*
 * afterTriggerMarkEvents()
 *
 *  Scan the given event list for not yet invoked events.  Mark the ones
 *  that can be invoked now with the current firing ID.
 *
 *  If move_list isn't NULL, events that are not to be invoked now are
 *  transferred to move_list.
 *
 *  When immediate_only is true, do not invoke currently-deferred triggers.
 *  (This will be false only at main transaction exit.)
 *
 *  Returns true if any invokable events were found.
 */
static bool
afterTriggerMarkEvents(AfterTriggerEventList *events,
                       AfterTriggerEventList *move_list,
                       bool immediate_only)
{
    bool        found = false;
    bool        deferred_found = false;
    AfterTriggerEvent event;
    AfterTriggerEventChunk *chunk;

    for_each_event_chunk(event, chunk, *events)
    {
        AfterTriggerShared evtshared = GetTriggerSharedData(event);
        bool        defer_it = false;

        if (!(event->ate_flags &
              (AFTER_TRIGGER_DONE | AFTER_TRIGGER_IN_PROGRESS)))
        {
            /*
             * This trigger hasn't been called or scheduled yet. Check if we
             * should call it now.
             */
            if (immediate_only && afterTriggerCheckState(evtshared))
            {
                defer_it = true;
            }
            else
            {
                /*
                 * Mark it as to be fired in this firing cycle.
                 */
                evtshared->ats_firing_id = afterTriggers.firing_counter;
                event->ate_flags |= AFTER_TRIGGER_IN_PROGRESS;
                found = true;
            }
        }

        /*
         * If it's deferred, move it to move_list, if requested.
         */
        if (defer_it && move_list != NULL)
        {
            deferred_found = true;
            /* add it to move_list */
            afterTriggerAddEvent(move_list, event, evtshared);
            /* mark original copy "done" so we don't do it again */
            event->ate_flags |= AFTER_TRIGGER_DONE;
        }
    }

    /*
     * We could allow deferred triggers if, before the end of the
     * security-restricted operation, we were to verify that a SET CONSTRAINTS
     * ... IMMEDIATE has fired all such triggers.  For now, don't bother.
     */
    if (deferred_found && InSecurityRestrictedOperation())
        ereport(ERROR,
                (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
                 errmsg("cannot fire deferred trigger within security-restricted operation")));

    return found;
}

/*
 * afterTriggerInvokeEvents()
 *
 *  Scan the given event list for events that are marked as to be fired
 *  in the current firing cycle, and fire them.
 *
 *  If estate isn't NULL, we use its result relation info to avoid repeated
 *  openings and closing of trigger target relations.  If it is NULL, we
 *  make one locally to cache the info in case there are multiple trigger
 *  events per rel.
 *
 *  When delete_ok is true, it's safe to delete fully-processed events.
 *  (We are not very tense about that: we simply reset a chunk to be empty
 *  if all its events got fired.  The objective here is just to avoid useless
 *  rescanning of events when a trigger queues new events during transaction
 *  end, so it's not necessary to worry much about the case where only
 *  some events are fired.)
 *
 *  Returns true if no unfired events remain in the list (this allows us
 *  to avoid repeating afterTriggerMarkEvents).
 */
static bool
afterTriggerInvokeEvents(AfterTriggerEventList *events,
                         CommandId firing_id,
                         EState *estate,
                         bool delete_ok)
{
    bool        all_fired = true;
    AfterTriggerEventChunk *chunk;
    MemoryContext per_tuple_context;
    bool        local_estate = false;
    ResultRelInfo *rInfo = NULL;
    Relation    rel = NULL;
    TriggerDesc *trigdesc = NULL;
    FmgrInfo   *finfo = NULL;
    Instrumentation *instr = NULL;
    /* scratch slots, created only for foreign-table relations */
    TupleTableSlot *slot1 = NULL,
               *slot2 = NULL;

    /* Make a local EState if need be */
    if (estate == NULL)
    {
        estate = CreateExecutorState();
        local_estate = true;
    }

    /* Make a per-tuple memory context for trigger function calls */
    per_tuple_context =
        AllocSetContextCreate(CurrentMemoryContext,
                              "AfterTriggerTupleContext",
                              ALLOCSET_DEFAULT_SIZES);

    for_each_chunk(chunk, *events)
    {
        AfterTriggerEvent event;
        bool        all_fired_in_chunk = true;

        for_each_event(event, chunk)
        {
            AfterTriggerShared evtshared = GetTriggerSharedData(event);

            /*
             * Is it one for me to fire?
             */
            if ((event->ate_flags & AFTER_TRIGGER_IN_PROGRESS) &&
                evtshared->ats_firing_id == firing_id)
            {
                ResultRelInfo *src_rInfo,
                           *dst_rInfo;

                /*
                 * So let's fire it... but first, find the correct relation if
                 * this is not the same relation as before.
                 */
                if (rel == NULL || RelationGetRelid(rel) != evtshared->ats_relid)
                {
                    rInfo = ExecGetTriggerResultRel(estate, evtshared->ats_relid,
                                                    NULL);
                    rel = rInfo->ri_RelationDesc;
                    /* Catch calls with insufficient relcache refcounting */
                    Assert(!RelationHasReferenceCountZero(rel));
                    trigdesc = rInfo->ri_TrigDesc;
                    finfo = rInfo->ri_TrigFunctions;
                    instr = rInfo->ri_TrigInstrument;
                    if (slot1 != NULL)
                    {
                        ExecDropSingleTupleTableSlot(slot1);
                        ExecDropSingleTupleTableSlot(slot2);
                        slot1 = slot2 = NULL;
                    }
                    if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
                    {
                        slot1 = MakeSingleTupleTableSlot(rel->rd_att,
                                                         &TTSOpsMinimalTuple);
                        slot2 = MakeSingleTupleTableSlot(rel->rd_att,
                                                         &TTSOpsMinimalTuple);
                    }
                    if (trigdesc == NULL)   /* should not happen */
                        elog(ERROR, "relation %u has no triggers",
                             evtshared->ats_relid);
                }

                /*
                 * Look up source and destination partition result rels of a
                 * cross-partition update event.
                 */
                if ((event->ate_flags & AFTER_TRIGGER_TUP_BITS) ==
                    AFTER_TRIGGER_CP_UPDATE)
                {
                    Assert(OidIsValid(event->ate_src_part) &&
                           OidIsValid(event->ate_dst_part));
                    src_rInfo = ExecGetTriggerResultRel(estate,
                                                        event->ate_src_part,
                                                        rInfo);
                    dst_rInfo = ExecGetTriggerResultRel(estate,
                                                        event->ate_dst_part,
                                                        rInfo);
                }
                else
                    src_rInfo = dst_rInfo = rInfo;

                /*
                 * Fire it.  Note that the AFTER_TRIGGER_IN_PROGRESS flag is
                 * still set, so recursive examinations of the event list
                 * won't try to re-fire it.
                 */
                AfterTriggerExecute(estate, event, rInfo,
                                    src_rInfo, dst_rInfo,
                                    trigdesc, finfo, instr,
                                    per_tuple_context, slot1, slot2);

                /*
                 * Mark the event as done.
                 */
                event->ate_flags &= ~AFTER_TRIGGER_IN_PROGRESS;
                event->ate_flags |= AFTER_TRIGGER_DONE;
            }
            else if (!(event->ate_flags & AFTER_TRIGGER_DONE))
            {
                /* something remains to be done */
                all_fired = all_fired_in_chunk = false;
            }
        }

        /* Clear the chunk if delete_ok and nothing left of interest */
        if (delete_ok && all_fired_in_chunk)
        {
            chunk->freeptr = CHUNK_DATA_START(chunk);
            chunk->endfree = chunk->endptr;

            /*
             * If it's last chunk, must sync event list's tailfree too.  Note
             * that delete_ok must NOT be passed as true if there could be
             * additional AfterTriggerEventList values pointing at this event
             * list, since we'd fail to fix their copies of tailfree.
             */
            if (chunk == events->tail)
                events->tailfree = chunk->freeptr;
        }
    }
    if (slot1 != NULL)
    {
        ExecDropSingleTupleTableSlot(slot1);
        ExecDropSingleTupleTableSlot(slot2);
    }

    /* Release working resources */
    MemoryContextDelete(per_tuple_context);

    if (local_estate)
    {
        ExecCloseResultRelations(estate);
        ExecResetTupleTable(estate->es_tupleTable, false);
        FreeExecutorState(estate);
    }

    return all_fired;
}


/*
 * GetAfterTriggersTableData
 *
 *  Find or create an AfterTriggersTableData struct for the specified
 *  trigger event (relation + operation type).  Ignore existing structs
 *  marked "closed"; we don't want to put any additional tuples into them,
 *  nor change their stmt-triggers-fired state.
 *
 *  Note: the AfterTriggersTableData list is allocated in the current
 *  (sub)transaction's CurTransactionContext.  This is OK because
 *  we don't need it to live past AfterTriggerEndQuery.
 */
static AfterTriggersTableData *
GetAfterTriggersTableData(Oid relid, CmdType cmdType)
{
    AfterTriggersTableData *table;
    AfterTriggersQueryData *qs;
    MemoryContext oldcxt;
    ListCell   *lc;

    /* Caller should have ensured query_depth is OK. */
    Assert(afterTriggers.query_depth >= 0 &&
           afterTriggers.query_depth < afterTriggers.maxquerydepth);
    qs = &afterTriggers.query_stack[afterTriggers.query_depth];

    foreach(lc, qs->tables)
    {
        table = (AfterTriggersTableData *) lfirst(lc);
        if (table->relid == relid && table->cmdType == cmdType &&
            !table->closed)
            return table;
    }

    oldcxt = MemoryContextSwitchTo(CurTransactionContext);

    table = (AfterTriggersTableData *) palloc0(sizeof(AfterTriggersTableData));
    table->relid = relid;
    table->cmdType = cmdType;
    qs->tables = lappend(qs->tables, table);

    MemoryContextSwitchTo(oldcxt);

    return table;
}

/*
 * Returns a TupleTableSlot suitable for holding the tuples to be put
 * into AfterTriggersTableData's transition table tuplestores.
 */
static TupleTableSlot *
GetAfterTriggersStoreSlot(AfterTriggersTableData *table,
                          TupleDesc tupdesc)
{
    /* Create it if not already done. */
    if (!table->storeslot)
    {
        MemoryContext oldcxt;

        /*
         * We need this slot only until AfterTriggerEndQuery, but making it
         * last till end-of-subxact is good enough.  It'll be freed by
         * AfterTriggerFreeQuery().  However, the passed-in tupdesc might have
         * a different lifespan, so we'd better make a copy of that.
         */
        oldcxt = MemoryContextSwitchTo(CurTransactionContext);
        tupdesc = CreateTupleDescCopy(tupdesc);
        table->storeslot = MakeSingleTupleTableSlot(tupdesc, &TTSOpsVirtual);
        MemoryContextSwitchTo(oldcxt);
    }

    return table->storeslot;
}

/*
 * MakeTransitionCaptureState
 *
 *  Make a TransitionCaptureState object for the given TriggerDesc, target
 *  relation, and operation type.  The TCS object holds all the state needed
 *  to decide whether to capture tuples in transition tables.
 *
 *  If there are no triggers in 'trigdesc' that request relevant transition
 *  tables, then return NULL.
 *
 *  The resulting object can be passed to the ExecAR* functions.
 *  When dealing with child tables, the caller can set
 *  tcs_original_insert_tuple to avoid having to reconstruct the original
 *  tuple in the root table's format.
 *
 *  Note that we copy the flags from a parent table into this struct (rather
 *  than subsequently using the relation's TriggerDesc directly) so that we can
 *  use it to control collection of transition tuples from child tables.
 *
 *  Per SQL spec, all operations of the same kind (INSERT/UPDATE/DELETE)
 *  on the same table during one query should share one transition table.
 *  Therefore, the Tuplestores are owned by an AfterTriggersTableData struct
 *  looked up using the table OID + CmdType, and are merely referenced by
 *  the TransitionCaptureState objects we hand out to callers.
 */
TransitionCaptureState *
MakeTransitionCaptureState(TriggerDesc *trigdesc, Oid relid, CmdType cmdType)
{
    TransitionCaptureState *state;
    bool        need_old_upd,
                need_new_upd,
                need_old_del,
                need_new_ins;
    AfterTriggersTableData *table;
    MemoryContext oldcxt;
    ResourceOwner saveResourceOwner;

    if (trigdesc == NULL)
        return NULL;

    /* Detect which table(s) we need. */
    switch (cmdType)
    {
        case CMD_INSERT:
            need_old_upd = need_old_del = need_new_upd = false;
            need_new_ins = trigdesc->trig_insert_new_table;
            break;
        case CMD_UPDATE:
            need_old_upd = trigdesc->trig_update_old_table;
            need_new_upd = trigdesc->trig_update_new_table;
            need_old_del = need_new_ins = false;
            break;
        case CMD_DELETE:
            need_old_del = trigdesc->trig_delete_old_table;
            need_old_upd = need_new_upd = need_new_ins = false;
            break;
        case CMD_MERGE:
            /* MERGE may run any of the three actions in one statement */
            need_old_upd = trigdesc->trig_update_old_table;
            need_new_upd = trigdesc->trig_update_new_table;
            need_old_del = trigdesc->trig_delete_old_table;
            need_new_ins = trigdesc->trig_insert_new_table;
            break;
        default:
            elog(ERROR, "unexpected CmdType: %d", (int) cmdType);
            /* keep compiler quiet */
            need_old_upd = need_new_upd = need_old_del = need_new_ins = false;
            break;
    }
    if (!need_old_upd && !need_new_upd && !need_new_ins && !need_old_del)
        return NULL;

    /* Check state, like AfterTriggerSaveEvent. */
    if (afterTriggers.query_depth < 0)
        elog(ERROR, "MakeTransitionCaptureState() called outside of query");

    /* Be sure we have enough space to record events at this query depth. */
    if (afterTriggers.query_depth >= afterTriggers.maxquerydepth)
        AfterTriggerEnlargeQueryState();

    /*
     * Find or create an AfterTriggersTableData struct to hold the
     * tuplestore(s).  If there's a matching struct but it's marked closed,
     * ignore it; we need a newer one.
     *
     * Note: the AfterTriggersTableData list, as well as the tuplestores, are
     * allocated in the current (sub)transaction's CurTransactionContext, and
     * the tuplestores are managed by the (sub)transaction's resource owner.
     * This is sufficient lifespan because we do not allow triggers using
     * transition tables to be deferrable; they will be fired during
     * AfterTriggerEndQuery, after which it's okay to delete the data.
     */
    table = GetAfterTriggersTableData(relid, cmdType);

    /* Now create required tuplestore(s), if we don't have them already. */
    oldcxt = MemoryContextSwitchTo(CurTransactionContext);
    saveResourceOwner = CurrentResourceOwner;
    CurrentResourceOwner = CurTransactionResourceOwner;

    if (need_old_upd && table->old_upd_tuplestore == NULL)
        table->old_upd_tuplestore = tuplestore_begin_heap(false, false, work_mem);
    if (need_new_upd && table->new_upd_tuplestore == NULL)
        table->new_upd_tuplestore = tuplestore_begin_heap(false, false, work_mem);
    if (need_old_del && table->old_del_tuplestore == NULL)
        table->old_del_tuplestore = tuplestore_begin_heap(false, false, work_mem);
    if (need_new_ins && table->new_ins_tuplestore == NULL)
        table->new_ins_tuplestore = tuplestore_begin_heap(false, false, work_mem);

    CurrentResourceOwner = saveResourceOwner;
    MemoryContextSwitchTo(oldcxt);

    /* Now build the TransitionCaptureState struct, in caller's context */
    state = (TransitionCaptureState *) palloc0(sizeof(TransitionCaptureState));
    state->tcs_delete_old_table = trigdesc->trig_delete_old_table;
    state->tcs_update_old_table = trigdesc->trig_update_old_table;
    state->tcs_update_new_table = trigdesc->trig_update_new_table;
    state->tcs_insert_new_table = trigdesc->trig_insert_new_table;
    state->tcs_private = table;

    return state;
}


/* ----------
 * AfterTriggerBeginXact()
 *
 *  Called at transaction start (either BEGIN or implicit for single
 *  statement outside of transaction block).
 * ----------
 */
void
AfterTriggerBeginXact(void)
{
    /*
     * Initialize after-trigger state structure to empty
     */
    afterTriggers.firing_counter = (CommandId) 1;   /* mustn't be 0 */
    afterTriggers.query_depth = -1;

    /*
     * Verify that there is no leftover state remaining.  If these assertions
     * trip, it means that AfterTriggerEndXact wasn't called or didn't clean
     * up properly.
     */
    Assert(afterTriggers.state == NULL);
    Assert(afterTriggers.query_stack == NULL);
    Assert(afterTriggers.maxquerydepth == 0);
    Assert(afterTriggers.event_cxt == NULL);
    Assert(afterTriggers.events.head == NULL);
    Assert(afterTriggers.trans_stack == NULL);
    Assert(afterTriggers.maxtransdepth == 0);
}


/* ----------
 * AfterTriggerBeginQuery()
 *
 *  Called just before we start processing a single query within a
 *  transaction (or subtransaction).  Most of the real work gets deferred
 *  until somebody actually tries to queue a trigger event.
 * ----------
 */
void
AfterTriggerBeginQuery(void)
{
    /* Increase the query stack depth */
    afterTriggers.query_depth++;
}


/* ----------
 * AfterTriggerEndQuery()
 *
 *  Called after one query has been completely processed.  At this time
 *  we invoke all AFTER IMMEDIATE trigger events queued by the query, and
 *  transfer deferred trigger events to the global deferred-trigger list.
 *
 *  Note that this must be called BEFORE closing down the executor
 *  with ExecutorEnd, because we make use of the EState's info about
 *  target relations.  Normally it is called from ExecutorFinish.
 * ----------
 */
void
AfterTriggerEndQuery(EState *estate)
{
    AfterTriggersQueryData *qs;

    /* Must be inside a query, too */
    Assert(afterTriggers.query_depth >= 0);

    /*
     * If we never even got as far as initializing the event stack, there
     * certainly won't be any events, so exit quickly.
     */
    if (afterTriggers.query_depth >= afterTriggers.maxquerydepth)
    {
        afterTriggers.query_depth--;
        return;
    }

    /*
     * Process all immediate-mode triggers queued by the query, and move the
     * deferred ones to the main list of deferred events.
     *
     * Notice that we decide which ones will be fired, and put the deferred
     * ones on the main list, before anything is actually fired.  This ensures
     * reasonably sane behavior if a trigger function does SET CONSTRAINTS ...
	 * IMMEDIATE: all events we have decided to defer will be available for it
	 * to fire.
	 *
	 * We loop in case a trigger queues more events at the same query level.
	 * Ordinary trigger functions, including all PL/pgSQL trigger functions,
	 * will instead fire any triggers in a dedicated query level.  Foreign key
	 * enforcement triggers do add to the current query level, thanks to their
	 * passing fire_triggers = false to SPI_execute_snapshot().  Other
	 * C-language triggers might do likewise.
	 *
	 * If we find no firable events, we don't have to increment
	 * firing_counter.
	 */
	qs = &afterTriggers.query_stack[afterTriggers.query_depth];

	for (;;)
	{
		if (afterTriggerMarkEvents(&qs->events, &afterTriggers.events, true))
		{
			CommandId	firing_id = afterTriggers.firing_counter++;
			AfterTriggerEventChunk *oldtail = qs->events.tail;

			if (afterTriggerInvokeEvents(&qs->events, firing_id, estate, false))
				break;			/* all fired */

			/*
			 * Firing a trigger could result in query_stack being repalloc'd,
			 * so we must recalculate qs after each afterTriggerInvokeEvents
			 * call.  Furthermore, it's unsafe to pass delete_ok = true here,
			 * because that could cause afterTriggerInvokeEvents to try to
			 * access qs->events after the stack has been repalloc'd.
			 */
			qs = &afterTriggers.query_stack[afterTriggers.query_depth];

			/*
			 * We'll need to scan the events list again.  To reduce the cost
			 * of doing so, get rid of completely-fired chunks.  We know that
			 * all events were marked IN_PROGRESS or DONE at the conclusion of
			 * afterTriggerMarkEvents, so any still-interesting events must
			 * have been added after that, and so must be in the chunk that
			 * was then the tail chunk, or in later chunks.  So, zap all
			 * chunks before oldtail.  This is approximately the same set of
			 * events we would have gotten rid of by passing delete_ok = true.
			 */
			Assert(oldtail != NULL);
			while (qs->events.head != oldtail)
				afterTriggerDeleteHeadEventChunk(qs);
		}
		else
			break;
	}

	/* Release query-level-local storage, including tuplestores if any */
	AfterTriggerFreeQuery(&afterTriggers.query_stack[afterTriggers.query_depth]);

	afterTriggers.query_depth--;
}


/*
 * AfterTriggerFreeQuery
 *	Release subsidiary storage for a trigger query level.
 *	This includes closing down tuplestores.
 *	Note: it's important for this to be safe if interrupted by an error
 *	and then called again for the same query level.
 *
 * To achieve that error-reentrancy, each pointer is cleared in the struct
 * BEFORE the resource it references is released; a repeat call then simply
 * sees NULL and skips the already-freed resource.
 */
static void
AfterTriggerFreeQuery(AfterTriggersQueryData *qs)
{
	Tuplestorestate *ts;
	List	   *tables;
	ListCell   *lc;

	/* Drop the trigger events */
	afterTriggerFreeEventList(&qs->events);

	/* Drop FDW tuplestore if any */
	ts = qs->fdw_tuplestore;
	qs->fdw_tuplestore = NULL;
	if (ts)
		tuplestore_end(ts);

	/* Release per-table subsidiary storage */
	tables = qs->tables;
	foreach(lc, tables)
	{
		AfterTriggersTableData *table = (AfterTriggersTableData *) lfirst(lc);

		ts = table->old_upd_tuplestore;
		table->old_upd_tuplestore = NULL;
		if (ts)
			tuplestore_end(ts);
		ts = table->new_upd_tuplestore;
		table->new_upd_tuplestore = NULL;
		if (ts)
			tuplestore_end(ts);
		ts = table->old_del_tuplestore;
		table->old_del_tuplestore = NULL;
		if (ts)
			tuplestore_end(ts);
		ts = table->new_ins_tuplestore;
		table->new_ins_tuplestore = NULL;
		if (ts)
			tuplestore_end(ts);
		if (table->storeslot)
		{
			TupleTableSlot *slot = table->storeslot;

			table->storeslot = NULL;
			ExecDropSingleTupleTableSlot(slot);
		}
	}

	/*
	 * Now free the AfterTriggersTableData structs and list cells.  Reset list
	 * pointer first; if list_free_deep somehow gets an error, better to leak
	 * that storage than have an infinite loop.
	 */
	qs->tables = NIL;
	list_free_deep(tables);
}


/* ----------
 * AfterTriggerFireDeferred()
 *
 *	Called just before the current transaction is committed. At this
 *	time we invoke all pending DEFERRED triggers.
 *
 *	It is possible for other modules to queue additional deferred triggers
 *	during pre-commit processing; therefore xact.c may have to call this
 *	multiple times.
 * ----------
 */
void
AfterTriggerFireDeferred(void)
{
	AfterTriggerEventList *events;
	bool		snap_pushed = false;

	/* Must not be inside a query */
	Assert(afterTriggers.query_depth == -1);

	/*
	 * If there are any triggers to fire, make sure we have set a snapshot for
	 * them to use.  (Since PortalRunUtility doesn't set a snap for COMMIT, we
	 * can't assume ActiveSnapshot is valid on entry.)
	 */
	events = &afterTriggers.events;
	if (events->head != NULL)
	{
		PushActiveSnapshot(GetTransactionSnapshot());
		snap_pushed = true;
	}

	/*
	 * Run all the remaining triggers.  Loop until they are all gone, in case
	 * some trigger queues more for us to do.
	 */
	while (afterTriggerMarkEvents(events, NULL, false))
	{
		CommandId	firing_id = afterTriggers.firing_counter++;

		if (afterTriggerInvokeEvents(events, firing_id, NULL, true))
			break;				/* all fired */
	}

	/*
	 * We don't bother freeing the event list, since it will go away anyway
	 * (and more efficiently than via pfree) in AfterTriggerEndXact.
	 */

	if (snap_pushed)
		PopActiveSnapshot();
}


/* ----------
 * AfterTriggerEndXact()
 *
 *	The current transaction is finishing.
 *
 *	Any unfired triggers are canceled so we simply throw
 *	away anything we know.
 *
 *	Note: it is possible for this to be called repeatedly in case of
 *	error during transaction abort; therefore, do not complain if
 *	already closed down.
 * ----------
 */
void
AfterTriggerEndXact(bool isCommit)
{
	/*
	 * Forget the pending-events list.
	 *
	 * Since all the info is in TopTransactionContext or children thereof, we
	 * don't really need to do anything to reclaim memory.  However, the
	 * pending-events list could be large, and so it's useful to discard it as
	 * soon as possible --- especially if we are aborting because we ran out
	 * of memory for the list!
	 */
	if (afterTriggers.event_cxt)
	{
		MemoryContextDelete(afterTriggers.event_cxt);
		afterTriggers.event_cxt = NULL;
		afterTriggers.events.head = NULL;
		afterTriggers.events.tail = NULL;
		afterTriggers.events.tailfree = NULL;
	}

	/*
	 * Forget any subtransaction state as well.  Since this can't be very
	 * large, we let the eventual reset of TopTransactionContext free the
	 * memory instead of doing it here.
	 */
	afterTriggers.trans_stack = NULL;
	afterTriggers.maxtransdepth = 0;


	/*
	 * Forget the query stack and constraint-related state information.  As
	 * with the subtransaction state information, we don't bother freeing the
	 * memory here.
	 */
	afterTriggers.query_stack = NULL;
	afterTriggers.maxquerydepth = 0;
	afterTriggers.state = NULL;

	/* No more afterTriggers manipulation until next transaction starts. */
	afterTriggers.query_depth = -1;
}

/*
 * AfterTriggerBeginSubXact()
 *
 *	Start a subtransaction.
 */
void
AfterTriggerBeginSubXact(void)
{
	int			my_level = GetCurrentTransactionNestLevel();

	/*
	 * Allocate more space in the trans_stack if needed.  (Note: because the
	 * minimum nest level of a subtransaction is 2, we waste the first couple
	 * entries of the array; not worth the notational effort to avoid it.)
	 */
	while (my_level >= afterTriggers.maxtransdepth)
	{
		if (afterTriggers.maxtransdepth == 0)
		{
			/* Arbitrarily initialize for max of 8 subtransaction levels */
			afterTriggers.trans_stack = (AfterTriggersTransData *)
				MemoryContextAlloc(TopTransactionContext,
								   8 * sizeof(AfterTriggersTransData));
			afterTriggers.maxtransdepth = 8;
		}
		else
		{
			/* repalloc will keep the stack in the same context */
			int			new_alloc = afterTriggers.maxtransdepth * 2;

			afterTriggers.trans_stack = (AfterTriggersTransData *)
				repalloc(afterTriggers.trans_stack,
						 new_alloc * sizeof(AfterTriggersTransData));
			afterTriggers.maxtransdepth = new_alloc;
		}
	}

	/*
	 * Push the current information into the stack.  The SET CONSTRAINTS state
	 * is not saved until/unless changed.  Likewise, we don't make a
	 * per-subtransaction event context until needed.
	 */
	afterTriggers.trans_stack[my_level].state = NULL;
	afterTriggers.trans_stack[my_level].events = afterTriggers.events;
	afterTriggers.trans_stack[my_level].query_depth = afterTriggers.query_depth;
	afterTriggers.trans_stack[my_level].firing_counter = afterTriggers.firing_counter;
}

/*
 * AfterTriggerEndSubXact()
 *
 *	The current subtransaction is ending.  On commit, simply discard the
 *	saved state; on abort, restore the trigger machinery to what it was
 *	when AfterTriggerBeginSubXact ran for this nest level.
 */
void
AfterTriggerEndSubXact(bool isCommit)
{
	int			my_level = GetCurrentTransactionNestLevel();
	SetConstraintState state;
	AfterTriggerEvent event;
	AfterTriggerEventChunk *chunk;
	CommandId	subxact_firing_id;

	/*
	 * Pop the prior state if needed.
	 */
	if (isCommit)
	{
		Assert(my_level < afterTriggers.maxtransdepth);
		/* If we saved a prior state, we don't need it anymore */
		state = afterTriggers.trans_stack[my_level].state;
		if (state != NULL)
			pfree(state);
		/* this avoids double pfree if error later: */
		afterTriggers.trans_stack[my_level].state = NULL;
		Assert(afterTriggers.query_depth ==
			   afterTriggers.trans_stack[my_level].query_depth);
	}
	else
	{
		/*
		 * Aborting.  It is possible subxact start failed before calling
		 * AfterTriggerBeginSubXact, in which case we mustn't risk touching
		 * trans_stack levels that aren't there.
		 */
		if (my_level >= afterTriggers.maxtransdepth)
			return;

		/*
		 * Release query-level storage for queries being aborted, and restore
		 * query_depth to its pre-subxact value.  This assumes that a
		 * subtransaction will not add events to query levels started in a
		 * earlier transaction state.
		 */
		while (afterTriggers.query_depth > afterTriggers.trans_stack[my_level].query_depth)
		{
			if (afterTriggers.query_depth < afterTriggers.maxquerydepth)
				AfterTriggerFreeQuery(&afterTriggers.query_stack[afterTriggers.query_depth]);
			afterTriggers.query_depth--;
		}
		Assert(afterTriggers.query_depth ==
			   afterTriggers.trans_stack[my_level].query_depth);

		/*
		 * Restore the global deferred-event list to its former length,
		 * discarding any events queued by the subxact.
		 */
		afterTriggerRestoreEventList(&afterTriggers.events,
									 &afterTriggers.trans_stack[my_level].events);

		/*
		 * Restore the trigger state.  If the saved state is NULL, then this
		 * subxact didn't save it, so it doesn't need restoring.
		 */
		state = afterTriggers.trans_stack[my_level].state;
		if (state != NULL)
		{
			pfree(afterTriggers.state);
			afterTriggers.state = state;
		}
		/* this avoids double pfree if error later: */
		afterTriggers.trans_stack[my_level].state = NULL;

		/*
		 * Scan for any remaining deferred events that were marked DONE or IN
		 * PROGRESS by this subxact or a child, and un-mark them.  We can
		 * recognize such events because they have a firing ID greater than or
		 * equal to the firing_counter value we saved at subtransaction start.
		 * (This essentially assumes that the current subxact includes all
		 * subxacts started after it.)
		 */
		subxact_firing_id = afterTriggers.trans_stack[my_level].firing_counter;
		for_each_event_chunk(event, chunk, afterTriggers.events)
		{
			AfterTriggerShared evtshared = GetTriggerSharedData(event);

			if (event->ate_flags &
				(AFTER_TRIGGER_DONE | AFTER_TRIGGER_IN_PROGRESS))
			{
				if (evtshared->ats_firing_id >= subxact_firing_id)
					event->ate_flags &=
						~(AFTER_TRIGGER_DONE | AFTER_TRIGGER_IN_PROGRESS);
			}
		}
	}
}

/*
 * Get the transition table for the given event and depending on whether we are
 * processing the old or the new tuple.
 */
static Tuplestorestate *
GetAfterTriggersTransitionTable(int event,
								TupleTableSlot *oldslot,
								TupleTableSlot *newslot,
								TransitionCaptureState *transition_capture)
{
	Tuplestorestate *tuplestore = NULL;
	bool		delete_old_table = transition_capture->tcs_delete_old_table;
	bool		update_old_table = transition_capture->tcs_update_old_table;
	bool		update_new_table = transition_capture->tcs_update_new_table;
	bool		insert_new_table = transition_capture->tcs_insert_new_table;

	/*
	 * For INSERT events NEW should be non-NULL, for DELETE events OLD should
	 * be non-NULL, whereas for UPDATE events normally both OLD and NEW are
	 * non-NULL.  But for UPDATE events fired for capturing transition tuples
	 * during UPDATE partition-key row movement, OLD is NULL when the event is
	 * for a row being inserted, whereas NEW is NULL when the event is for a
	 * row being deleted.
	 */
	Assert(!(event == TRIGGER_EVENT_DELETE && delete_old_table &&
			 TupIsNull(oldslot)));
	Assert(!(event == TRIGGER_EVENT_INSERT && insert_new_table &&
			 TupIsNull(newslot)));

	/*
	 * Exactly one of the slots is expected to be set per call; pick the
	 * tuplestore matching the event type and the side (OLD/NEW) supplied.
	 */
	if (!TupIsNull(oldslot))
	{
		Assert(TupIsNull(newslot));
		if (event == TRIGGER_EVENT_DELETE && delete_old_table)
			tuplestore = transition_capture->tcs_private->old_del_tuplestore;
		else if (event == TRIGGER_EVENT_UPDATE && update_old_table)
			tuplestore = transition_capture->tcs_private->old_upd_tuplestore;
	}
	else if (!TupIsNull(newslot))
	{
		Assert(TupIsNull(oldslot));
		if (event == TRIGGER_EVENT_INSERT && insert_new_table)
			tuplestore = transition_capture->tcs_private->new_ins_tuplestore;
		else if (event == TRIGGER_EVENT_UPDATE && update_new_table)
			tuplestore = transition_capture->tcs_private->new_upd_tuplestore;
	}

	/* May be NULL if this event/side combination isn't being captured. */
	return tuplestore;
}

/*
 * Add the given heap tuple to the given tuplestore, applying the conversion
 * map if necessary.
 *
 * If original_insert_tuple is given, we can add that tuple without conversion.
 */
static void
TransitionTableAddTuple(EState *estate,
						TransitionCaptureState *transition_capture,
						ResultRelInfo *relinfo,
						TupleTableSlot *slot,
						TupleTableSlot *original_insert_tuple,
						Tuplestorestate *tuplestore)
{
	TupleConversionMap *map;

	/*
	 * Nothing needs to be done if we don't have a tuplestore.
	 */
	if (tuplestore == NULL)
		return;

	if (original_insert_tuple)
		tuplestore_puttupleslot(tuplestore, original_insert_tuple);
	else if ((map = ExecGetChildToRootMap(relinfo)) != NULL)
	{
		/*
		 * The tuple is in a child table's format; convert it to the root
		 * table's rowtype before storing, using a per-table work slot.
		 */
		AfterTriggersTableData *table = transition_capture->tcs_private;
		TupleTableSlot *storeslot;

		storeslot = GetAfterTriggersStoreSlot(table, map->outdesc);
		execute_attr_map_slot(map->attrMap, slot, storeslot);
		tuplestore_puttupleslot(tuplestore, storeslot);
	}
	else
		tuplestore_puttupleslot(tuplestore, slot);
}

/* ----------
 * AfterTriggerEnlargeQueryState()
 *
 *	Prepare the necessary state so that we can record AFTER trigger events
 *	queued by a query.  It is allowed to have nested queries within a
 *	(sub)transaction, so we need to have separate state for each query
 *	nesting level.
 * ----------
 */
static void
AfterTriggerEnlargeQueryState(void)
{
	int			init_depth = afterTriggers.maxquerydepth;

	Assert(afterTriggers.query_depth >= afterTriggers.maxquerydepth);

	if (afterTriggers.maxquerydepth == 0)
	{
		int			new_alloc = Max(afterTriggers.query_depth + 1, 8);

		afterTriggers.query_stack = (AfterTriggersQueryData *)
			MemoryContextAlloc(TopTransactionContext,
							   new_alloc * sizeof(AfterTriggersQueryData));
		afterTriggers.maxquerydepth = new_alloc;
	}
	else
	{
		/* repalloc will keep the stack in the same context */
		int			old_alloc = afterTriggers.maxquerydepth;
		int			new_alloc = Max(afterTriggers.query_depth + 1,
									old_alloc * 2);

		afterTriggers.query_stack = (AfterTriggersQueryData *)
			repalloc(afterTriggers.query_stack,
					 new_alloc * sizeof(AfterTriggersQueryData));
		afterTriggers.maxquerydepth = new_alloc;
	}

	/* Initialize new array entries to empty */
	while (init_depth < afterTriggers.maxquerydepth)
	{
		AfterTriggersQueryData *qs = &afterTriggers.query_stack[init_depth];

		qs->events.head = NULL;
		qs->events.tail = NULL;
		qs->events.tailfree = NULL;
		qs->fdw_tuplestore = NULL;
		qs->tables = NIL;

++init_depth; + } +} + +/* + * Create an empty SetConstraintState with room for numalloc trigstates + */ +static SetConstraintState +SetConstraintStateCreate(int numalloc) +{ + SetConstraintState state; + + /* Behave sanely with numalloc == 0 */ + if (numalloc <= 0) + numalloc = 1; + + /* + * We assume that zeroing will correctly initialize the state values. + */ + state = (SetConstraintState) + MemoryContextAllocZero(TopTransactionContext, + offsetof(SetConstraintStateData, trigstates) + + numalloc * sizeof(SetConstraintTriggerData)); + + state->numalloc = numalloc; + + return state; +} + +/* + * Copy a SetConstraintState + */ +static SetConstraintState +SetConstraintStateCopy(SetConstraintState origstate) +{ + SetConstraintState state; + + state = SetConstraintStateCreate(origstate->numstates); + + state->all_isset = origstate->all_isset; + state->all_isdeferred = origstate->all_isdeferred; + state->numstates = origstate->numstates; + memcpy(state->trigstates, origstate->trigstates, + origstate->numstates * sizeof(SetConstraintTriggerData)); + + return state; +} + +/* + * Add a per-trigger item to a SetConstraintState. Returns possibly-changed + * pointer to the state object (it will change if we have to repalloc). + */ +static SetConstraintState +SetConstraintStateAddItem(SetConstraintState state, + Oid tgoid, bool tgisdeferred) +{ + if (state->numstates >= state->numalloc) + { + int newalloc = state->numalloc * 2; + + newalloc = Max(newalloc, 8); /* in case original has size 0 */ + state = (SetConstraintState) + repalloc(state, + offsetof(SetConstraintStateData, trigstates) + + newalloc * sizeof(SetConstraintTriggerData)); + state->numalloc = newalloc; + Assert(state->numstates < state->numalloc); + } + + state->trigstates[state->numstates].sct_tgoid = tgoid; + state->trigstates[state->numstates].sct_tgisdeferred = tgisdeferred; + state->numstates++; + + return state; +} + +/* ---------- + * AfterTriggerSetState() + * + * Execute the SET CONSTRAINTS ... 
utility command.
 * ----------
 */
void
AfterTriggerSetState(ConstraintsSetStmt *stmt)
{
	int			my_level = GetCurrentTransactionNestLevel();

	/* If we haven't already done so, initialize our state. */
	if (afterTriggers.state == NULL)
		afterTriggers.state = SetConstraintStateCreate(8);

	/*
	 * If in a subtransaction, and we didn't save the current state already,
	 * save it so it can be restored if the subtransaction aborts.
	 */
	if (my_level > 1 &&
		afterTriggers.trans_stack[my_level].state == NULL)
	{
		afterTriggers.trans_stack[my_level].state =
			SetConstraintStateCopy(afterTriggers.state);
	}

	/*
	 * Handle SET CONSTRAINTS ALL ...
	 */
	if (stmt->constraints == NIL)
	{
		/*
		 * Forget any previous SET CONSTRAINTS commands in this transaction.
		 */
		afterTriggers.state->numstates = 0;

		/*
		 * Set the per-transaction ALL state to known.
		 */
		afterTriggers.state->all_isset = true;
		afterTriggers.state->all_isdeferred = stmt->deferred;
	}
	else
	{
		Relation	conrel;
		Relation	tgrel;
		List	   *conoidlist = NIL;
		List	   *tgoidlist = NIL;
		ListCell   *lc;

		/*
		 * Handle SET CONSTRAINTS constraint-name [, ...]
		 *
		 * First, identify all the named constraints and make a list of their
		 * OIDs.  Since, unlike the SQL spec, we allow multiple constraints of
		 * the same name within a schema, the specifications are not
		 * necessarily unique.  Our strategy is to target all matching
		 * constraints within the first search-path schema that has any
		 * matches, but disregard matches in schemas beyond the first match.
		 * (This is a bit odd but it's the historical behavior.)
		 *
		 * A constraint in a partitioned table may have corresponding
		 * constraints in the partitions.  Grab those too.
		 */
		conrel = table_open(ConstraintRelationId, AccessShareLock);

		foreach(lc, stmt->constraints)
		{
			RangeVar   *constraint = lfirst(lc);
			bool		found;
			List	   *namespacelist;
			ListCell   *nslc;

			if (constraint->catalogname)
			{
				if (strcmp(constraint->catalogname, get_database_name(MyDatabaseId)) != 0)
					ereport(ERROR,
							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
							 errmsg("cross-database references are not implemented: \"%s.%s.%s\"",
									constraint->catalogname, constraint->schemaname,
									constraint->relname)));
			}

			/*
			 * If we're given the schema name with the constraint, look only
			 * in that schema.  If given a bare constraint name, use the
			 * search path to find the first matching constraint.
			 */
			if (constraint->schemaname)
			{
				Oid			namespaceId = LookupExplicitNamespace(constraint->schemaname,
																  false);

				namespacelist = list_make1_oid(namespaceId);
			}
			else
			{
				namespacelist = fetch_search_path(true);
			}

			found = false;
			foreach(nslc, namespacelist)
			{
				Oid			namespaceId = lfirst_oid(nslc);
				SysScanDesc conscan;
				ScanKeyData skey[2];
				HeapTuple	tup;

				ScanKeyInit(&skey[0],
							Anum_pg_constraint_conname,
							BTEqualStrategyNumber, F_NAMEEQ,
							CStringGetDatum(constraint->relname));
				ScanKeyInit(&skey[1],
							Anum_pg_constraint_connamespace,
							BTEqualStrategyNumber, F_OIDEQ,
							ObjectIdGetDatum(namespaceId));

				conscan = systable_beginscan(conrel, ConstraintNameNspIndexId,
											 true, NULL, 2, skey);

				while (HeapTupleIsValid(tup = systable_getnext(conscan)))
				{
					Form_pg_constraint con = (Form_pg_constraint) GETSTRUCT(tup);

					if (con->condeferrable)
						conoidlist = lappend_oid(conoidlist, con->oid);
					else if (stmt->deferred)
						ereport(ERROR,
								(errcode(ERRCODE_WRONG_OBJECT_TYPE),
								 errmsg("constraint \"%s\" is not deferrable",
										constraint->relname)));
					found = true;
				}

				systable_endscan(conscan);

				/*
				 * Once we've found a matching constraint we do not search
				 * later parts of the search path.
				 */
				if (found)
					break;
			}

			list_free(namespacelist);

			/*
			 * Not found ?
			 */
			if (!found)
				ereport(ERROR,
						(errcode(ERRCODE_UNDEFINED_OBJECT),
						 errmsg("constraint \"%s\" does not exist",
								constraint->relname)));
		}

		/*
		 * Scan for any possible descendants of the constraints.  We append
		 * whatever we find to the same list that we're scanning; this has the
		 * effect that we create new scans for those, too, so if there are
		 * further descendents, we'll also catch them.
		 */
		foreach(lc, conoidlist)
		{
			Oid			parent = lfirst_oid(lc);
			ScanKeyData key;
			SysScanDesc scan;
			HeapTuple	tuple;

			ScanKeyInit(&key,
						Anum_pg_constraint_conparentid,
						BTEqualStrategyNumber, F_OIDEQ,
						ObjectIdGetDatum(parent));

			scan = systable_beginscan(conrel, ConstraintParentIndexId, true, NULL, 1, &key);

			while (HeapTupleIsValid(tuple = systable_getnext(scan)))
			{
				Form_pg_constraint con = (Form_pg_constraint) GETSTRUCT(tuple);

				conoidlist = lappend_oid(conoidlist, con->oid);
			}

			systable_endscan(scan);
		}

		table_close(conrel, AccessShareLock);

		/*
		 * Now, locate the trigger(s) implementing each of these constraints,
		 * and make a list of their OIDs.
		 */
		tgrel = table_open(TriggerRelationId, AccessShareLock);

		foreach(lc, conoidlist)
		{
			Oid			conoid = lfirst_oid(lc);
			ScanKeyData skey;
			SysScanDesc tgscan;
			HeapTuple	htup;

			ScanKeyInit(&skey,
						Anum_pg_trigger_tgconstraint,
						BTEqualStrategyNumber, F_OIDEQ,
						ObjectIdGetDatum(conoid));

			tgscan = systable_beginscan(tgrel, TriggerConstraintIndexId, true,
										NULL, 1, &skey);

			while (HeapTupleIsValid(htup = systable_getnext(tgscan)))
			{
				Form_pg_trigger pg_trigger = (Form_pg_trigger) GETSTRUCT(htup);

				/*
				 * Silently skip triggers that are marked as non-deferrable in
				 * pg_trigger.  This is not an error condition, since a
				 * deferrable RI constraint may have some non-deferrable
				 * actions.
				 */
				if (pg_trigger->tgdeferrable)
					tgoidlist = lappend_oid(tgoidlist, pg_trigger->oid);
			}

			systable_endscan(tgscan);
		}

		table_close(tgrel, AccessShareLock);

		/*
		 * Now we can set the trigger states of individual triggers for this
		 * xact.
		 */
		foreach(lc, tgoidlist)
		{
			Oid			tgoid = lfirst_oid(lc);
			SetConstraintState state = afterTriggers.state;
			bool		found = false;
			int			i;

			/* Update in place if this trigger already has an entry. */
			for (i = 0; i < state->numstates; i++)
			{
				if (state->trigstates[i].sct_tgoid == tgoid)
				{
					state->trigstates[i].sct_tgisdeferred = stmt->deferred;
					found = true;
					break;
				}
			}
			if (!found)
			{
				afterTriggers.state =
					SetConstraintStateAddItem(state, tgoid, stmt->deferred);
			}
		}
	}

	/*
	 * SQL99 requires that when a constraint is set to IMMEDIATE, any deferred
	 * checks against that constraint must be made when the SET CONSTRAINTS
	 * command is executed -- i.e. the effects of the SET CONSTRAINTS command
	 * apply retroactively.  We've updated the constraints state, so scan the
	 * list of previously deferred events to fire any that have now become
	 * immediate.
	 *
	 * Obviously, if this was SET ... DEFERRED then it can't have converted
	 * any unfired events to immediate, so we need do nothing in that case.
	 */
	if (!stmt->deferred)
	{
		AfterTriggerEventList *events = &afterTriggers.events;
		bool		snapshot_set = false;

		while (afterTriggerMarkEvents(events, NULL, true))
		{
			CommandId	firing_id = afterTriggers.firing_counter++;

			/*
			 * Make sure a snapshot has been established in case trigger
			 * functions need one.  Note that we avoid setting a snapshot if
			 * we don't find at least one trigger that has to be fired now.
			 * This is so that BEGIN; SET CONSTRAINTS ...; SET TRANSACTION
			 * ISOLATION LEVEL SERIALIZABLE; ... works properly.  (If we are
			 * at the start of a transaction it's not possible for any trigger
			 * events to be queued yet.)
			 */
			if (!snapshot_set)
			{
				PushActiveSnapshot(GetTransactionSnapshot());
				snapshot_set = true;
			}

			/*
			 * We can delete fired events if we are at top transaction level,
			 * but we'd better not if inside a subtransaction, since the
			 * subtransaction could later get rolled back.
			 */
			if (afterTriggerInvokeEvents(events, firing_id, NULL,
										 !IsSubTransaction()))
				break;			/* all fired */
		}

		if (snapshot_set)
			PopActiveSnapshot();
	}
}

/* ----------
 * AfterTriggerPendingOnRel()
 *	Test to see if there are any pending after-trigger events for rel.
 *
 * This is used by TRUNCATE, CLUSTER, ALTER TABLE, etc to detect whether
 * it is unsafe to perform major surgery on a relation.  Note that only
 * local pending events are examined.  We assume that having exclusive lock
 * on a rel guarantees there are no unserviced events in other backends ---
 * but having a lock does not prevent there being such events in our own.
 *
 * In some scenarios it'd be reasonable to remove pending events (more
 * specifically, mark them DONE by the current subxact) but without a lot
 * of knowledge of the trigger semantics we can't do this in general.
 * ----------
 */
bool
AfterTriggerPendingOnRel(Oid relid)
{
	AfterTriggerEvent event;
	AfterTriggerEventChunk *chunk;
	int			depth;

	/* Scan queued events */
	for_each_event_chunk(event, chunk, afterTriggers.events)
	{
		AfterTriggerShared evtshared = GetTriggerSharedData(event);

		/*
		 * We can ignore completed events.  (Even if a DONE flag is rolled
		 * back by subxact abort, it's OK because the effects of the TRUNCATE
		 * or whatever must get rolled back too.)
		 */
		if (event->ate_flags & AFTER_TRIGGER_DONE)
			continue;

		if (evtshared->ats_relid == relid)
			return true;
	}

	/*
	 * Also scan events queued by incomplete queries.  This could only matter
	 * if TRUNCATE/etc is executed by a function or trigger within an updating
	 * query on the same relation, which is pretty perverse, but let's check.
	 */
	for (depth = 0; depth <= afterTriggers.query_depth && depth < afterTriggers.maxquerydepth; depth++)
	{
		for_each_event_chunk(event, chunk, afterTriggers.query_stack[depth].events)
		{
			AfterTriggerShared evtshared = GetTriggerSharedData(event);

			if (event->ate_flags & AFTER_TRIGGER_DONE)
				continue;

			if (evtshared->ats_relid == relid)
				return true;
		}
	}

	return false;
}

/* ----------
 * AfterTriggerSaveEvent()
 *
 *	Called by ExecA[RS]...Triggers() to queue up the triggers that should
 *	be fired for an event.
 *
 *	NOTE: this is called whenever there are any triggers associated with
 *	the event (even if they are disabled).  This function decides which
 *	triggers actually need to be queued.  It is also called after each row,
 *	even if there are no triggers for that event, if there are any AFTER
 *	STATEMENT triggers for the statement which use transition tables, so that
 *	the transition tuplestores can be built.  Furthermore, if the transition
 *	capture is happening for UPDATEd rows being moved to another partition due
 *	to the partition-key being changed, then this function is called once when
 *	the row is deleted (to capture OLD row), and once when the row is inserted
 *	into another partition (to capture NEW row).  This is done separately because
 *	DELETE and INSERT happen on different tables.
 *
 *	Transition tuplestores are built now, rather than when events are pulled
 *	off of the queue because AFTER ROW triggers are allowed to select from the
 *	transition tables for the statement.
 *
 *	This contains special support to queue the update events for the case where
 *	a partitioned table undergoing a cross-partition update may have foreign
 *	keys pointing into it.
Normally, a partitioned table's row triggers are
+ * not fired because the leaf partition(s) which are modified as a result of
+ * the operation on the partitioned table contain the same triggers which are
+ * fired instead.  But that general scheme can cause problematic behavior with
+ * foreign key triggers during cross-partition updates, which are implemented
+ * as DELETE on the source partition followed by INSERT into the destination
+ * partition.  Specifically, firing DELETE triggers would lead to the wrong
+ * foreign key action to be enforced considering that the original command is
+ * UPDATE; in this case, this function is called with relinfo as the
+ * partitioned table, and src_partinfo and dst_partinfo referring to the
+ * source and target leaf partitions, respectively.
+ *
+ * is_crosspart_update is true either when a DELETE event is fired on the
+ * source partition (which is to be ignored) or an UPDATE event is fired on
+ * the root partitioned table.
+ * ----------
+ */
+static void
+AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
+					  ResultRelInfo *src_partinfo,
+					  ResultRelInfo *dst_partinfo,
+					  int event, bool row_trigger,
+					  TupleTableSlot *oldslot, TupleTableSlot *newslot,
+					  List *recheckIndexes, Bitmapset *modifiedCols,
+					  TransitionCaptureState *transition_capture,
+					  bool is_crosspart_update)
+{
+	Relation	rel = relinfo->ri_RelationDesc;
+	TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
+	AfterTriggerEventData new_event;
+	AfterTriggerSharedData new_shared;
+	char		relkind = rel->rd_rel->relkind;
+	int			tgtype_event;
+	int			tgtype_level;
+	int			i;
+	Tuplestorestate *fdw_tuplestore = NULL;
+
+	/*
+	 * Check state.  We use a normal test not Assert because it is possible to
+	 * reach here in the wrong state given misconfigured RI triggers, in
+	 * particular deferring a cascade action trigger.
+	 */
+	if (afterTriggers.query_depth < 0)
+		elog(ERROR, "AfterTriggerSaveEvent() called outside of query");
+
+	/* Be sure we have enough space to record events at this query depth. */
+	if (afterTriggers.query_depth >= afterTriggers.maxquerydepth)
+		AfterTriggerEnlargeQueryState();
+
+	/*
+	 * If the directly named relation has any triggers with transition tables,
+	 * then we need to capture transition tuples.
+	 */
+	if (row_trigger && transition_capture != NULL)
+	{
+		TupleTableSlot *original_insert_tuple = transition_capture->tcs_original_insert_tuple;
+
+		/*
+		 * Capture the old tuple in the appropriate transition table based on
+		 * the event.
+		 */
+		if (!TupIsNull(oldslot))
+		{
+			Tuplestorestate *old_tuplestore;
+
+			old_tuplestore = GetAfterTriggersTransitionTable(event,
+															 oldslot,
+															 NULL,
+															 transition_capture);
+			TransitionTableAddTuple(estate, transition_capture, relinfo,
+									oldslot, NULL, old_tuplestore);
+		}
+
+		/*
+		 * Capture the new tuple in the appropriate transition table based on
+		 * the event.
+		 */
+		if (!TupIsNull(newslot))
+		{
+			Tuplestorestate *new_tuplestore;
+
+			new_tuplestore = GetAfterTriggersTransitionTable(event,
+															 NULL,
+															 newslot,
+															 transition_capture);
+			TransitionTableAddTuple(estate, transition_capture, relinfo,
+									newslot, original_insert_tuple, new_tuplestore);
+		}
+
+		/*
+		 * If transition tables are the only reason we're here, return. As
+		 * mentioned above, we can also be here during update tuple routing in
+		 * presence of transition tables, in which case this function is
+		 * called separately for OLD and NEW, so we expect exactly one of them
+		 * to be NULL.
+		 */
+		if (trigdesc == NULL ||
+			(event == TRIGGER_EVENT_DELETE && !trigdesc->trig_delete_after_row) ||
+			(event == TRIGGER_EVENT_INSERT && !trigdesc->trig_insert_after_row) ||
+			(event == TRIGGER_EVENT_UPDATE && !trigdesc->trig_update_after_row) ||
+			/* XOR: true when exactly one of the two slots is NULL */
+			(event == TRIGGER_EVENT_UPDATE && (TupIsNull(oldslot) ^ TupIsNull(newslot))))
+			return;
+	}
+
+	/*
+	 * We normally don't see partitioned tables here for row level triggers
+	 * except in the special case of a cross-partition update.  In that case,
+	 * nodeModifyTable.c:ExecCrossPartitionUpdateForeignKey() calls here to
+	 * queue an update event on the root target partitioned table, also
+	 * passing the source and destination partitions and their tuples.
+	 */
+	Assert(!row_trigger ||
+		   rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE ||
+		   (is_crosspart_update &&
+			TRIGGER_FIRED_BY_UPDATE(event) &&
+			src_partinfo != NULL && dst_partinfo != NULL));
+
+	/*
+	 * Validate the event code and collect the associated tuple CTIDs.
+	 *
+	 * The event code will be used both as a bitmask and an array offset, so
+	 * validation is important to make sure we don't walk off the edge of our
+	 * arrays.
+	 *
+	 * Also, if we're considering statement-level triggers, check whether we
+	 * already queued a set of them for this event, and cancel the prior set
+	 * if so.  This preserves the behavior that statement-level triggers fire
+	 * just once per statement and fire after row-level triggers.
+	 */
+	switch (event)
+	{
+		case TRIGGER_EVENT_INSERT:
+			tgtype_event = TRIGGER_TYPE_INSERT;
+			if (row_trigger)
+			{
+				Assert(oldslot == NULL);
+				Assert(newslot != NULL);
+				ItemPointerCopy(&(newslot->tts_tid), &(new_event.ate_ctid1));
+				ItemPointerSetInvalid(&(new_event.ate_ctid2));
+			}
+			else
+			{
+				Assert(oldslot == NULL);
+				Assert(newslot == NULL);
+				ItemPointerSetInvalid(&(new_event.ate_ctid1));
+				ItemPointerSetInvalid(&(new_event.ate_ctid2));
+				cancel_prior_stmt_triggers(RelationGetRelid(rel),
+										   CMD_INSERT, event);
+			}
+			break;
+		case TRIGGER_EVENT_DELETE:
+			tgtype_event = TRIGGER_TYPE_DELETE;
+			if (row_trigger)
+			{
+				Assert(oldslot != NULL);
+				Assert(newslot == NULL);
+				ItemPointerCopy(&(oldslot->tts_tid), &(new_event.ate_ctid1));
+				ItemPointerSetInvalid(&(new_event.ate_ctid2));
+			}
+			else
+			{
+				Assert(oldslot == NULL);
+				Assert(newslot == NULL);
+				ItemPointerSetInvalid(&(new_event.ate_ctid1));
+				ItemPointerSetInvalid(&(new_event.ate_ctid2));
+				cancel_prior_stmt_triggers(RelationGetRelid(rel),
+										   CMD_DELETE, event);
+			}
+			break;
+		case TRIGGER_EVENT_UPDATE:
+			tgtype_event = TRIGGER_TYPE_UPDATE;
+			if (row_trigger)
+			{
+				Assert(oldslot != NULL);
+				Assert(newslot != NULL);
+				ItemPointerCopy(&(oldslot->tts_tid), &(new_event.ate_ctid1));
+				ItemPointerCopy(&(newslot->tts_tid), &(new_event.ate_ctid2));
+
+				/*
+				 * Also remember the OIDs of partitions to fetch these tuples
+				 * out of later in AfterTriggerExecute().
+				 */
+				if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+				{
+					Assert(src_partinfo != NULL && dst_partinfo != NULL);
+					new_event.ate_src_part =
+						RelationGetRelid(src_partinfo->ri_RelationDesc);
+					new_event.ate_dst_part =
+						RelationGetRelid(dst_partinfo->ri_RelationDesc);
+				}
+			}
+			else
+			{
+				Assert(oldslot == NULL);
+				Assert(newslot == NULL);
+				ItemPointerSetInvalid(&(new_event.ate_ctid1));
+				ItemPointerSetInvalid(&(new_event.ate_ctid2));
+				cancel_prior_stmt_triggers(RelationGetRelid(rel),
+										   CMD_UPDATE, event);
+			}
+			break;
+		case TRIGGER_EVENT_TRUNCATE:
+			tgtype_event = TRIGGER_TYPE_TRUNCATE;
+			Assert(oldslot == NULL);
+			Assert(newslot == NULL);
+			ItemPointerSetInvalid(&(new_event.ate_ctid1));
+			ItemPointerSetInvalid(&(new_event.ate_ctid2));
+			break;
+		default:
+			elog(ERROR, "invalid after-trigger event code: %d", event);
+			tgtype_event = 0;	/* keep compiler quiet */
+			break;
+	}
+
+	/* Determine flags */
+	if (!(relkind == RELKIND_FOREIGN_TABLE && row_trigger))
+	{
+		if (row_trigger && event == TRIGGER_EVENT_UPDATE)
+		{
+			if (relkind == RELKIND_PARTITIONED_TABLE)
+				new_event.ate_flags = AFTER_TRIGGER_CP_UPDATE;
+			else
+				new_event.ate_flags = AFTER_TRIGGER_2CTID;
+		}
+		else
+			new_event.ate_flags = AFTER_TRIGGER_1CTID;
+	}
+
+	/* else, we'll initialize ate_flags for each trigger */
+
+	tgtype_level = (row_trigger ? TRIGGER_TYPE_ROW : TRIGGER_TYPE_STATEMENT);
+
+	/*
+	 * Must convert/copy the source and destination partition tuples into the
+	 * root partitioned table's format/slot, because the processing in the
+	 * loop below expects both oldslot and newslot tuples to be in that form.
+	 */
+	if (row_trigger && rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+	{
+		TupleTableSlot *rootslot;
+		TupleConversionMap *map;
+
+		rootslot = ExecGetTriggerOldSlot(estate, relinfo);
+		map = ExecGetChildToRootMap(src_partinfo);
+		if (map)
+			oldslot = execute_attr_map_slot(map->attrMap,
+											oldslot,
+											rootslot);
+		else
+			oldslot = ExecCopySlot(rootslot, oldslot);
+
+		rootslot = ExecGetTriggerNewSlot(estate, relinfo);
+		map = ExecGetChildToRootMap(dst_partinfo);
+		if (map)
+			newslot = execute_attr_map_slot(map->attrMap,
+											newslot,
+											rootslot);
+		else
+			newslot = ExecCopySlot(rootslot, newslot);
+	}
+
+	for (i = 0; i < trigdesc->numtriggers; i++)
+	{
+		Trigger    *trigger = &trigdesc->triggers[i];
+
+		if (!TRIGGER_TYPE_MATCHES(trigger->tgtype,
+								  tgtype_level,
+								  TRIGGER_TYPE_AFTER,
+								  tgtype_event))
+			continue;
+		if (!TriggerEnabled(estate, relinfo, trigger, event,
+							modifiedCols, oldslot, newslot))
+			continue;
+
+		if (relkind == RELKIND_FOREIGN_TABLE && row_trigger)
+		{
+			if (fdw_tuplestore == NULL)
+			{
+				fdw_tuplestore = GetCurrentFDWTuplestore();
+				new_event.ate_flags = AFTER_TRIGGER_FDW_FETCH;
+			}
+			else
+				/* subsequent event for the same tuple */
+				new_event.ate_flags = AFTER_TRIGGER_FDW_REUSE;
+		}
+
+		/*
+		 * If the trigger is a foreign key enforcement trigger, there are
+		 * certain cases where we can skip queueing the event because we can
+		 * tell by inspection that the FK constraint will still pass. There
+		 * are also some cases during cross-partition updates of a partitioned
+		 * table where queuing the event can be skipped.
+		 */
+		if (TRIGGER_FIRED_BY_UPDATE(event) || TRIGGER_FIRED_BY_DELETE(event))
+		{
+			switch (RI_FKey_trigger_type(trigger->tgfoid))
+			{
+				case RI_TRIGGER_PK:
+
+					/*
+					 * For cross-partitioned updates of partitioned PK table,
+					 * skip the event fired by the component delete on the
+					 * source leaf partition unless the constraint originates
+					 * in the partition itself (!tgisclone), because the
+					 * update event that will be fired on the root
+					 * (partitioned) target table will be used to perform the
+					 * necessary foreign key enforcement action.
+					 */
+					if (is_crosspart_update &&
+						TRIGGER_FIRED_BY_DELETE(event) &&
+						trigger->tgisclone)
+						continue;
+
+					/* Update or delete on trigger's PK table */
+					if (!RI_FKey_pk_upd_check_required(trigger, rel,
+													   oldslot, newslot))
+					{
+						/* skip queuing this event */
+						continue;
+					}
+					break;
+
+				case RI_TRIGGER_FK:
+
+					/*
+					 * Update on trigger's FK table.  We can skip the update
+					 * event fired on a partitioned table during a
+					 * cross-partition of that table, because the insert event
+					 * that is fired on the destination leaf partition would
+					 * suffice to perform the necessary foreign key check.
+					 * Moreover, RI_FKey_fk_upd_check_required() expects to be
+					 * passed a tuple that contains system attributes, most of
+					 * which are not present in the virtual slot belonging to
+					 * a partitioned table.
+					 */
+					if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE ||
+						!RI_FKey_fk_upd_check_required(trigger, rel,
+													   oldslot, newslot))
+					{
+						/* skip queuing this event */
+						continue;
+					}
+					break;
+
+				case RI_TRIGGER_NONE:
+
+					/*
+					 * Not an FK trigger.  No need to queue the update event
+					 * fired during a cross-partitioned update of a
+					 * partitioned table, because the same row trigger must be
+					 * present in the leaf partition(s) that are affected as
+					 * part of this update and the events fired on them are
+					 * queued instead.
+					 */
+					if (row_trigger &&
+						rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+						continue;
+					break;
+			}
+		}
+
+		/*
+		 * If the trigger is a deferred unique constraint check trigger, only
+		 * queue it if the unique constraint was potentially violated, which
+		 * we know from index insertion time.
+		 */
+		if (trigger->tgfoid == F_UNIQUE_KEY_RECHECK)
+		{
+			if (!list_member_oid(recheckIndexes, trigger->tgconstrindid))
+				continue;		/* Uniqueness definitely not violated */
+		}
+
+		/*
+		 * Fill in event structure and add it to the current query's queue.
+		 * Note we set ats_table to NULL whenever this trigger doesn't use
+		 * transition tables, to improve sharability of the shared event data.
+		 */
+		new_shared.ats_event =
+			(event & TRIGGER_EVENT_OPMASK) |
+			(row_trigger ? TRIGGER_EVENT_ROW : 0) |
+			(trigger->tgdeferrable ? AFTER_TRIGGER_DEFERRABLE : 0) |
+			(trigger->tginitdeferred ? AFTER_TRIGGER_INITDEFERRED : 0);
+		new_shared.ats_tgoid = trigger->tgoid;
+		new_shared.ats_relid = RelationGetRelid(rel);
+		new_shared.ats_firing_id = 0;
+		if ((trigger->tgoldtable || trigger->tgnewtable) &&
+			transition_capture != NULL)
+			new_shared.ats_table = transition_capture->tcs_private;
+		else
+			new_shared.ats_table = NULL;
+		new_shared.ats_modifiedcols = afterTriggerCopyBitmap(modifiedCols);
+
+		afterTriggerAddEvent(&afterTriggers.query_stack[afterTriggers.query_depth].events,
+							 &new_event, &new_shared);
+	}
+
+	/*
+	 * Finally, spool any foreign tuple(s).  The tuplestore squashes them to
+	 * minimal tuples, so this loses any system columns.  The executor lost
+	 * those columns before us, for an unrelated reason, so this is fine.
+	 */
+	if (fdw_tuplestore)
+	{
+		if (oldslot != NULL)
+			tuplestore_puttupleslot(fdw_tuplestore, oldslot);
+		if (newslot != NULL)
+			tuplestore_puttupleslot(fdw_tuplestore, newslot);
+	}
+}
+
+/*
+ * Detect whether we already queued BEFORE STATEMENT triggers for the given
+ * relation + operation, and set the flag so the next call will report "true".
+ */
+static bool
+before_stmt_triggers_fired(Oid relid, CmdType cmdType)
+{
+	bool		result;
+	AfterTriggersTableData *table;
+
+	/* Check state, like AfterTriggerSaveEvent. */
+	if (afterTriggers.query_depth < 0)
+		elog(ERROR, "before_stmt_triggers_fired() called outside of query");
+
+	/* Be sure we have enough space to record events at this query depth. */
+	if (afterTriggers.query_depth >= afterTriggers.maxquerydepth)
+		AfterTriggerEnlargeQueryState();
+
+	/*
+	 * We keep this state in the AfterTriggersTableData that also holds
+	 * transition tables for the relation + operation.  In this way, if we are
+	 * forced to make a new set of transition tables because more tuples get
+	 * entered after we've already fired triggers, we will allow a new set of
+	 * statement triggers to get queued.
+	 */
+	table = GetAfterTriggersTableData(relid, cmdType);
+	/* test-and-set: report prior state, then mark as fired */
+	result = table->before_trig_done;
+	table->before_trig_done = true;
+	return result;
+}
+
+/*
+ * If we previously queued a set of AFTER STATEMENT triggers for the given
+ * relation + operation, and they've not been fired yet, cancel them.  The
+ * caller will queue a fresh set that's after any row-level triggers that may
+ * have been queued by the current sub-statement, preserving (as much as
+ * possible) the property that AFTER ROW triggers fire before AFTER STATEMENT
+ * triggers, and that the latter only fire once.  This deals with the
+ * situation where several FK enforcement triggers sequentially queue triggers
+ * for the same table into the same trigger query level.  We can't fully
+ * prevent odd behavior though: if there are AFTER ROW triggers taking
+ * transition tables, we don't want to change the transition tables once the
+ * first such trigger has seen them.  In such a case, any additional events
+ * will result in creating new transition tables and allowing new firings of
+ * statement triggers.
+ *
+ * This also saves the current event list location so that a later invocation
+ * of this function can cheaply find the triggers we're about to queue and
+ * cancel them.
+ */
+static void
+cancel_prior_stmt_triggers(Oid relid, CmdType cmdType, int tgevent)
+{
+	AfterTriggersTableData *table;
+	AfterTriggersQueryData *qs = &afterTriggers.query_stack[afterTriggers.query_depth];
+
+	/*
+	 * We keep this state in the AfterTriggersTableData that also holds
+	 * transition tables for the relation + operation.  In this way, if we are
+	 * forced to make a new set of transition tables because more tuples get
+	 * entered after we've already fired triggers, we will allow a new set of
+	 * statement triggers to get queued without canceling the old ones.
+	 */
+	table = GetAfterTriggersTableData(relid, cmdType);
+
+	if (table->after_trig_done)
+	{
+		/*
+		 * We want to start scanning from the tail location that existed just
+		 * before we inserted any statement triggers.  But the events list
+		 * might've been entirely empty then, in which case scan from the
+		 * current head.
+		 */
+		AfterTriggerEvent event;
+		AfterTriggerEventChunk *chunk;
+
+		if (table->after_trig_events.tail)
+		{
+			chunk = table->after_trig_events.tail;
+			event = (AfterTriggerEvent) table->after_trig_events.tailfree;
+		}
+		else
+		{
+			chunk = qs->events.head;
+			event = NULL;
+		}
+
+		for_each_chunk_from(chunk)
+		{
+			if (event == NULL)
+				event = (AfterTriggerEvent) CHUNK_DATA_START(chunk);
+			for_each_event_from(event, chunk)
+			{
+				AfterTriggerShared evtshared = GetTriggerSharedData(event);
+
+				/*
+				 * Exit loop when we reach events that aren't AS triggers for
+				 * the target relation.
+				 */
+				if (evtshared->ats_relid != relid)
+					goto done;
+				if ((evtshared->ats_event & TRIGGER_EVENT_OPMASK) != tgevent)
+					goto done;
+				if (!TRIGGER_FIRED_FOR_STATEMENT(evtshared->ats_event))
+					goto done;
+				if (!TRIGGER_FIRED_AFTER(evtshared->ats_event))
+					goto done;
+				/* OK, mark it DONE */
+				event->ate_flags &= ~AFTER_TRIGGER_IN_PROGRESS;
+				event->ate_flags |= AFTER_TRIGGER_DONE;
+			}
+			/* signal we must reinitialize event ptr for next chunk */
+			event = NULL;
+		}
+	}
+done:
+
+	/* In any case, save current insertion point for next time */
+	table->after_trig_done = true;
+	table->after_trig_events = qs->events;
+}
+
+/*
+ * SQL function pg_trigger_depth()
+ */
+Datum
+pg_trigger_depth(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_INT32(MyTriggerDepth);
+}
diff --git a/src/backend/commands/tsearchcmds.c b/src/backend/commands/tsearchcmds.c
new file mode 100644
index 0000000..4cc4e3c
--- /dev/null
+++ b/src/backend/commands/tsearchcmds.c
@@ -0,0 +1,1759 @@
+/*-------------------------------------------------------------------------
+ *
+ * tsearchcmds.c
+ *
+ *	  Routines for tsearch manipulation commands
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/commands/tsearchcmds.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+/*
+ * NOTE(review): the header name on the next line was lost in extraction
+ * (angle-bracketed include stripped); upstream 15.5 reads
+ * "#include <ctype.h>" here -- restore before applying this patch.
+ */
+#include 
+
+#include "access/genam.h"
+#include "access/htup_details.h"
+#include "access/table.h"
+#include "access/xact.h"
+#include "catalog/catalog.h"
+#include "catalog/dependency.h"
+#include "catalog/indexing.h"
+#include "catalog/objectaccess.h"
+#include "catalog/pg_namespace.h"
+#include "catalog/pg_proc.h"
+#include "catalog/pg_ts_config.h"
+#include "catalog/pg_ts_config_map.h"
+#include "catalog/pg_ts_dict.h"
+#include "catalog/pg_ts_parser.h"
+#include "catalog/pg_ts_template.h"
+#include "catalog/pg_type.h"
+#include "commands/alter.h"
+#include "commands/defrem.h"
+#include "commands/event_trigger.h"
+#include "common/string.h"
+#include "miscadmin.h"
+#include "nodes/makefuncs.h"
+#include "parser/parse_func.h"
+#include "tsearch/ts_cache.h"
+#include "tsearch/ts_utils.h"
+#include "utils/builtins.h"
+#include "utils/fmgroids.h"
+#include "utils/lsyscache.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
+
+
+static void MakeConfigurationMapping(AlterTSConfigurationStmt *stmt,
+									 HeapTuple tup, Relation relMap);
+static void DropConfigurationMapping(AlterTSConfigurationStmt *stmt,
+									 HeapTuple tup, Relation relMap);
+static DefElem *buildDefItem(const char *name, const char *val,
+							 bool was_quoted);
+
+
+/* --------------------- TS Parser commands ------------------------ */
+
+/*
+ * lookup a parser support function and return its OID (as a Datum)
+ *
+ * attnum is the pg_ts_parser column the function will go into
+ */
+static Datum
+get_ts_parser_func(DefElem *defel, int attnum)
+{
+	List	   *funcName = defGetQualifiedName(defel);
+	Oid			typeId[3];
+	Oid			retTypeId;
+	int			nargs;
+	Oid			procOid;
+
+	retTypeId = INTERNALOID;	/* correct for most */
+	typeId[0] = INTERNALOID;
+	switch (attnum)
+	{
+		case Anum_pg_ts_parser_prsstart:
+			nargs = 2;
+			typeId[1] = INT4OID;
+			break;
+		case Anum_pg_ts_parser_prstoken:
+			nargs = 3;
+			typeId[1] = INTERNALOID;
+			typeId[2] = INTERNALOID;
+			break;
+		case Anum_pg_ts_parser_prsend:
+			nargs = 1;
+			retTypeId = VOIDOID;
+			break;
+		case Anum_pg_ts_parser_prsheadline:
+			nargs = 3;
+			typeId[1] = INTERNALOID;
+			typeId[2] = TSQUERYOID;
+			break;
+		case Anum_pg_ts_parser_prslextype:
+			nargs = 1;
+
+			/*
+			 * Note: because the lextype method returns type internal, it must
+			 * have an internal-type argument for security reasons.  The
+			 * argument is not actually used, but is just passed as a zero.
+			 */
+			break;
+		default:
+			/* should not be here */
+			elog(ERROR, "unrecognized attribute for text search parser: %d",
+				 attnum);
+			nargs = 0;			/* keep compiler quiet */
+	}
+
+	procOid = LookupFuncName(funcName, nargs, typeId, false);
+	if (get_func_rettype(procOid) != retTypeId)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+				 errmsg("function %s should return type %s",
+						func_signature_string(funcName, nargs, NIL, typeId),
+						format_type_be(retTypeId))));
+
+	return ObjectIdGetDatum(procOid);
+}
+
+/*
+ * make pg_depend entries for a new pg_ts_parser entry
+ *
+ * Return value is the address of said new entry.
+ */
+static ObjectAddress
+makeParserDependencies(HeapTuple tuple)
+{
+	Form_pg_ts_parser prs = (Form_pg_ts_parser) GETSTRUCT(tuple);
+	ObjectAddress myself,
+				referenced;
+	ObjectAddresses *addrs;
+
+	ObjectAddressSet(myself, TSParserRelationId, prs->oid);
+
+	/* dependency on extension */
+	recordDependencyOnCurrentExtension(&myself, false);
+
+	addrs = new_object_addresses();
+
+	/* dependency on namespace */
+	ObjectAddressSet(referenced, NamespaceRelationId, prs->prsnamespace);
+	add_exact_object_address(&referenced, addrs);
+
+	/* dependencies on functions */
+	ObjectAddressSet(referenced, ProcedureRelationId, prs->prsstart);
+	add_exact_object_address(&referenced, addrs);
+
+	/* referenced.classId stays ProcedureRelationId for the rest */
+	referenced.objectId = prs->prstoken;
+	add_exact_object_address(&referenced, addrs);
+
+	referenced.objectId = prs->prsend;
+	add_exact_object_address(&referenced, addrs);
+
+	referenced.objectId = prs->prslextype;
+	add_exact_object_address(&referenced, addrs);
+
+	if (OidIsValid(prs->prsheadline))
+	{
+		referenced.objectId = prs->prsheadline;
+		add_exact_object_address(&referenced, addrs);
+	}
+
+	record_object_address_dependencies(&myself, addrs, DEPENDENCY_NORMAL);
+	free_object_addresses(addrs);
+
+	return myself;
+}
+
+/*
+ * CREATE TEXT SEARCH PARSER
+ */
+ObjectAddress
+DefineTSParser(List *names, List *parameters)
+{
+	char	   *prsname;
+	ListCell   *pl;
+	Relation	prsRel;
+	HeapTuple	tup;
+	Datum		values[Natts_pg_ts_parser];
+	bool		nulls[Natts_pg_ts_parser];
+	NameData	pname;
+	Oid			prsOid;
+	Oid			namespaceoid;
+	ObjectAddress address;
+
+	if (!superuser())
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 errmsg("must be superuser to create text search parsers")));
+
+	prsRel = table_open(TSParserRelationId, RowExclusiveLock);
+
+	/* Convert list of names to a name and namespace */
+	namespaceoid = QualifiedNameGetCreationNamespace(names, &prsname);
+
+	/* initialize tuple fields with name/namespace */
+	memset(values, 0, sizeof(values));
+	memset(nulls, false, sizeof(nulls));
+
+	prsOid = GetNewOidWithIndex(prsRel, TSParserOidIndexId,
+								Anum_pg_ts_parser_oid);
+	values[Anum_pg_ts_parser_oid - 1] = ObjectIdGetDatum(prsOid);
+	namestrcpy(&pname, prsname);
+	values[Anum_pg_ts_parser_prsname - 1] = NameGetDatum(&pname);
+	values[Anum_pg_ts_parser_prsnamespace - 1] = ObjectIdGetDatum(namespaceoid);
+
+	/*
+	 * loop over the definition list and extract the information we need.
+	 */
+	foreach(pl, parameters)
+	{
+		DefElem    *defel = (DefElem *) lfirst(pl);
+
+		if (strcmp(defel->defname, "start") == 0)
+		{
+			values[Anum_pg_ts_parser_prsstart - 1] =
+				get_ts_parser_func(defel, Anum_pg_ts_parser_prsstart);
+		}
+		else if (strcmp(defel->defname, "gettoken") == 0)
+		{
+			values[Anum_pg_ts_parser_prstoken - 1] =
+				get_ts_parser_func(defel, Anum_pg_ts_parser_prstoken);
+		}
+		else if (strcmp(defel->defname, "end") == 0)
+		{
+			values[Anum_pg_ts_parser_prsend - 1] =
+				get_ts_parser_func(defel, Anum_pg_ts_parser_prsend);
+		}
+		else if (strcmp(defel->defname, "headline") == 0)
+		{
+			values[Anum_pg_ts_parser_prsheadline - 1] =
+				get_ts_parser_func(defel, Anum_pg_ts_parser_prsheadline);
+		}
+		else if (strcmp(defel->defname, "lextypes") == 0)
+		{
+			values[Anum_pg_ts_parser_prslextype - 1] =
+				get_ts_parser_func(defel, Anum_pg_ts_parser_prslextype);
+		}
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("text search parser parameter \"%s\" not recognized",
+							defel->defname)));
+	}
+
+	/*
+	 * Validation
+	 *
+	 * values[] slots not assigned above are still zero from the memset,
+	 * i.e. InvalidOid, which is exactly what these checks reject.
+	 */
+	if (!OidIsValid(DatumGetObjectId(values[Anum_pg_ts_parser_prsstart - 1])))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+				 errmsg("text search parser start method is required")));
+
+	if (!OidIsValid(DatumGetObjectId(values[Anum_pg_ts_parser_prstoken - 1])))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+				 errmsg("text search parser gettoken method is required")));
+
+	if (!OidIsValid(DatumGetObjectId(values[Anum_pg_ts_parser_prsend - 1])))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+				 errmsg("text search parser end method is required")));
+
+	if (!OidIsValid(DatumGetObjectId(values[Anum_pg_ts_parser_prslextype - 1])))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+				 errmsg("text search parser lextypes method is required")));
+
+	/*
+	 * Looks good, insert
+	 */
+	tup = heap_form_tuple(prsRel->rd_att, values, nulls);
+
+	CatalogTupleInsert(prsRel, tup);
+
+	address = makeParserDependencies(tup);
+
+	/* Post creation hook for new text search parser */
+	InvokeObjectPostCreateHook(TSParserRelationId, prsOid, 0);
+
+	heap_freetuple(tup);
+
+	table_close(prsRel, RowExclusiveLock);
+
+	return address;
+}
+
+/* ---------------------- TS Dictionary commands -----------------------*/
+
+/*
+ * make pg_depend entries for a new pg_ts_dict entry
+ *
+ * Return value is address of the new entry
+ */
+static ObjectAddress
+makeDictionaryDependencies(HeapTuple tuple)
+{
+	Form_pg_ts_dict dict = (Form_pg_ts_dict) GETSTRUCT(tuple);
+	ObjectAddress myself,
+				referenced;
+	ObjectAddresses *addrs;
+
+	ObjectAddressSet(myself, TSDictionaryRelationId, dict->oid);
+
+	/* dependency on owner */
+	recordDependencyOnOwner(myself.classId, myself.objectId, dict->dictowner);
+
+	/* dependency on extension */
+	recordDependencyOnCurrentExtension(&myself, false);
+
+	addrs = new_object_addresses();
+
+	/* dependency on namespace */
+	ObjectAddressSet(referenced, NamespaceRelationId, dict->dictnamespace);
+	add_exact_object_address(&referenced, addrs);
+
+	/* dependency on template */
+	ObjectAddressSet(referenced, TSTemplateRelationId, dict->dicttemplate);
+	add_exact_object_address(&referenced, addrs);
+
+	record_object_address_dependencies(&myself, addrs, DEPENDENCY_NORMAL);
+	free_object_addresses(addrs);
+
+	return myself;
+}
+
+/*
+ * verify that a template's init method accepts a proposed option list
+ */
+static void
+verify_dictoptions(Oid tmplId, List *dictoptions)
+{
+	HeapTuple	tup;
+	Form_pg_ts_template tform;
+	Oid			initmethod;
+
+	/*
+	 * Suppress this test when running in a standalone backend.  This is a
+	 * hack to allow initdb to create prefab dictionaries that might not
+	 * actually be usable in template1's encoding (due to using external files
+	 * that can't be translated into template1's encoding).  We want to create
+	 * them anyway, since they might be usable later in other databases.
+ */
+	if (!IsUnderPostmaster)
+		return;
+
+	tup = SearchSysCache1(TSTEMPLATEOID, ObjectIdGetDatum(tmplId));
+	if (!HeapTupleIsValid(tup)) /* should not happen */
+		elog(ERROR, "cache lookup failed for text search template %u",
+			 tmplId);
+	tform = (Form_pg_ts_template) GETSTRUCT(tup);
+
+	initmethod = tform->tmplinit;
+
+	if (!OidIsValid(initmethod))
+	{
+		/* If there is no init method, disallow any options */
+		if (dictoptions)
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("text search template \"%s\" does not accept options",
+							NameStr(tform->tmplname))));
+	}
+	else
+	{
+		/*
+		 * Copy the options just in case init method thinks it can scribble on
+		 * them ...
+		 */
+		dictoptions = copyObject(dictoptions);
+
+		/*
+		 * Call the init method and see if it complains.  We don't worry about
+		 * it leaking memory, since our command will soon be over anyway.
+		 */
+		(void) OidFunctionCall1(initmethod, PointerGetDatum(dictoptions));
+	}
+
+	ReleaseSysCache(tup);
+}
+
+/*
+ * CREATE TEXT SEARCH DICTIONARY
+ */
+ObjectAddress
+DefineTSDictionary(List *names, List *parameters)
+{
+	ListCell   *pl;
+	Relation	dictRel;
+	HeapTuple	tup;
+	Datum		values[Natts_pg_ts_dict];
+	bool		nulls[Natts_pg_ts_dict];
+	NameData	dname;
+	Oid			templId = InvalidOid;
+	List	   *dictoptions = NIL;
+	Oid			dictOid;
+	Oid			namespaceoid;
+	AclResult	aclresult;
+	char	   *dictname;
+	ObjectAddress address;
+
+	/* Convert list of names to a name and namespace */
+	namespaceoid = QualifiedNameGetCreationNamespace(names, &dictname);
+
+	/* Check we have creation rights in target namespace */
+	aclresult = pg_namespace_aclcheck(namespaceoid, GetUserId(), ACL_CREATE);
+	if (aclresult != ACLCHECK_OK)
+		aclcheck_error(aclresult, OBJECT_SCHEMA,
+					   get_namespace_name(namespaceoid));
+
+	/*
+	 * loop over the definition list and extract the information we need.
+	 */
+	foreach(pl, parameters)
+	{
+		DefElem    *defel = (DefElem *) lfirst(pl);
+
+		if (strcmp(defel->defname, "template") == 0)
+		{
+			templId = get_ts_template_oid(defGetQualifiedName(defel), false);
+		}
+		else
+		{
+			/* Assume it's an option for the dictionary itself */
+			dictoptions = lappend(dictoptions, defel);
+		}
+	}
+
+	/*
+	 * Validation
+	 */
+	if (!OidIsValid(templId))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+				 errmsg("text search template is required")));
+
+	verify_dictoptions(templId, dictoptions);
+
+
+	dictRel = table_open(TSDictionaryRelationId, RowExclusiveLock);
+
+	/*
+	 * Looks good, insert
+	 */
+	memset(values, 0, sizeof(values));
+	memset(nulls, false, sizeof(nulls));
+
+	dictOid = GetNewOidWithIndex(dictRel, TSDictionaryOidIndexId,
+								 Anum_pg_ts_dict_oid);
+	values[Anum_pg_ts_dict_oid - 1] = ObjectIdGetDatum(dictOid);
+	namestrcpy(&dname, dictname);
+	values[Anum_pg_ts_dict_dictname - 1] = NameGetDatum(&dname);
+	values[Anum_pg_ts_dict_dictnamespace - 1] = ObjectIdGetDatum(namespaceoid);
+	values[Anum_pg_ts_dict_dictowner - 1] = ObjectIdGetDatum(GetUserId());
+	values[Anum_pg_ts_dict_dicttemplate - 1] = ObjectIdGetDatum(templId);
+	if (dictoptions)
+		values[Anum_pg_ts_dict_dictinitoption - 1] =
+			PointerGetDatum(serialize_deflist(dictoptions));
+	else
+		nulls[Anum_pg_ts_dict_dictinitoption - 1] = true;
+
+	tup = heap_form_tuple(dictRel->rd_att, values, nulls);
+
+	CatalogTupleInsert(dictRel, tup);
+
+	address = makeDictionaryDependencies(tup);
+
+	/* Post creation hook for new text search dictionary */
+	InvokeObjectPostCreateHook(TSDictionaryRelationId, dictOid, 0);
+
+	heap_freetuple(tup);
+
+	table_close(dictRel, RowExclusiveLock);
+
+	return address;
+}
+
+/*
+ * ALTER TEXT SEARCH DICTIONARY
+ */
+ObjectAddress
+AlterTSDictionary(AlterTSDictionaryStmt *stmt)
+{
+	HeapTuple	tup,
+				newtup;
+	Relation	rel;
+	Oid			dictId;
+	ListCell   *pl;
+	List	   *dictoptions;
+	Datum		opt;
+	bool		isnull;
+	Datum		repl_val[Natts_pg_ts_dict];
+	bool		repl_null[Natts_pg_ts_dict];
+	bool		repl_repl[Natts_pg_ts_dict];
+	ObjectAddress address;
+
+	dictId = get_ts_dict_oid(stmt->dictname, false);
+
+	rel = table_open(TSDictionaryRelationId, RowExclusiveLock);
+
+	tup = SearchSysCache1(TSDICTOID, ObjectIdGetDatum(dictId));
+
+	if (!HeapTupleIsValid(tup))
+		elog(ERROR, "cache lookup failed for text search dictionary %u",
+			 dictId);
+
+	/* must be owner */
+	if (!pg_ts_dict_ownercheck(dictId, GetUserId()))
+		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_TSDICTIONARY,
+					   NameListToString(stmt->dictname));
+
+	/* deserialize the existing set of options */
+	opt = SysCacheGetAttr(TSDICTOID, tup,
+						  Anum_pg_ts_dict_dictinitoption,
+						  &isnull);
+	if (isnull)
+		dictoptions = NIL;
+	else
+		dictoptions = deserialize_deflist(opt);
+
+	/*
+	 * Modify the options list as per specified changes
+	 */
+	foreach(pl, stmt->options)
+	{
+		DefElem    *defel = (DefElem *) lfirst(pl);
+		ListCell   *cell;
+
+		/*
+		 * Remove any matches ...
+		 */
+		foreach(cell, dictoptions)
+		{
+			DefElem    *oldel = (DefElem *) lfirst(cell);
+
+			if (strcmp(oldel->defname, defel->defname) == 0)
+				dictoptions = foreach_delete_current(dictoptions, cell);
+		}
+
+		/*
+		 * and add new value if it's got one
+		 *
+		 * (a DefElem with no arg therefore just deletes the old setting)
+		 */
+		if (defel->arg)
+			dictoptions = lappend(dictoptions, defel);
+	}
+
+	/*
+	 * Validate
+	 */
+	verify_dictoptions(((Form_pg_ts_dict) GETSTRUCT(tup))->dicttemplate,
+					   dictoptions);
+
+	/*
+	 * Looks good, update
+	 */
+	memset(repl_val, 0, sizeof(repl_val));
+	memset(repl_null, false, sizeof(repl_null));
+	memset(repl_repl, false, sizeof(repl_repl));
+
+	if (dictoptions)
+		repl_val[Anum_pg_ts_dict_dictinitoption - 1] =
+			PointerGetDatum(serialize_deflist(dictoptions));
+	else
+		repl_null[Anum_pg_ts_dict_dictinitoption - 1] = true;
+	repl_repl[Anum_pg_ts_dict_dictinitoption - 1] = true;
+
+	newtup = heap_modify_tuple(tup, RelationGetDescr(rel),
+							   repl_val, repl_null, repl_repl);
+
+	CatalogTupleUpdate(rel, &newtup->t_self, newtup);
+
+	InvokeObjectPostAlterHook(TSDictionaryRelationId, dictId, 0);
+
+	ObjectAddressSet(address, TSDictionaryRelationId, dictId);
+
+	/*
+	 * NOTE: because we only support altering the options, not the template,
+	 * there is no need to update dependencies.  This might have to change if
+	 * the options ever reference inside-the-database objects.
+	 */
+
+	heap_freetuple(newtup);
+	ReleaseSysCache(tup);
+
+	table_close(rel, RowExclusiveLock);
+
+	return address;
+}
+
+/* ---------------------- TS Template commands -----------------------*/
+
+/*
+ * lookup a template support function and return its OID (as a Datum)
+ *
+ * attnum is the pg_ts_template column the function will go into
+ */
+static Datum
+get_ts_template_func(DefElem *defel, int attnum)
+{
+	List	   *funcName = defGetQualifiedName(defel);
+	Oid			typeId[4];
+	Oid			retTypeId;
+	int			nargs;
+	Oid			procOid;
+
+	retTypeId = INTERNALOID;
+	typeId[0] = INTERNALOID;
+	typeId[1] = INTERNALOID;
+	typeId[2] = INTERNALOID;
+	typeId[3] = INTERNALOID;
+	switch (attnum)
+	{
+		case Anum_pg_ts_template_tmplinit:
+			nargs = 1;
+			break;
+		case Anum_pg_ts_template_tmpllexize:
+			nargs = 4;
+			break;
+		default:
+			/* should not be here */
+			elog(ERROR, "unrecognized attribute for text search template: %d",
+				 attnum);
+			nargs = 0;			/* keep compiler quiet */
+	}
+
+	procOid = LookupFuncName(funcName, nargs, typeId, false);
+	if (get_func_rettype(procOid) != retTypeId)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+				 errmsg("function %s should return type %s",
+						func_signature_string(funcName, nargs, NIL, typeId),
+						format_type_be(retTypeId))));
+
+	return ObjectIdGetDatum(procOid);
+}
+
+/*
+ * make pg_depend entries for a new pg_ts_template entry
+ */
+static ObjectAddress
+makeTSTemplateDependencies(HeapTuple tuple)
+{
+	Form_pg_ts_template tmpl = (Form_pg_ts_template) GETSTRUCT(tuple);
+	ObjectAddress myself,
+				referenced;
+	ObjectAddresses *addrs;
+
+	ObjectAddressSet(myself, TSTemplateRelationId, tmpl->oid);
+
+	/*
dependency on extension */ + recordDependencyOnCurrentExtension(&myself, false); + + addrs = new_object_addresses(); + + /* dependency on namespace */ + ObjectAddressSet(referenced, NamespaceRelationId, tmpl->tmplnamespace); + add_exact_object_address(&referenced, addrs); + + /* dependencies on functions */ + ObjectAddressSet(referenced, ProcedureRelationId, tmpl->tmpllexize); + add_exact_object_address(&referenced, addrs); + + if (OidIsValid(tmpl->tmplinit)) + { + referenced.objectId = tmpl->tmplinit; + add_exact_object_address(&referenced, addrs); + } + + record_object_address_dependencies(&myself, addrs, DEPENDENCY_NORMAL); + free_object_addresses(addrs); + + return myself; +} + +/* + * CREATE TEXT SEARCH TEMPLATE + */ +ObjectAddress +DefineTSTemplate(List *names, List *parameters) +{ + ListCell *pl; + Relation tmplRel; + HeapTuple tup; + Datum values[Natts_pg_ts_template]; + bool nulls[Natts_pg_ts_template]; + NameData dname; + int i; + Oid tmplOid; + Oid namespaceoid; + char *tmplname; + ObjectAddress address; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to create text search templates"))); + + /* Convert list of names to a name and namespace */ + namespaceoid = QualifiedNameGetCreationNamespace(names, &tmplname); + + tmplRel = table_open(TSTemplateRelationId, RowExclusiveLock); + + for (i = 0; i < Natts_pg_ts_template; i++) + { + nulls[i] = false; + values[i] = ObjectIdGetDatum(InvalidOid); + } + + tmplOid = GetNewOidWithIndex(tmplRel, TSTemplateOidIndexId, + Anum_pg_ts_dict_oid); + values[Anum_pg_ts_template_oid - 1] = ObjectIdGetDatum(tmplOid); + namestrcpy(&dname, tmplname); + values[Anum_pg_ts_template_tmplname - 1] = NameGetDatum(&dname); + values[Anum_pg_ts_template_tmplnamespace - 1] = ObjectIdGetDatum(namespaceoid); + + /* + * loop over the definition list and extract the information we need. 
+ */ + foreach(pl, parameters) + { + DefElem *defel = (DefElem *) lfirst(pl); + + if (strcmp(defel->defname, "init") == 0) + { + values[Anum_pg_ts_template_tmplinit - 1] = + get_ts_template_func(defel, Anum_pg_ts_template_tmplinit); + nulls[Anum_pg_ts_template_tmplinit - 1] = false; + } + else if (strcmp(defel->defname, "lexize") == 0) + { + values[Anum_pg_ts_template_tmpllexize - 1] = + get_ts_template_func(defel, Anum_pg_ts_template_tmpllexize); + nulls[Anum_pg_ts_template_tmpllexize - 1] = false; + } + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("text search template parameter \"%s\" not recognized", + defel->defname))); + } + + /* + * Validation + */ + if (!OidIsValid(DatumGetObjectId(values[Anum_pg_ts_template_tmpllexize - 1]))) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("text search template lexize method is required"))); + + /* + * Looks good, insert + */ + tup = heap_form_tuple(tmplRel->rd_att, values, nulls); + + CatalogTupleInsert(tmplRel, tup); + + address = makeTSTemplateDependencies(tup); + + /* Post creation hook for new text search template */ + InvokeObjectPostCreateHook(TSTemplateRelationId, tmplOid, 0); + + heap_freetuple(tup); + + table_close(tmplRel, RowExclusiveLock); + + return address; +} + +/* ---------------------- TS Configuration commands -----------------------*/ + +/* + * Finds syscache tuple of configuration. + * Returns NULL if no such cfg. 
+ */ +static HeapTuple +GetTSConfigTuple(List *names) +{ + HeapTuple tup; + Oid cfgId; + + cfgId = get_ts_config_oid(names, true); + if (!OidIsValid(cfgId)) + return NULL; + + tup = SearchSysCache1(TSCONFIGOID, ObjectIdGetDatum(cfgId)); + + if (!HeapTupleIsValid(tup)) /* should not happen */ + elog(ERROR, "cache lookup failed for text search configuration %u", + cfgId); + + return tup; +} + +/* + * make pg_depend entries for a new or updated pg_ts_config entry + * + * Pass opened pg_ts_config_map relation if there might be any config map + * entries for the config. + */ +static ObjectAddress +makeConfigurationDependencies(HeapTuple tuple, bool removeOld, + Relation mapRel) +{ + Form_pg_ts_config cfg = (Form_pg_ts_config) GETSTRUCT(tuple); + ObjectAddresses *addrs; + ObjectAddress myself, + referenced; + + myself.classId = TSConfigRelationId; + myself.objectId = cfg->oid; + myself.objectSubId = 0; + + /* for ALTER case, first flush old dependencies, except extension deps */ + if (removeOld) + { + deleteDependencyRecordsFor(myself.classId, myself.objectId, true); + deleteSharedDependencyRecordsFor(myself.classId, myself.objectId, 0); + } + + /* + * We use an ObjectAddresses list to remove possible duplicate + * dependencies from the config map info. The pg_ts_config items + * shouldn't be duplicates, but might as well fold them all into one call. 
+ */ + addrs = new_object_addresses(); + + /* dependency on namespace */ + referenced.classId = NamespaceRelationId; + referenced.objectId = cfg->cfgnamespace; + referenced.objectSubId = 0; + add_exact_object_address(&referenced, addrs); + + /* dependency on owner */ + recordDependencyOnOwner(myself.classId, myself.objectId, cfg->cfgowner); + + /* dependency on extension */ + recordDependencyOnCurrentExtension(&myself, removeOld); + + /* dependency on parser */ + referenced.classId = TSParserRelationId; + referenced.objectId = cfg->cfgparser; + referenced.objectSubId = 0; + add_exact_object_address(&referenced, addrs); + + /* dependencies on dictionaries listed in config map */ + if (mapRel) + { + ScanKeyData skey; + SysScanDesc scan; + HeapTuple maptup; + + /* CCI to ensure we can see effects of caller's changes */ + CommandCounterIncrement(); + + ScanKeyInit(&skey, + Anum_pg_ts_config_map_mapcfg, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(myself.objectId)); + + scan = systable_beginscan(mapRel, TSConfigMapIndexId, true, + NULL, 1, &skey); + + while (HeapTupleIsValid((maptup = systable_getnext(scan)))) + { + Form_pg_ts_config_map cfgmap = (Form_pg_ts_config_map) GETSTRUCT(maptup); + + referenced.classId = TSDictionaryRelationId; + referenced.objectId = cfgmap->mapdict; + referenced.objectSubId = 0; + add_exact_object_address(&referenced, addrs); + } + + systable_endscan(scan); + } + + /* Record 'em (this includes duplicate elimination) */ + record_object_address_dependencies(&myself, addrs, DEPENDENCY_NORMAL); + + free_object_addresses(addrs); + + return myself; +} + +/* + * CREATE TEXT SEARCH CONFIGURATION + */ +ObjectAddress +DefineTSConfiguration(List *names, List *parameters, ObjectAddress *copied) +{ + Relation cfgRel; + Relation mapRel = NULL; + HeapTuple tup; + Datum values[Natts_pg_ts_config]; + bool nulls[Natts_pg_ts_config]; + AclResult aclresult; + Oid namespaceoid; + char *cfgname; + NameData cname; + Oid sourceOid = InvalidOid; + Oid prsOid 
= InvalidOid; + Oid cfgOid; + ListCell *pl; + ObjectAddress address; + + /* Convert list of names to a name and namespace */ + namespaceoid = QualifiedNameGetCreationNamespace(names, &cfgname); + + /* Check we have creation rights in target namespace */ + aclresult = pg_namespace_aclcheck(namespaceoid, GetUserId(), ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_SCHEMA, + get_namespace_name(namespaceoid)); + + /* + * loop over the definition list and extract the information we need. + */ + foreach(pl, parameters) + { + DefElem *defel = (DefElem *) lfirst(pl); + + if (strcmp(defel->defname, "parser") == 0) + prsOid = get_ts_parser_oid(defGetQualifiedName(defel), false); + else if (strcmp(defel->defname, "copy") == 0) + sourceOid = get_ts_config_oid(defGetQualifiedName(defel), false); + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("text search configuration parameter \"%s\" not recognized", + defel->defname))); + } + + if (OidIsValid(sourceOid) && OidIsValid(prsOid)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot specify both PARSER and COPY options"))); + + /* make copied tsconfig available to callers */ + if (copied && OidIsValid(sourceOid)) + { + ObjectAddressSet(*copied, + TSConfigRelationId, + sourceOid); + } + + /* + * Look up source config if given. 
+ */ + if (OidIsValid(sourceOid)) + { + Form_pg_ts_config cfg; + + tup = SearchSysCache1(TSCONFIGOID, ObjectIdGetDatum(sourceOid)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for text search configuration %u", + sourceOid); + + cfg = (Form_pg_ts_config) GETSTRUCT(tup); + + /* use source's parser */ + prsOid = cfg->cfgparser; + + ReleaseSysCache(tup); + } + + /* + * Validation + */ + if (!OidIsValid(prsOid)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("text search parser is required"))); + + cfgRel = table_open(TSConfigRelationId, RowExclusiveLock); + + /* + * Looks good, build tuple and insert + */ + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + + cfgOid = GetNewOidWithIndex(cfgRel, TSConfigOidIndexId, + Anum_pg_ts_config_oid); + values[Anum_pg_ts_config_oid - 1] = ObjectIdGetDatum(cfgOid); + namestrcpy(&cname, cfgname); + values[Anum_pg_ts_config_cfgname - 1] = NameGetDatum(&cname); + values[Anum_pg_ts_config_cfgnamespace - 1] = ObjectIdGetDatum(namespaceoid); + values[Anum_pg_ts_config_cfgowner - 1] = ObjectIdGetDatum(GetUserId()); + values[Anum_pg_ts_config_cfgparser - 1] = ObjectIdGetDatum(prsOid); + + tup = heap_form_tuple(cfgRel->rd_att, values, nulls); + + CatalogTupleInsert(cfgRel, tup); + + if (OidIsValid(sourceOid)) + { + /* + * Copy token-dicts map from source config + */ + ScanKeyData skey; + SysScanDesc scan; + HeapTuple maptup; + + mapRel = table_open(TSConfigMapRelationId, RowExclusiveLock); + + ScanKeyInit(&skey, + Anum_pg_ts_config_map_mapcfg, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(sourceOid)); + + scan = systable_beginscan(mapRel, TSConfigMapIndexId, true, + NULL, 1, &skey); + + while (HeapTupleIsValid((maptup = systable_getnext(scan)))) + { + Form_pg_ts_config_map cfgmap = (Form_pg_ts_config_map) GETSTRUCT(maptup); + HeapTuple newmaptup; + Datum mapvalues[Natts_pg_ts_config_map]; + bool mapnulls[Natts_pg_ts_config_map]; + + memset(mapvalues, 0, 
sizeof(mapvalues));
+			memset(mapnulls, false, sizeof(mapnulls));
+
+			mapvalues[Anum_pg_ts_config_map_mapcfg - 1] = ObjectIdGetDatum(cfgOid);
+			mapvalues[Anum_pg_ts_config_map_maptokentype - 1] = Int32GetDatum(cfgmap->maptokentype);
+			mapvalues[Anum_pg_ts_config_map_mapseqno - 1] = Int32GetDatum(cfgmap->mapseqno);
+			mapvalues[Anum_pg_ts_config_map_mapdict - 1] = ObjectIdGetDatum(cfgmap->mapdict);
+
+			newmaptup = heap_form_tuple(mapRel->rd_att, mapvalues, mapnulls);
+
+			CatalogTupleInsert(mapRel, newmaptup);
+
+			heap_freetuple(newmaptup);
+		}
+
+		systable_endscan(scan);
+	}
+
+	address = makeConfigurationDependencies(tup, false, mapRel);
+
+	/* Post creation hook for new text search configuration */
+	InvokeObjectPostCreateHook(TSConfigRelationId, cfgOid, 0);
+
+	heap_freetuple(tup);
+
+	if (mapRel)
+		table_close(mapRel, RowExclusiveLock);
+	table_close(cfgRel, RowExclusiveLock);
+
+	return address;
+}
+
+/*
+ * Guts of TS configuration deletion.
+ */
+void
+RemoveTSConfigurationById(Oid cfgId)
+{
+	Relation	relCfg,
+				relMap;
+	HeapTuple	tup;
+	ScanKeyData skey;
+	SysScanDesc scan;
+
+	/* Remove the pg_ts_config entry */
+	relCfg = table_open(TSConfigRelationId, RowExclusiveLock);
+
+	tup = SearchSysCache1(TSCONFIGOID, ObjectIdGetDatum(cfgId));
+
+	if (!HeapTupleIsValid(tup))
+		elog(ERROR, "cache lookup failed for text search configuration %u",
+			 cfgId);
+
+	CatalogTupleDelete(relCfg, &tup->t_self);
+
+	ReleaseSysCache(tup);
+
+	table_close(relCfg, RowExclusiveLock);
+
+	/* Remove any pg_ts_config_map entries */
+	relMap = table_open(TSConfigMapRelationId, RowExclusiveLock);
+
+	ScanKeyInit(&skey,
+				Anum_pg_ts_config_map_mapcfg,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(cfgId));
+
+	scan = systable_beginscan(relMap, TSConfigMapIndexId, true,
+							  NULL, 1, &skey);
+
+	while (HeapTupleIsValid((tup = systable_getnext(scan))))
+	{
+		CatalogTupleDelete(relMap, &tup->t_self);
+	}
+
+	systable_endscan(scan);
+
+	table_close(relMap, RowExclusiveLock);
+}
+
+/*
+ * ALTER TEXT SEARCH CONFIGURATION - main entry point
+ */
+ObjectAddress
+AlterTSConfiguration(AlterTSConfigurationStmt *stmt) +{ + HeapTuple tup; + Oid cfgId; + Relation relMap; + ObjectAddress address; + + /* Find the configuration */ + tup = GetTSConfigTuple(stmt->cfgname); + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("text search configuration \"%s\" does not exist", + NameListToString(stmt->cfgname)))); + + cfgId = ((Form_pg_ts_config) GETSTRUCT(tup))->oid; + + /* must be owner */ + if (!pg_ts_config_ownercheck(cfgId, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_TSCONFIGURATION, + NameListToString(stmt->cfgname)); + + relMap = table_open(TSConfigMapRelationId, RowExclusiveLock); + + /* Add or drop mappings */ + if (stmt->dicts) + MakeConfigurationMapping(stmt, tup, relMap); + else if (stmt->tokentype) + DropConfigurationMapping(stmt, tup, relMap); + + /* Update dependencies */ + makeConfigurationDependencies(tup, true, relMap); + + InvokeObjectPostAlterHook(TSConfigRelationId, cfgId, 0); + + ObjectAddressSet(address, TSConfigRelationId, cfgId); + + table_close(relMap, RowExclusiveLock); + + ReleaseSysCache(tup); + + return address; +} + +/* + * Translate a list of token type names to an array of token type numbers + */ +static int * +getTokenTypes(Oid prsId, List *tokennames) +{ + TSParserCacheEntry *prs = lookup_ts_parser_cache(prsId); + LexDescr *list; + int *res, + i, + ntoken; + ListCell *tn; + + ntoken = list_length(tokennames); + if (ntoken == 0) + return NULL; + res = (int *) palloc(sizeof(int) * ntoken); + + if (!OidIsValid(prs->lextypeOid)) + elog(ERROR, "method lextype isn't defined for text search parser %u", + prsId); + + /* lextype takes one dummy argument */ + list = (LexDescr *) DatumGetPointer(OidFunctionCall1(prs->lextypeOid, + (Datum) 0)); + + i = 0; + foreach(tn, tokennames) + { + String *val = lfirst_node(String, tn); + bool found = false; + int j; + + j = 0; + while (list && list[j].lexid) + { + if (strcmp(strVal(val), list[j].alias) == 0) + { + 
res[i] = list[j].lexid; + found = true; + break; + } + j++; + } + if (!found) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("token type \"%s\" does not exist", + strVal(val)))); + i++; + } + + return res; +} + +/* + * ALTER TEXT SEARCH CONFIGURATION ADD/ALTER MAPPING + */ +static void +MakeConfigurationMapping(AlterTSConfigurationStmt *stmt, + HeapTuple tup, Relation relMap) +{ + Form_pg_ts_config tsform; + Oid cfgId; + ScanKeyData skey[2]; + SysScanDesc scan; + HeapTuple maptup; + int i; + int j; + Oid prsId; + int *tokens, + ntoken; + Oid *dictIds; + int ndict; + ListCell *c; + + tsform = (Form_pg_ts_config) GETSTRUCT(tup); + cfgId = tsform->oid; + prsId = tsform->cfgparser; + + tokens = getTokenTypes(prsId, stmt->tokentype); + ntoken = list_length(stmt->tokentype); + + if (stmt->override) + { + /* + * delete maps for tokens if they exist and command was ALTER + */ + for (i = 0; i < ntoken; i++) + { + ScanKeyInit(&skey[0], + Anum_pg_ts_config_map_mapcfg, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(cfgId)); + ScanKeyInit(&skey[1], + Anum_pg_ts_config_map_maptokentype, + BTEqualStrategyNumber, F_INT4EQ, + Int32GetDatum(tokens[i])); + + scan = systable_beginscan(relMap, TSConfigMapIndexId, true, + NULL, 2, skey); + + while (HeapTupleIsValid((maptup = systable_getnext(scan)))) + { + CatalogTupleDelete(relMap, &maptup->t_self); + } + + systable_endscan(scan); + } + } + + /* + * Convert list of dictionary names to array of dict OIDs + */ + ndict = list_length(stmt->dicts); + dictIds = (Oid *) palloc(sizeof(Oid) * ndict); + i = 0; + foreach(c, stmt->dicts) + { + List *names = (List *) lfirst(c); + + dictIds[i] = get_ts_dict_oid(names, false); + i++; + } + + if (stmt->replace) + { + /* + * Replace a specific dictionary in existing entries + */ + Oid dictOld = dictIds[0], + dictNew = dictIds[1]; + + ScanKeyInit(&skey[0], + Anum_pg_ts_config_map_mapcfg, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(cfgId)); + + scan = 
systable_beginscan(relMap, TSConfigMapIndexId, true, + NULL, 1, skey); + + while (HeapTupleIsValid((maptup = systable_getnext(scan)))) + { + Form_pg_ts_config_map cfgmap = (Form_pg_ts_config_map) GETSTRUCT(maptup); + + /* + * check if it's one of target token types + */ + if (tokens) + { + bool tokmatch = false; + + for (j = 0; j < ntoken; j++) + { + if (cfgmap->maptokentype == tokens[j]) + { + tokmatch = true; + break; + } + } + if (!tokmatch) + continue; + } + + /* + * replace dictionary if match + */ + if (cfgmap->mapdict == dictOld) + { + Datum repl_val[Natts_pg_ts_config_map]; + bool repl_null[Natts_pg_ts_config_map]; + bool repl_repl[Natts_pg_ts_config_map]; + HeapTuple newtup; + + memset(repl_val, 0, sizeof(repl_val)); + memset(repl_null, false, sizeof(repl_null)); + memset(repl_repl, false, sizeof(repl_repl)); + + repl_val[Anum_pg_ts_config_map_mapdict - 1] = ObjectIdGetDatum(dictNew); + repl_repl[Anum_pg_ts_config_map_mapdict - 1] = true; + + newtup = heap_modify_tuple(maptup, + RelationGetDescr(relMap), + repl_val, repl_null, repl_repl); + CatalogTupleUpdate(relMap, &newtup->t_self, newtup); + } + } + + systable_endscan(scan); + } + else + { + /* + * Insertion of new entries + */ + for (i = 0; i < ntoken; i++) + { + for (j = 0; j < ndict; j++) + { + Datum values[Natts_pg_ts_config_map]; + bool nulls[Natts_pg_ts_config_map]; + + memset(nulls, false, sizeof(nulls)); + values[Anum_pg_ts_config_map_mapcfg - 1] = ObjectIdGetDatum(cfgId); + values[Anum_pg_ts_config_map_maptokentype - 1] = Int32GetDatum(tokens[i]); + values[Anum_pg_ts_config_map_mapseqno - 1] = Int32GetDatum(j + 1); + values[Anum_pg_ts_config_map_mapdict - 1] = ObjectIdGetDatum(dictIds[j]); + + tup = heap_form_tuple(relMap->rd_att, values, nulls); + CatalogTupleInsert(relMap, tup); + + heap_freetuple(tup); + } + } + } + + EventTriggerCollectAlterTSConfig(stmt, cfgId, dictIds, ndict); +} + +/* + * ALTER TEXT SEARCH CONFIGURATION DROP MAPPING + */ +static void 
+DropConfigurationMapping(AlterTSConfigurationStmt *stmt, + HeapTuple tup, Relation relMap) +{ + Form_pg_ts_config tsform; + Oid cfgId; + ScanKeyData skey[2]; + SysScanDesc scan; + HeapTuple maptup; + int i; + Oid prsId; + int *tokens; + ListCell *c; + + tsform = (Form_pg_ts_config) GETSTRUCT(tup); + cfgId = tsform->oid; + prsId = tsform->cfgparser; + + tokens = getTokenTypes(prsId, stmt->tokentype); + + i = 0; + foreach(c, stmt->tokentype) + { + String *val = lfirst_node(String, c); + bool found = false; + + ScanKeyInit(&skey[0], + Anum_pg_ts_config_map_mapcfg, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(cfgId)); + ScanKeyInit(&skey[1], + Anum_pg_ts_config_map_maptokentype, + BTEqualStrategyNumber, F_INT4EQ, + Int32GetDatum(tokens[i])); + + scan = systable_beginscan(relMap, TSConfigMapIndexId, true, + NULL, 2, skey); + + while (HeapTupleIsValid((maptup = systable_getnext(scan)))) + { + CatalogTupleDelete(relMap, &maptup->t_self); + found = true; + } + + systable_endscan(scan); + + if (!found) + { + if (!stmt->missing_ok) + { + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("mapping for token type \"%s\" does not exist", + strVal(val)))); + } + else + { + ereport(NOTICE, + (errmsg("mapping for token type \"%s\" does not exist, skipping", + strVal(val)))); + } + } + + i++; + } + + EventTriggerCollectAlterTSConfig(stmt, cfgId, NULL, 0); +} + + +/* + * Serialize dictionary options, producing a TEXT datum from a List of DefElem + * + * This is used to form the value stored in pg_ts_dict.dictinitoption. + * For the convenience of pg_dump, the output is formatted exactly as it + * would need to appear in CREATE TEXT SEARCH DICTIONARY to reproduce the + * same options. 
+ */ +text * +serialize_deflist(List *deflist) +{ + text *result; + StringInfoData buf; + ListCell *l; + + initStringInfo(&buf); + + foreach(l, deflist) + { + DefElem *defel = (DefElem *) lfirst(l); + char *val = defGetString(defel); + + appendStringInfo(&buf, "%s = ", + quote_identifier(defel->defname)); + + /* + * If the value is a T_Integer or T_Float, emit it without quotes, + * otherwise with quotes. This is essential to allow correct + * reconstruction of the node type as well as the value. + */ + if (IsA(defel->arg, Integer) || IsA(defel->arg, Float)) + appendStringInfoString(&buf, val); + else + { + /* If backslashes appear, force E syntax to quote them safely */ + if (strchr(val, '\\')) + appendStringInfoChar(&buf, ESCAPE_STRING_SYNTAX); + appendStringInfoChar(&buf, '\''); + while (*val) + { + char ch = *val++; + + if (SQL_STR_DOUBLE(ch, true)) + appendStringInfoChar(&buf, ch); + appendStringInfoChar(&buf, ch); + } + appendStringInfoChar(&buf, '\''); + } + if (lnext(deflist, l) != NULL) + appendStringInfoString(&buf, ", "); + } + + result = cstring_to_text_with_len(buf.data, buf.len); + pfree(buf.data); + return result; +} + +/* + * Deserialize dictionary options, reconstructing a List of DefElem from TEXT + * + * This is also used for prsheadline options, so for backward compatibility + * we need to accept a few things serialize_deflist() will never emit: + * in particular, unquoted and double-quoted strings. 
+ */ +List * +deserialize_deflist(Datum txt) +{ + text *in = DatumGetTextPP(txt); /* in case it's toasted */ + List *result = NIL; + int len = VARSIZE_ANY_EXHDR(in); + char *ptr, + *endptr, + *workspace, + *wsptr = NULL, + *startvalue = NULL; + typedef enum + { + CS_WAITKEY, + CS_INKEY, + CS_INQKEY, + CS_WAITEQ, + CS_WAITVALUE, + CS_INSQVALUE, + CS_INDQVALUE, + CS_INWVALUE + } ds_state; + ds_state state = CS_WAITKEY; + + workspace = (char *) palloc(len + 1); /* certainly enough room */ + ptr = VARDATA_ANY(in); + endptr = ptr + len; + for (; ptr < endptr; ptr++) + { + switch (state) + { + case CS_WAITKEY: + if (isspace((unsigned char) *ptr) || *ptr == ',') + continue; + if (*ptr == '"') + { + wsptr = workspace; + state = CS_INQKEY; + } + else + { + wsptr = workspace; + *wsptr++ = *ptr; + state = CS_INKEY; + } + break; + case CS_INKEY: + if (isspace((unsigned char) *ptr)) + { + *wsptr++ = '\0'; + state = CS_WAITEQ; + } + else if (*ptr == '=') + { + *wsptr++ = '\0'; + state = CS_WAITVALUE; + } + else + { + *wsptr++ = *ptr; + } + break; + case CS_INQKEY: + if (*ptr == '"') + { + if (ptr + 1 < endptr && ptr[1] == '"') + { + /* copy only one of the two quotes */ + *wsptr++ = *ptr++; + } + else + { + *wsptr++ = '\0'; + state = CS_WAITEQ; + } + } + else + { + *wsptr++ = *ptr; + } + break; + case CS_WAITEQ: + if (*ptr == '=') + state = CS_WAITVALUE; + else if (!isspace((unsigned char) *ptr)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid parameter list format: \"%s\"", + text_to_cstring(in)))); + break; + case CS_WAITVALUE: + if (*ptr == '\'') + { + startvalue = wsptr; + state = CS_INSQVALUE; + } + else if (*ptr == 'E' && ptr + 1 < endptr && ptr[1] == '\'') + { + ptr++; + startvalue = wsptr; + state = CS_INSQVALUE; + } + else if (*ptr == '"') + { + startvalue = wsptr; + state = CS_INDQVALUE; + } + else if (!isspace((unsigned char) *ptr)) + { + startvalue = wsptr; + *wsptr++ = *ptr; + state = CS_INWVALUE; + } + break; + case CS_INSQVALUE: + if (*ptr == 
'\'') + { + if (ptr + 1 < endptr && ptr[1] == '\'') + { + /* copy only one of the two quotes */ + *wsptr++ = *ptr++; + } + else + { + *wsptr++ = '\0'; + result = lappend(result, + buildDefItem(workspace, + startvalue, + true)); + state = CS_WAITKEY; + } + } + else if (*ptr == '\\') + { + if (ptr + 1 < endptr && ptr[1] == '\\') + { + /* copy only one of the two backslashes */ + *wsptr++ = *ptr++; + } + else + *wsptr++ = *ptr; + } + else + { + *wsptr++ = *ptr; + } + break; + case CS_INDQVALUE: + if (*ptr == '"') + { + if (ptr + 1 < endptr && ptr[1] == '"') + { + /* copy only one of the two quotes */ + *wsptr++ = *ptr++; + } + else + { + *wsptr++ = '\0'; + result = lappend(result, + buildDefItem(workspace, + startvalue, + true)); + state = CS_WAITKEY; + } + } + else + { + *wsptr++ = *ptr; + } + break; + case CS_INWVALUE: + if (*ptr == ',' || isspace((unsigned char) *ptr)) + { + *wsptr++ = '\0'; + result = lappend(result, + buildDefItem(workspace, + startvalue, + false)); + state = CS_WAITKEY; + } + else + { + *wsptr++ = *ptr; + } + break; + default: + elog(ERROR, "unrecognized deserialize_deflist state: %d", + state); + } + } + + if (state == CS_INWVALUE) + { + *wsptr++ = '\0'; + result = lappend(result, + buildDefItem(workspace, + startvalue, + false)); + } + else if (state != CS_WAITKEY) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid parameter list format: \"%s\"", + text_to_cstring(in)))); + + pfree(workspace); + + return result; +} + +/* + * Build one DefElem for deserialize_deflist + */ +static DefElem * +buildDefItem(const char *name, const char *val, bool was_quoted) +{ + /* If input was quoted, always emit as string */ + if (!was_quoted && val[0] != '\0') + { + int v; + char *endptr; + + /* Try to parse as an integer */ + errno = 0; + v = strtoint(val, &endptr, 10); + if (errno == 0 && *endptr == '\0') + return makeDefElem(pstrdup(name), + (Node *) makeInteger(v), + -1); + /* Nope, how about as a float? 
*/ + errno = 0; + (void) strtod(val, &endptr); + if (errno == 0 && *endptr == '\0') + return makeDefElem(pstrdup(name), + (Node *) makeFloat(pstrdup(val)), + -1); + + if (strcmp(val, "true") == 0) + return makeDefElem(pstrdup(name), + (Node *) makeBoolean(true), + -1); + if (strcmp(val, "false") == 0) + return makeDefElem(pstrdup(name), + (Node *) makeBoolean(false), + -1); + } + /* Just make it a string */ + return makeDefElem(pstrdup(name), + (Node *) makeString(pstrdup(val)), + -1); +} diff --git a/src/backend/commands/typecmds.c b/src/backend/commands/typecmds.c new file mode 100644 index 0000000..9b92b04 --- /dev/null +++ b/src/backend/commands/typecmds.c @@ -0,0 +1,4495 @@ +/*------------------------------------------------------------------------- + * + * typecmds.c + * Routines for SQL commands that manipulate types (and domains). + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/typecmds.c + * + * DESCRIPTION + * The "DefineFoo" routines take the parse tree and pick out the + * appropriate arguments/flags, passing the results to the + * corresponding "FooDefine" routines (in src/catalog) that do + * the actual catalog-munging. These routines also verify permission + * of the user to execute the command. 
+ * + * NOTES + * These things must be defined and committed in the following order: + * "create function": + * input/output, recv/send functions + * "create type": + * type + * "create operator": + * operators + * + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/genam.h" +#include "access/heapam.h" +#include "access/htup_details.h" +#include "access/tableam.h" +#include "access/xact.h" +#include "catalog/binary_upgrade.h" +#include "catalog/catalog.h" +#include "catalog/heap.h" +#include "catalog/objectaccess.h" +#include "catalog/pg_am.h" +#include "catalog/pg_authid.h" +#include "catalog/pg_cast.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_constraint.h" +#include "catalog/pg_depend.h" +#include "catalog/pg_enum.h" +#include "catalog/pg_language.h" +#include "catalog/pg_namespace.h" +#include "catalog/pg_proc.h" +#include "catalog/pg_range.h" +#include "catalog/pg_type.h" +#include "commands/defrem.h" +#include "commands/tablecmds.h" +#include "commands/typecmds.h" +#include "executor/executor.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "optimizer/optimizer.h" +#include "parser/parse_coerce.h" +#include "parser/parse_collate.h" +#include "parser/parse_expr.h" +#include "parser/parse_func.h" +#include "parser/parse_type.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" +#include "utils/inval.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/rel.h" +#include "utils/ruleutils.h" +#include "utils/snapmgr.h" +#include "utils/syscache.h" + + +/* result structure for get_rels_with_domain() */ +typedef struct +{ + Relation rel; /* opened and locked relation */ + int natts; /* number of attributes of interest */ + int *atts; /* attribute numbers */ + /* atts[] is of allocated length RelationGetNumberOfAttributes(rel) */ +} RelToCheck; + +/* parameter structure for AlterTypeRecurse() */ +typedef struct +{ + /* Flags 
indicating which type attributes to update */ + bool updateStorage; + bool updateReceive; + bool updateSend; + bool updateTypmodin; + bool updateTypmodout; + bool updateAnalyze; + bool updateSubscript; + /* New values for relevant attributes */ + char storage; + Oid receiveOid; + Oid sendOid; + Oid typmodinOid; + Oid typmodoutOid; + Oid analyzeOid; + Oid subscriptOid; +} AlterTypeRecurseParams; + +/* Potentially set by pg_upgrade_support functions */ +Oid binary_upgrade_next_array_pg_type_oid = InvalidOid; +Oid binary_upgrade_next_mrng_pg_type_oid = InvalidOid; +Oid binary_upgrade_next_mrng_array_pg_type_oid = InvalidOid; + +static void makeRangeConstructors(const char *name, Oid namespace, + Oid rangeOid, Oid subtype); +static void makeMultirangeConstructors(const char *name, Oid namespace, + Oid multirangeOid, Oid rangeOid, + Oid rangeArrayOid, Oid *castFuncOid); +static Oid findTypeInputFunction(List *procname, Oid typeOid); +static Oid findTypeOutputFunction(List *procname, Oid typeOid); +static Oid findTypeReceiveFunction(List *procname, Oid typeOid); +static Oid findTypeSendFunction(List *procname, Oid typeOid); +static Oid findTypeTypmodinFunction(List *procname); +static Oid findTypeTypmodoutFunction(List *procname); +static Oid findTypeAnalyzeFunction(List *procname, Oid typeOid); +static Oid findTypeSubscriptingFunction(List *procname, Oid typeOid); +static Oid findRangeSubOpclass(List *opcname, Oid subtype); +static Oid findRangeCanonicalFunction(List *procname, Oid typeOid); +static Oid findRangeSubtypeDiffFunction(List *procname, Oid subtype); +static void validateDomainConstraint(Oid domainoid, char *ccbin); +static List *get_rels_with_domain(Oid domainOid, LOCKMODE lockmode); +static void checkEnumOwner(HeapTuple tup); +static char *domainAddConstraint(Oid domainOid, Oid domainNamespace, + Oid baseTypeOid, + int typMod, Constraint *constr, + const char *domainName, ObjectAddress *constrAddr); +static Node *replace_domain_constraint_value(ParseState 
*pstate, + ColumnRef *cref); +static void AlterTypeRecurse(Oid typeOid, bool isImplicitArray, + HeapTuple tup, Relation catalog, + AlterTypeRecurseParams *atparams); + + +/* + * DefineType + * Registers a new base type. + */ +ObjectAddress +DefineType(ParseState *pstate, List *names, List *parameters) +{ + char *typeName; + Oid typeNamespace; + int16 internalLength = -1; /* default: variable-length */ + List *inputName = NIL; + List *outputName = NIL; + List *receiveName = NIL; + List *sendName = NIL; + List *typmodinName = NIL; + List *typmodoutName = NIL; + List *analyzeName = NIL; + List *subscriptName = NIL; + char category = TYPCATEGORY_USER; + bool preferred = false; + char delimiter = DEFAULT_TYPDELIM; + Oid elemType = InvalidOid; + char *defaultValue = NULL; + bool byValue = false; + char alignment = TYPALIGN_INT; /* default alignment */ + char storage = TYPSTORAGE_PLAIN; /* default TOAST storage method */ + Oid collation = InvalidOid; + DefElem *likeTypeEl = NULL; + DefElem *internalLengthEl = NULL; + DefElem *inputNameEl = NULL; + DefElem *outputNameEl = NULL; + DefElem *receiveNameEl = NULL; + DefElem *sendNameEl = NULL; + DefElem *typmodinNameEl = NULL; + DefElem *typmodoutNameEl = NULL; + DefElem *analyzeNameEl = NULL; + DefElem *subscriptNameEl = NULL; + DefElem *categoryEl = NULL; + DefElem *preferredEl = NULL; + DefElem *delimiterEl = NULL; + DefElem *elemTypeEl = NULL; + DefElem *defaultValueEl = NULL; + DefElem *byValueEl = NULL; + DefElem *alignmentEl = NULL; + DefElem *storageEl = NULL; + DefElem *collatableEl = NULL; + Oid inputOid; + Oid outputOid; + Oid receiveOid = InvalidOid; + Oid sendOid = InvalidOid; + Oid typmodinOid = InvalidOid; + Oid typmodoutOid = InvalidOid; + Oid analyzeOid = InvalidOid; + Oid subscriptOid = InvalidOid; + char *array_type; + Oid array_oid; + Oid typoid; + ListCell *pl; + ObjectAddress address; + + /* + * As of Postgres 8.4, we require superuser privilege to create a base + * type. 
This is simple paranoia: there are too many ways to mess up the + * system with an incorrect type definition (for instance, representation + * parameters that don't match what the C code expects). In practice it + * takes superuser privilege to create the I/O functions, and so the + * former requirement that you own the I/O functions pretty much forced + * superuserness anyway. We're just making doubly sure here. + * + * XXX re-enable NOT_USED code sections below if you remove this test. + */ + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to create a base type"))); + + /* Convert list of names to a name and namespace */ + typeNamespace = QualifiedNameGetCreationNamespace(names, &typeName); + +#ifdef NOT_USED + /* XXX this is unnecessary given the superuser check above */ + /* Check we have creation rights in target namespace */ + aclresult = pg_namespace_aclcheck(typeNamespace, GetUserId(), ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_SCHEMA, + get_namespace_name(typeNamespace)); +#endif + + /* + * Look to see if type already exists. + */ + typoid = GetSysCacheOid2(TYPENAMENSP, Anum_pg_type_oid, + CStringGetDatum(typeName), + ObjectIdGetDatum(typeNamespace)); + + /* + * If it's not a shell, see if it's an autogenerated array type, and if so + * rename it out of the way. + */ + if (OidIsValid(typoid) && get_typisdefined(typoid)) + { + if (moveArrayTypeName(typoid, typeName, typeNamespace)) + typoid = InvalidOid; + else + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("type \"%s\" already exists", typeName))); + } + + /* + * If this command is a parameterless CREATE TYPE, then we're just here to + * make a shell type, so do that (or fail if there already is a shell). 
+ */ + if (parameters == NIL) + { + if (OidIsValid(typoid)) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("type \"%s\" already exists", typeName))); + + address = TypeShellMake(typeName, typeNamespace, GetUserId()); + return address; + } + + /* + * Otherwise, we must already have a shell type, since there is no other + * way that the I/O functions could have been created. + */ + if (!OidIsValid(typoid)) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("type \"%s\" does not exist", typeName), + errhint("Create the type as a shell type, then create its I/O functions, then do a full CREATE TYPE."))); + + /* Extract the parameters from the parameter list */ + foreach(pl, parameters) + { + DefElem *defel = (DefElem *) lfirst(pl); + DefElem **defelp; + + if (strcmp(defel->defname, "like") == 0) + defelp = &likeTypeEl; + else if (strcmp(defel->defname, "internallength") == 0) + defelp = &internalLengthEl; + else if (strcmp(defel->defname, "input") == 0) + defelp = &inputNameEl; + else if (strcmp(defel->defname, "output") == 0) + defelp = &outputNameEl; + else if (strcmp(defel->defname, "receive") == 0) + defelp = &receiveNameEl; + else if (strcmp(defel->defname, "send") == 0) + defelp = &sendNameEl; + else if (strcmp(defel->defname, "typmod_in") == 0) + defelp = &typmodinNameEl; + else if (strcmp(defel->defname, "typmod_out") == 0) + defelp = &typmodoutNameEl; + else if (strcmp(defel->defname, "analyze") == 0 || + strcmp(defel->defname, "analyse") == 0) + defelp = &analyzeNameEl; + else if (strcmp(defel->defname, "subscript") == 0) + defelp = &subscriptNameEl; + else if (strcmp(defel->defname, "category") == 0) + defelp = &categoryEl; + else if (strcmp(defel->defname, "preferred") == 0) + defelp = &preferredEl; + else if (strcmp(defel->defname, "delimiter") == 0) + defelp = &delimiterEl; + else if (strcmp(defel->defname, "element") == 0) + defelp = &elemTypeEl; + else if (strcmp(defel->defname, "default") == 0) + defelp = &defaultValueEl; 
+ else if (strcmp(defel->defname, "passedbyvalue") == 0) + defelp = &byValueEl; + else if (strcmp(defel->defname, "alignment") == 0) + defelp = &alignmentEl; + else if (strcmp(defel->defname, "storage") == 0) + defelp = &storageEl; + else if (strcmp(defel->defname, "collatable") == 0) + defelp = &collatableEl; + else + { + /* WARNING, not ERROR, for historical backwards-compatibility */ + ereport(WARNING, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("type attribute \"%s\" not recognized", + defel->defname), + parser_errposition(pstate, defel->location))); + continue; + } + if (*defelp != NULL) + errorConflictingDefElem(defel, pstate); + *defelp = defel; + } + + /* + * Now interpret the options; we do this separately so that LIKE can be + * overridden by other options regardless of the ordering in the parameter + * list. + */ + if (likeTypeEl) + { + Type likeType; + Form_pg_type likeForm; + + likeType = typenameType(NULL, defGetTypeName(likeTypeEl), NULL); + likeForm = (Form_pg_type) GETSTRUCT(likeType); + internalLength = likeForm->typlen; + byValue = likeForm->typbyval; + alignment = likeForm->typalign; + storage = likeForm->typstorage; + ReleaseSysCache(likeType); + } + if (internalLengthEl) + internalLength = defGetTypeLength(internalLengthEl); + if (inputNameEl) + inputName = defGetQualifiedName(inputNameEl); + if (outputNameEl) + outputName = defGetQualifiedName(outputNameEl); + if (receiveNameEl) + receiveName = defGetQualifiedName(receiveNameEl); + if (sendNameEl) + sendName = defGetQualifiedName(sendNameEl); + if (typmodinNameEl) + typmodinName = defGetQualifiedName(typmodinNameEl); + if (typmodoutNameEl) + typmodoutName = defGetQualifiedName(typmodoutNameEl); + if (analyzeNameEl) + analyzeName = defGetQualifiedName(analyzeNameEl); + if (subscriptNameEl) + subscriptName = defGetQualifiedName(subscriptNameEl); + if (categoryEl) + { + char *p = defGetString(categoryEl); + + category = p[0]; + /* restrict to non-control ASCII */ + if (category < 32 || category > 
126) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid type category \"%s\": must be simple ASCII", + p))); + } + if (preferredEl) + preferred = defGetBoolean(preferredEl); + if (delimiterEl) + { + char *p = defGetString(delimiterEl); + + delimiter = p[0]; + /* XXX shouldn't we restrict the delimiter? */ + } + if (elemTypeEl) + { + elemType = typenameTypeId(NULL, defGetTypeName(elemTypeEl)); + /* disallow arrays of pseudotypes */ + if (get_typtype(elemType) == TYPTYPE_PSEUDO) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("array element type cannot be %s", + format_type_be(elemType)))); + } + if (defaultValueEl) + defaultValue = defGetString(defaultValueEl); + if (byValueEl) + byValue = defGetBoolean(byValueEl); + if (alignmentEl) + { + char *a = defGetString(alignmentEl); + + /* + * Note: if argument was an unquoted identifier, parser will have + * applied translations to it, so be prepared to recognize translated + * type names as well as the nominal form. 
+ */ + if (pg_strcasecmp(a, "double") == 0 || + pg_strcasecmp(a, "float8") == 0 || + pg_strcasecmp(a, "pg_catalog.float8") == 0) + alignment = TYPALIGN_DOUBLE; + else if (pg_strcasecmp(a, "int4") == 0 || + pg_strcasecmp(a, "pg_catalog.int4") == 0) + alignment = TYPALIGN_INT; + else if (pg_strcasecmp(a, "int2") == 0 || + pg_strcasecmp(a, "pg_catalog.int2") == 0) + alignment = TYPALIGN_SHORT; + else if (pg_strcasecmp(a, "char") == 0 || + pg_strcasecmp(a, "pg_catalog.bpchar") == 0) + alignment = TYPALIGN_CHAR; + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("alignment \"%s\" not recognized", a))); + } + if (storageEl) + { + char *a = defGetString(storageEl); + + if (pg_strcasecmp(a, "plain") == 0) + storage = TYPSTORAGE_PLAIN; + else if (pg_strcasecmp(a, "external") == 0) + storage = TYPSTORAGE_EXTERNAL; + else if (pg_strcasecmp(a, "extended") == 0) + storage = TYPSTORAGE_EXTENDED; + else if (pg_strcasecmp(a, "main") == 0) + storage = TYPSTORAGE_MAIN; + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("storage \"%s\" not recognized", a))); + } + if (collatableEl) + collation = defGetBoolean(collatableEl) ? 
DEFAULT_COLLATION_OID : InvalidOid; + + /* + * make sure we have our required definitions + */ + if (inputName == NIL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("type input function must be specified"))); + if (outputName == NIL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("type output function must be specified"))); + + if (typmodinName == NIL && typmodoutName != NIL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("type modifier output function is useless without a type modifier input function"))); + + /* + * Convert I/O proc names to OIDs + */ + inputOid = findTypeInputFunction(inputName, typoid); + outputOid = findTypeOutputFunction(outputName, typoid); + if (receiveName) + receiveOid = findTypeReceiveFunction(receiveName, typoid); + if (sendName) + sendOid = findTypeSendFunction(sendName, typoid); + + /* + * Convert typmodin/out function proc names to OIDs. + */ + if (typmodinName) + typmodinOid = findTypeTypmodinFunction(typmodinName); + if (typmodoutName) + typmodoutOid = findTypeTypmodoutFunction(typmodoutName); + + /* + * Convert analysis function proc name to an OID. If no analysis function + * is specified, we'll use zero to select the built-in default algorithm. + */ + if (analyzeName) + analyzeOid = findTypeAnalyzeFunction(analyzeName, typoid); + + /* + * Likewise look up the subscripting function if any. If it is not + * specified, but a typelem is specified, allow that if + * raw_array_subscript_handler can be used. (This is for backwards + * compatibility; maybe someday we should throw an error instead.) 
+ */ + if (subscriptName) + subscriptOid = findTypeSubscriptingFunction(subscriptName, typoid); + else if (OidIsValid(elemType)) + { + if (internalLength > 0 && !byValue && get_typlen(elemType) > 0) + subscriptOid = F_RAW_ARRAY_SUBSCRIPT_HANDLER; + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("element type cannot be specified without a subscripting function"))); + } + + /* + * Check permissions on functions. We choose to require the creator/owner + * of a type to also own the underlying functions. Since creating a type + * is tantamount to granting public execute access on the functions, the + * minimum sane check would be for execute-with-grant-option. But we + * don't have a way to make the type go away if the grant option is + * revoked, so ownership seems better. + * + * XXX For now, this is all unnecessary given the superuser check above. + * If we ever relax that, these calls likely should be moved into + * findTypeInputFunction et al, where they could be shared by AlterType. 
+ */ +#ifdef NOT_USED + if (inputOid && !pg_proc_ownercheck(inputOid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FUNCTION, + NameListToString(inputName)); + if (outputOid && !pg_proc_ownercheck(outputOid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FUNCTION, + NameListToString(outputName)); + if (receiveOid && !pg_proc_ownercheck(receiveOid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FUNCTION, + NameListToString(receiveName)); + if (sendOid && !pg_proc_ownercheck(sendOid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FUNCTION, + NameListToString(sendName)); + if (typmodinOid && !pg_proc_ownercheck(typmodinOid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FUNCTION, + NameListToString(typmodinName)); + if (typmodoutOid && !pg_proc_ownercheck(typmodoutOid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FUNCTION, + NameListToString(typmodoutName)); + if (analyzeOid && !pg_proc_ownercheck(analyzeOid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FUNCTION, + NameListToString(analyzeName)); + if (subscriptOid && !pg_proc_ownercheck(subscriptOid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FUNCTION, + NameListToString(subscriptName)); +#endif + + /* + * OK, we're done checking, time to make the type. We must assign the + * array type OID ahead of calling TypeCreate, since the base type and + * array type each refer to the other. + */ + array_oid = AssignTypeArrayOid(); + + /* + * now have TypeCreate do all the real work. + * + * Note: the pg_type.oid is stored in user tables as array elements (base + * types) in ArrayType and in composite types in DatumTupleFields. This + * oid must be preserved by binary upgrades. 
+ */ + address = + TypeCreate(InvalidOid, /* no predetermined type OID */ + typeName, /* type name */ + typeNamespace, /* namespace */ + InvalidOid, /* relation oid (n/a here) */ + 0, /* relation kind (ditto) */ + GetUserId(), /* owner's ID */ + internalLength, /* internal size */ + TYPTYPE_BASE, /* type-type (base type) */ + category, /* type-category */ + preferred, /* is it a preferred type? */ + delimiter, /* array element delimiter */ + inputOid, /* input procedure */ + outputOid, /* output procedure */ + receiveOid, /* receive procedure */ + sendOid, /* send procedure */ + typmodinOid, /* typmodin procedure */ + typmodoutOid, /* typmodout procedure */ + analyzeOid, /* analyze procedure */ + subscriptOid, /* subscript procedure */ + elemType, /* element type ID */ + false, /* this is not an implicit array type */ + array_oid, /* array type we are about to create */ + InvalidOid, /* base type ID (only for domains) */ + defaultValue, /* default type value */ + NULL, /* no binary form available */ + byValue, /* passed by value */ + alignment, /* required alignment */ + storage, /* TOAST strategy */ + -1, /* typMod (Domains only) */ + 0, /* Array Dimensions of typbasetype */ + false, /* Type NOT NULL */ + collation); /* type's collation */ + Assert(typoid == address.objectId); + + /* + * Create the array type that goes with it. + */ + array_type = makeArrayTypeName(typeName, typeNamespace); + + /* alignment must be TYPALIGN_INT or TYPALIGN_DOUBLE for arrays */ + alignment = (alignment == TYPALIGN_DOUBLE) ? 
TYPALIGN_DOUBLE : TYPALIGN_INT; + + TypeCreate(array_oid, /* force assignment of this type OID */ + array_type, /* type name */ + typeNamespace, /* namespace */ + InvalidOid, /* relation oid (n/a here) */ + 0, /* relation kind (ditto) */ + GetUserId(), /* owner's ID */ + -1, /* internal size (always varlena) */ + TYPTYPE_BASE, /* type-type (base type) */ + TYPCATEGORY_ARRAY, /* type-category (array) */ + false, /* array types are never preferred */ + delimiter, /* array element delimiter */ + F_ARRAY_IN, /* input procedure */ + F_ARRAY_OUT, /* output procedure */ + F_ARRAY_RECV, /* receive procedure */ + F_ARRAY_SEND, /* send procedure */ + typmodinOid, /* typmodin procedure */ + typmodoutOid, /* typmodout procedure */ + F_ARRAY_TYPANALYZE, /* analyze procedure */ + F_ARRAY_SUBSCRIPT_HANDLER, /* array subscript procedure */ + typoid, /* element type ID */ + true, /* yes this is an array type */ + InvalidOid, /* no further array type */ + InvalidOid, /* base type ID */ + NULL, /* never a default type value */ + NULL, /* binary default isn't sent either */ + false, /* never passed by value */ + alignment, /* see above */ + TYPSTORAGE_EXTENDED, /* ARRAY is always toastable */ + -1, /* typMod (Domains only) */ + 0, /* Array dimensions of typbasetype */ + false, /* Type NOT NULL */ + collation); /* type's collation */ + + pfree(array_type); + + return address; +} + +/* + * Guts of type deletion. + */ +void +RemoveTypeById(Oid typeOid) +{ + Relation relation; + HeapTuple tup; + + relation = table_open(TypeRelationId, RowExclusiveLock); + + tup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typeOid)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for type %u", typeOid); + + CatalogTupleDelete(relation, &tup->t_self); + + /* + * If it is an enum, delete the pg_enum entries too; we don't bother with + * making dependency entries for those, so it has to be done "by hand" + * here. 
+ */ + if (((Form_pg_type) GETSTRUCT(tup))->typtype == TYPTYPE_ENUM) + EnumValuesDelete(typeOid); + + /* + * If it is a range type, delete the pg_range entry too; we don't bother + * with making a dependency entry for that, so it has to be done "by hand" + * here. + */ + if (((Form_pg_type) GETSTRUCT(tup))->typtype == TYPTYPE_RANGE) + RangeDelete(typeOid); + + ReleaseSysCache(tup); + + table_close(relation, RowExclusiveLock); +} + + +/* + * DefineDomain + * Registers a new domain. + */ +ObjectAddress +DefineDomain(CreateDomainStmt *stmt) +{ + char *domainName; + char *domainArrayName; + Oid domainNamespace; + AclResult aclresult; + int16 internalLength; + Oid inputProcedure; + Oid outputProcedure; + Oid receiveProcedure; + Oid sendProcedure; + Oid analyzeProcedure; + bool byValue; + char category; + char delimiter; + char alignment; + char storage; + char typtype; + Datum datum; + bool isnull; + char *defaultValue = NULL; + char *defaultValueBin = NULL; + bool saw_default = false; + bool typNotNull = false; + bool nullDefined = false; + int32 typNDims = list_length(stmt->typeName->arrayBounds); + HeapTuple typeTup; + List *schema = stmt->constraints; + ListCell *listptr; + Oid basetypeoid; + Oid old_type_oid; + Oid domaincoll; + Oid domainArrayOid; + Form_pg_type baseType; + int32 basetypeMod; + Oid baseColl; + ObjectAddress address; + + /* Convert list of names to a name and namespace */ + domainNamespace = QualifiedNameGetCreationNamespace(stmt->domainname, + &domainName); + + /* Check we have creation rights in target namespace */ + aclresult = pg_namespace_aclcheck(domainNamespace, GetUserId(), + ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_SCHEMA, + get_namespace_name(domainNamespace)); + + /* + * Check for collision with an existing type name. If there is one and + * it's an autogenerated array, we can rename it out of the way. 
+ */ + old_type_oid = GetSysCacheOid2(TYPENAMENSP, Anum_pg_type_oid, + CStringGetDatum(domainName), + ObjectIdGetDatum(domainNamespace)); + if (OidIsValid(old_type_oid)) + { + if (!moveArrayTypeName(old_type_oid, domainName, domainNamespace)) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("type \"%s\" already exists", domainName))); + } + + /* + * Look up the base type. + */ + typeTup = typenameType(NULL, stmt->typeName, &basetypeMod); + baseType = (Form_pg_type) GETSTRUCT(typeTup); + basetypeoid = baseType->oid; + + /* + * Base type must be a plain base type, a composite type, another domain, + * an enum or a range type. Domains over pseudotypes would create a + * security hole. (It would be shorter to code this to just check for + * pseudotypes; but it seems safer to call out the specific typtypes that + * are supported, rather than assume that all future typtypes would be + * automatically supported.) + */ + typtype = baseType->typtype; + if (typtype != TYPTYPE_BASE && + typtype != TYPTYPE_COMPOSITE && + typtype != TYPTYPE_DOMAIN && + typtype != TYPTYPE_ENUM && + typtype != TYPTYPE_RANGE && + typtype != TYPTYPE_MULTIRANGE) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("\"%s\" is not a valid base type for a domain", + TypeNameToString(stmt->typeName)))); + + aclresult = pg_type_aclcheck(basetypeoid, GetUserId(), ACL_USAGE); + if (aclresult != ACLCHECK_OK) + aclcheck_error_type(aclresult, basetypeoid); + + /* + * Collect the properties of the new domain. Some are inherited from the + * base type, some are not. If you change any of this inheritance + * behavior, be sure to update AlterTypeRecurse() to match! 
+ */ + + /* + * Identify the collation if any + */ + baseColl = baseType->typcollation; + if (stmt->collClause) + domaincoll = get_collation_oid(stmt->collClause->collname, false); + else + domaincoll = baseColl; + + /* Complain if COLLATE is applied to an uncollatable type */ + if (OidIsValid(domaincoll) && !OidIsValid(baseColl)) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("collations are not supported by type %s", + format_type_be(basetypeoid)))); + + /* passed by value */ + byValue = baseType->typbyval; + + /* Required Alignment */ + alignment = baseType->typalign; + + /* TOAST Strategy */ + storage = baseType->typstorage; + + /* Storage Length */ + internalLength = baseType->typlen; + + /* Type Category */ + category = baseType->typcategory; + + /* Array element Delimiter */ + delimiter = baseType->typdelim; + + /* I/O Functions */ + inputProcedure = F_DOMAIN_IN; + outputProcedure = baseType->typoutput; + receiveProcedure = F_DOMAIN_RECV; + sendProcedure = baseType->typsend; + + /* Domains never accept typmods, so no typmodin/typmodout needed */ + + /* Analysis function */ + analyzeProcedure = baseType->typanalyze; + + /* + * Domains don't need a subscript function, since they are not + * subscriptable on their own. If the base type is subscriptable, the + * parser will reduce the type to the base type before subscripting. + */ + + /* Inherited default value */ + datum = SysCacheGetAttr(TYPEOID, typeTup, + Anum_pg_type_typdefault, &isnull); + if (!isnull) + defaultValue = TextDatumGetCString(datum); + + /* Inherited default binary value */ + datum = SysCacheGetAttr(TYPEOID, typeTup, + Anum_pg_type_typdefaultbin, &isnull); + if (!isnull) + defaultValueBin = TextDatumGetCString(datum); + + /* + * Run through constraints manually to avoid the additional processing + * conducted by DefineRelation() and friends. 
+ */ + foreach(listptr, schema) + { + Constraint *constr = lfirst(listptr); + + if (!IsA(constr, Constraint)) + elog(ERROR, "unrecognized node type: %d", + (int) nodeTag(constr)); + switch (constr->contype) + { + case CONSTR_DEFAULT: + + /* + * The inherited default value may be overridden by the user + * with the DEFAULT clause ... but only once. + */ + if (saw_default) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("multiple default expressions"))); + saw_default = true; + + if (constr->raw_expr) + { + ParseState *pstate; + Node *defaultExpr; + + /* Create a dummy ParseState for transformExpr */ + pstate = make_parsestate(NULL); + + /* + * Cook the constr->raw_expr into an expression. Note: + * name is strictly for error message + */ + defaultExpr = cookDefault(pstate, constr->raw_expr, + basetypeoid, + basetypeMod, + domainName, + 0); + + /* + * If the expression is just a NULL constant, we treat it + * like not having a default. + * + * Note that if the basetype is another domain, we'll see + * a CoerceToDomain expr here and not discard the default. + * This is critical because the domain default needs to be + * retained to override any default that the base domain + * might have. + */ + if (defaultExpr == NULL || + (IsA(defaultExpr, Const) && + ((Const *) defaultExpr)->constisnull)) + { + defaultValue = NULL; + defaultValueBin = NULL; + } + else + { + /* + * Expression must be stored as a nodeToString result, + * but we also require a valid textual representation + * (mainly to make life easier for pg_dump). + */ + defaultValue = + deparse_expression(defaultExpr, + NIL, false, false); + defaultValueBin = nodeToString(defaultExpr); + } + } + else + { + /* No default (can this still happen?) 
*/ + defaultValue = NULL; + defaultValueBin = NULL; + } + break; + + case CONSTR_NOTNULL: + if (nullDefined && !typNotNull) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting NULL/NOT NULL constraints"))); + typNotNull = true; + nullDefined = true; + break; + + case CONSTR_NULL: + if (nullDefined && typNotNull) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting NULL/NOT NULL constraints"))); + typNotNull = false; + nullDefined = true; + break; + + case CONSTR_CHECK: + + /* + * Check constraints are handled after domain creation, as + * they require the Oid of the domain; at this point we can + * only check that they're not marked NO INHERIT, because that + * would be bogus. + */ + if (constr->is_no_inherit) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("check constraints for domains cannot be marked NO INHERIT"))); + break; + + /* + * All else are error cases + */ + case CONSTR_UNIQUE: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unique constraints not possible for domains"))); + break; + + case CONSTR_PRIMARY: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("primary key constraints not possible for domains"))); + break; + + case CONSTR_EXCLUSION: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("exclusion constraints not possible for domains"))); + break; + + case CONSTR_FOREIGN: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("foreign key constraints not possible for domains"))); + break; + + case CONSTR_ATTR_DEFERRABLE: + case CONSTR_ATTR_NOT_DEFERRABLE: + case CONSTR_ATTR_DEFERRED: + case CONSTR_ATTR_IMMEDIATE: + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("specifying constraint deferrability not supported for domains"))); + break; + + default: + elog(ERROR, "unrecognized constraint subtype: %d", + (int) constr->contype); + break; + } + } + + /* Allocate OID for array type */ + domainArrayOid = AssignTypeArrayOid(); + 
+ /* + * Have TypeCreate do all the real work. + */ + address = + TypeCreate(InvalidOid, /* no predetermined type OID */ + domainName, /* type name */ + domainNamespace, /* namespace */ + InvalidOid, /* relation oid (n/a here) */ + 0, /* relation kind (ditto) */ + GetUserId(), /* owner's ID */ + internalLength, /* internal size */ + TYPTYPE_DOMAIN, /* type-type (domain type) */ + category, /* type-category */ + false, /* domain types are never preferred */ + delimiter, /* array element delimiter */ + inputProcedure, /* input procedure */ + outputProcedure, /* output procedure */ + receiveProcedure, /* receive procedure */ + sendProcedure, /* send procedure */ + InvalidOid, /* typmodin procedure - none */ + InvalidOid, /* typmodout procedure - none */ + analyzeProcedure, /* analyze procedure */ + InvalidOid, /* subscript procedure - none */ + InvalidOid, /* no array element type */ + false, /* this isn't an array */ + domainArrayOid, /* array type we are about to create */ + basetypeoid, /* base type ID */ + defaultValue, /* default type value (text) */ + defaultValueBin, /* default type value (binary) */ + byValue, /* passed by value */ + alignment, /* required alignment */ + storage, /* TOAST strategy */ + basetypeMod, /* typeMod value */ + typNDims, /* Array dimensions for base type */ + typNotNull, /* Type NOT NULL */ + domaincoll); /* type's collation */ + + /* + * Create the array type that goes with it. + */ + domainArrayName = makeArrayTypeName(domainName, domainNamespace); + + /* alignment must be TYPALIGN_INT or TYPALIGN_DOUBLE for arrays */ + alignment = (alignment == TYPALIGN_DOUBLE) ? 
TYPALIGN_DOUBLE : TYPALIGN_INT; + + TypeCreate(domainArrayOid, /* force assignment of this type OID */ + domainArrayName, /* type name */ + domainNamespace, /* namespace */ + InvalidOid, /* relation oid (n/a here) */ + 0, /* relation kind (ditto) */ + GetUserId(), /* owner's ID */ + -1, /* internal size (always varlena) */ + TYPTYPE_BASE, /* type-type (base type) */ + TYPCATEGORY_ARRAY, /* type-category (array) */ + false, /* array types are never preferred */ + delimiter, /* array element delimiter */ + F_ARRAY_IN, /* input procedure */ + F_ARRAY_OUT, /* output procedure */ + F_ARRAY_RECV, /* receive procedure */ + F_ARRAY_SEND, /* send procedure */ + InvalidOid, /* typmodin procedure - none */ + InvalidOid, /* typmodout procedure - none */ + F_ARRAY_TYPANALYZE, /* analyze procedure */ + F_ARRAY_SUBSCRIPT_HANDLER, /* array subscript procedure */ + address.objectId, /* element type ID */ + true, /* yes this is an array type */ + InvalidOid, /* no further array type */ + InvalidOid, /* base type ID */ + NULL, /* never a default type value */ + NULL, /* binary default isn't sent either */ + false, /* never passed by value */ + alignment, /* see above */ + TYPSTORAGE_EXTENDED, /* ARRAY is always toastable */ + -1, /* typMod (Domains only) */ + 0, /* Array dimensions of typbasetype */ + false, /* Type NOT NULL */ + domaincoll); /* type's collation */ + + pfree(domainArrayName); + + /* + * Process constraints which refer to the domain ID returned by TypeCreate + */ + foreach(listptr, schema) + { + Constraint *constr = lfirst(listptr); + + /* it must be a Constraint, per check above */ + + switch (constr->contype) + { + case CONSTR_CHECK: + domainAddConstraint(address.objectId, domainNamespace, + basetypeoid, basetypeMod, + constr, domainName, NULL); + break; + + /* Other constraint types were fully processed above */ + + default: + break; + } + + /* CCI so we can detect duplicate constraint names */ + CommandCounterIncrement(); + } + + /* + * Now we can clean up. 
 */
	ReleaseSysCache(typeTup);

	return address;
}


/*
 * DefineEnum
 *		Registers a new enum.
 *
 * Returns the ObjectAddress of the new enum type.  Its array type is
 * created here as a side effect.
 */
ObjectAddress
DefineEnum(CreateEnumStmt *stmt)
{
	char	   *enumName;
	char	   *enumArrayName;
	Oid			enumNamespace;
	AclResult	aclresult;
	Oid			old_type_oid;
	Oid			enumArrayOid;
	ObjectAddress enumTypeAddr;

	/* Convert list of names to a name and namespace */
	enumNamespace = QualifiedNameGetCreationNamespace(stmt->typeName,
													  &enumName);

	/* Check we have creation rights in target namespace */
	aclresult = pg_namespace_aclcheck(enumNamespace, GetUserId(), ACL_CREATE);
	if (aclresult != ACLCHECK_OK)
		aclcheck_error(aclresult, OBJECT_SCHEMA,
					   get_namespace_name(enumNamespace));

	/*
	 * Check for collision with an existing type name.  If there is one and
	 * it's an autogenerated array, we can rename it out of the way.
	 */
	old_type_oid = GetSysCacheOid2(TYPENAMENSP, Anum_pg_type_oid,
								   CStringGetDatum(enumName),
								   ObjectIdGetDatum(enumNamespace));
	if (OidIsValid(old_type_oid))
	{
		if (!moveArrayTypeName(old_type_oid, enumName, enumNamespace))
			ereport(ERROR,
					(errcode(ERRCODE_DUPLICATE_OBJECT),
					 errmsg("type \"%s\" already exists", enumName)));
	}

	/* Allocate OID for array type */
	enumArrayOid = AssignTypeArrayOid();

	/* Create the pg_type entry */
	enumTypeAddr =
		TypeCreate(InvalidOid,	/* no predetermined type OID */
				   enumName,	/* type name */
				   enumNamespace,	/* namespace */
				   InvalidOid,	/* relation oid (n/a here) */
				   0,			/* relation kind (ditto) */
				   GetUserId(), /* owner's ID */
				   sizeof(Oid), /* internal size */
				   TYPTYPE_ENUM,	/* type-type (enum type) */
				   TYPCATEGORY_ENUM,	/* type-category (enum type) */
				   false,		/* enum types are never preferred */
				   DEFAULT_TYPDELIM,	/* array element delimiter */
				   F_ENUM_IN,	/* input procedure */
				   F_ENUM_OUT,	/* output procedure */
				   F_ENUM_RECV, /* receive procedure */
				   F_ENUM_SEND, /* send procedure */
				   InvalidOid,	/* typmodin procedure - none */
				   InvalidOid,	/* typmodout procedure - none */
				   InvalidOid,	/* analyze procedure - default */
				   InvalidOid,	/* subscript procedure - none */
				   InvalidOid,	/* element type ID */
				   false,		/* this is not an array type */
				   enumArrayOid,	/* array type we are about to create */
				   InvalidOid,	/* base type ID (only for domains) */
				   NULL,		/* never a default type value */
				   NULL,		/* binary default isn't sent either */
				   true,		/* always passed by value */
				   TYPALIGN_INT,	/* int alignment */
				   TYPSTORAGE_PLAIN,	/* TOAST strategy always plain */
				   -1,			/* typMod (Domains only) */
				   0,			/* Array dimensions of typbasetype */
				   false,		/* Type NOT NULL */
				   InvalidOid); /* type's collation */

	/* Enter the enum's values into pg_enum */
	EnumValuesCreate(enumTypeAddr.objectId, stmt->vals);

	/*
	 * Create the array type that goes with it.
	 */
	enumArrayName = makeArrayTypeName(enumName, enumNamespace);

	TypeCreate(enumArrayOid,	/* force assignment of this type OID */
			   enumArrayName,	/* type name */
			   enumNamespace,	/* namespace */
			   InvalidOid,		/* relation oid (n/a here) */
			   0,				/* relation kind (ditto) */
			   GetUserId(),		/* owner's ID */
			   -1,				/* internal size (always varlena) */
			   TYPTYPE_BASE,	/* type-type (base type) */
			   TYPCATEGORY_ARRAY,	/* type-category (array) */
			   false,			/* array types are never preferred */
			   DEFAULT_TYPDELIM,	/* array element delimiter */
			   F_ARRAY_IN,		/* input procedure */
			   F_ARRAY_OUT,		/* output procedure */
			   F_ARRAY_RECV,	/* receive procedure */
			   F_ARRAY_SEND,	/* send procedure */
			   InvalidOid,		/* typmodin procedure - none */
			   InvalidOid,		/* typmodout procedure - none */
			   F_ARRAY_TYPANALYZE,	/* analyze procedure */
			   F_ARRAY_SUBSCRIPT_HANDLER,	/* array subscript procedure */
			   enumTypeAddr.objectId,	/* element type ID */
			   true,			/* yes this is an array type */
			   InvalidOid,		/* no further array type */
			   InvalidOid,		/* base type ID */
			   NULL,			/* never a default type value */
			   NULL,			/* binary default isn't sent either */
			   false,			/* never passed by value */
			   TYPALIGN_INT,	/* enums have int align, so do their arrays */
			   TYPSTORAGE_EXTENDED, /* ARRAY is always toastable */
			   -1,				/* typMod (Domains only) */
			   0,				/* Array dimensions of typbasetype */
			   false,			/* Type NOT NULL */
			   InvalidOid);		/* type's collation */

	pfree(enumArrayName);

	return enumTypeAddr;
}

/*
 * AlterEnum
 *		Adds a new label to an existing enum.
 *
 * Despite the name, this also implements ALTER TYPE ... RENAME VALUE
 * (signalled by stmt->oldVal being set).
 */
ObjectAddress
AlterEnum(AlterEnumStmt *stmt)
{
	Oid			enum_type_oid;
	TypeName   *typename;
	HeapTuple	tup;
	ObjectAddress address;

	/* Make a TypeName so we can use standard type lookup machinery */
	typename = makeTypeNameFromNameList(stmt->typeName);
	enum_type_oid = typenameTypeId(NULL, typename);

	tup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(enum_type_oid));
	if (!HeapTupleIsValid(tup))
		elog(ERROR, "cache lookup failed for type %u", enum_type_oid);

	/* Check it's an enum and check user has permission to ALTER the enum */
	checkEnumOwner(tup);

	ReleaseSysCache(tup);

	if (stmt->oldVal)
	{
		/* Rename an existing label */
		RenameEnumLabel(enum_type_oid, stmt->oldVal, stmt->newVal);
	}
	else
	{
		/* Add a new label */
		AddEnumLabel(enum_type_oid, stmt->newVal,
					 stmt->newValNeighbor, stmt->newValIsAfter,
					 stmt->skipIfNewValExists);
	}

	InvokeObjectPostAlterHook(TypeRelationId, enum_type_oid, 0);

	ObjectAddressSet(address, TypeRelationId, enum_type_oid);

	return address;
}


/*
 * checkEnumOwner
 *
 * Check that the type is actually an enum and that the current user
 * has permission to do ALTER TYPE on it.  Throw an error if not.
 */
static void
checkEnumOwner(HeapTuple tup)
{
	Form_pg_type typTup = (Form_pg_type) GETSTRUCT(tup);

	/* Check that this is actually an enum */
	if (typTup->typtype != TYPTYPE_ENUM)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("%s is not an enum",
						format_type_be(typTup->oid))));

	/* Permission check: must own type */
	if (!pg_type_ownercheck(typTup->oid, GetUserId()))
		aclcheck_error_type(ACLCHECK_NOT_OWNER, typTup->oid);
}


/*
 * DefineRange
 *		Registers a new range type.
 *
 * Also creates the range's multirange type, plus array types over both,
 * and the range/multirange constructor functions.
 *
 * Perhaps it might be worthwhile to set pg_type.typelem to the base type,
 * and likewise on multiranges to set it to the range type.  But having a
 * non-zero typelem is treated elsewhere as a synonym for being an array,
 * and users might have queries with that same assumption.
 */
ObjectAddress
DefineRange(ParseState *pstate, CreateRangeStmt *stmt)
{
	char	   *typeName;
	Oid			typeNamespace;
	Oid			typoid;
	char	   *rangeArrayName;
	char	   *multirangeTypeName = NULL;
	char	   *multirangeArrayName;
	Oid			multirangeNamespace = InvalidOid;
	Oid			rangeArrayOid;
	Oid			multirangeOid;
	Oid			multirangeArrayOid;
	Oid			rangeSubtype = InvalidOid;
	List	   *rangeSubOpclassName = NIL;
	List	   *rangeCollationName = NIL;
	List	   *rangeCanonicalName = NIL;
	List	   *rangeSubtypeDiffName = NIL;
	Oid			rangeSubOpclass;
	Oid			rangeCollation;
	regproc		rangeCanonical;
	regproc		rangeSubtypeDiff;
	int16		subtyplen;
	bool		subtypbyval;
	char		subtypalign;
	char		alignment;
	AclResult	aclresult;
	ListCell   *lc;
	ObjectAddress address;
	ObjectAddress mltrngaddress PG_USED_FOR_ASSERTS_ONLY;
	Oid			castFuncOid;

	/* Convert list of names to a name and namespace */
	typeNamespace = QualifiedNameGetCreationNamespace(stmt->typeName,
													  &typeName);

	/* Check we have creation rights in target namespace */
	aclresult = pg_namespace_aclcheck(typeNamespace, GetUserId(), ACL_CREATE);
	if (aclresult != ACLCHECK_OK)
		aclcheck_error(aclresult, OBJECT_SCHEMA,
					   get_namespace_name(typeNamespace));

	/*
	 * Look to see if type already exists.
	 */
	typoid = GetSysCacheOid2(TYPENAMENSP, Anum_pg_type_oid,
							 CStringGetDatum(typeName),
							 ObjectIdGetDatum(typeNamespace));

	/*
	 * If it's not a shell, see if it's an autogenerated array type, and if so
	 * rename it out of the way.
	 */
	if (OidIsValid(typoid) && get_typisdefined(typoid))
	{
		if (moveArrayTypeName(typoid, typeName, typeNamespace))
			typoid = InvalidOid;
		else
			ereport(ERROR,
					(errcode(ERRCODE_DUPLICATE_OBJECT),
					 errmsg("type \"%s\" already exists", typeName)));
	}

	/*
	 * Unlike DefineType(), we don't insist on a shell type existing first, as
	 * it's only needed if the user wants to specify a canonical function.
	 */

	/* Extract the parameters from the parameter list */
	foreach(lc, stmt->params)
	{
		DefElem    *defel = (DefElem *) lfirst(lc);

		if (strcmp(defel->defname, "subtype") == 0)
		{
			if (OidIsValid(rangeSubtype))
				errorConflictingDefElem(defel, pstate);
			/* we can look up the subtype name immediately */
			rangeSubtype = typenameTypeId(NULL, defGetTypeName(defel));
		}
		else if (strcmp(defel->defname, "subtype_opclass") == 0)
		{
			if (rangeSubOpclassName != NIL)
				errorConflictingDefElem(defel, pstate);
			rangeSubOpclassName = defGetQualifiedName(defel);
		}
		else if (strcmp(defel->defname, "collation") == 0)
		{
			if (rangeCollationName != NIL)
				errorConflictingDefElem(defel, pstate);
			rangeCollationName = defGetQualifiedName(defel);
		}
		else if (strcmp(defel->defname, "canonical") == 0)
		{
			if (rangeCanonicalName != NIL)
				errorConflictingDefElem(defel, pstate);
			rangeCanonicalName = defGetQualifiedName(defel);
		}
		else if (strcmp(defel->defname, "subtype_diff") == 0)
		{
			if (rangeSubtypeDiffName != NIL)
				errorConflictingDefElem(defel, pstate);
			rangeSubtypeDiffName = defGetQualifiedName(defel);
		}
		else if (strcmp(defel->defname, "multirange_type_name") == 0)
		{
			if (multirangeTypeName != NULL)
				errorConflictingDefElem(defel, pstate);
			/* we can look up the subtype name immediately */
			multirangeNamespace = QualifiedNameGetCreationNamespace(defGetQualifiedName(defel),
																	&multirangeTypeName);
		}
		else
			ereport(ERROR,
					(errcode(ERRCODE_SYNTAX_ERROR),
					 errmsg("type attribute \"%s\" not recognized",
							defel->defname)));
	}

	/* Must have a subtype */
	if (!OidIsValid(rangeSubtype))
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 errmsg("type attribute \"subtype\" is required")));
	/* disallow ranges of pseudotypes */
	if (get_typtype(rangeSubtype) == TYPTYPE_PSEUDO)
		ereport(ERROR,
				(errcode(ERRCODE_DATATYPE_MISMATCH),
				 errmsg("range subtype cannot be %s",
						format_type_be(rangeSubtype))));

	/* Identify subopclass */
	rangeSubOpclass = findRangeSubOpclass(rangeSubOpclassName, rangeSubtype);

	/* Identify collation to use, if any */
	if (type_is_collatable(rangeSubtype))
	{
		if (rangeCollationName != NIL)
			rangeCollation = get_collation_oid(rangeCollationName, false);
		else
			rangeCollation = get_typcollation(rangeSubtype);
	}
	else
	{
		if (rangeCollationName != NIL)
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("range collation specified but subtype does not support collation")));
		rangeCollation = InvalidOid;
	}

	/* Identify support functions, if provided */
	if (rangeCanonicalName != NIL)
	{
		if (!OidIsValid(typoid))
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
					 errmsg("cannot specify a canonical function without a pre-created shell type"),
					 errhint("Create the type as a shell type, then create its canonicalization function, then do a full CREATE TYPE.")));
		rangeCanonical = findRangeCanonicalFunction(rangeCanonicalName,
													typoid);
	}
	else
		rangeCanonical = InvalidOid;

	if (rangeSubtypeDiffName != NIL)
		rangeSubtypeDiff = findRangeSubtypeDiffFunction(rangeSubtypeDiffName,
														rangeSubtype);
	else
		rangeSubtypeDiff = InvalidOid;

	get_typlenbyvalalign(rangeSubtype,
						 &subtyplen, &subtypbyval, &subtypalign);

	/* alignment must be TYPALIGN_INT or TYPALIGN_DOUBLE for ranges */
	alignment = (subtypalign == TYPALIGN_DOUBLE) ? TYPALIGN_DOUBLE : TYPALIGN_INT;

	/* Allocate OID for array type, its multirange, and its multirange array */
	rangeArrayOid = AssignTypeArrayOid();
	multirangeOid = AssignTypeMultirangeOid();
	multirangeArrayOid = AssignTypeMultirangeArrayOid();

	/* Create the pg_type entry */
	address =
		TypeCreate(InvalidOid,	/* no predetermined type OID */
				   typeName,	/* type name */
				   typeNamespace,	/* namespace */
				   InvalidOid,	/* relation oid (n/a here) */
				   0,			/* relation kind (ditto) */
				   GetUserId(), /* owner's ID */
				   -1,			/* internal size (always varlena) */
				   TYPTYPE_RANGE,	/* type-type (range type) */
				   TYPCATEGORY_RANGE,	/* type-category (range type) */
				   false,		/* range types are never preferred */
				   DEFAULT_TYPDELIM,	/* array element delimiter */
				   F_RANGE_IN,	/* input procedure */
				   F_RANGE_OUT, /* output procedure */
				   F_RANGE_RECV,	/* receive procedure */
				   F_RANGE_SEND,	/* send procedure */
				   InvalidOid,	/* typmodin procedure - none */
				   InvalidOid,	/* typmodout procedure - none */
				   F_RANGE_TYPANALYZE,	/* analyze procedure */
				   InvalidOid,	/* subscript procedure - none */
				   InvalidOid,	/* element type ID - none */
				   false,		/* this is not an array type */
				   rangeArrayOid,	/* array type we are about to create */
				   InvalidOid,	/* base type ID (only for domains) */
				   NULL,		/* never a default type value */
				   NULL,		/* no binary form available either */
				   false,		/* never passed by value */
				   alignment,	/* alignment */
				   TYPSTORAGE_EXTENDED, /* TOAST strategy (always extended) */
				   -1,			/* typMod (Domains only) */
				   0,			/* Array dimensions of typbasetype */
				   false,		/* Type NOT NULL */
				   InvalidOid); /* type's collation (ranges never have one) */
	Assert(typoid == InvalidOid || typoid == address.objectId);
	typoid = address.objectId;

	/* Create the multirange that goes with it */
	if (multirangeTypeName)
	{
		Oid			old_typoid;

		/*
		 * Look to see if multirange type already exists.
		 */
		old_typoid = GetSysCacheOid2(TYPENAMENSP, Anum_pg_type_oid,
									 CStringGetDatum(multirangeTypeName),
									 ObjectIdGetDatum(multirangeNamespace));

		/*
		 * If it's not a shell, see if it's an autogenerated array type, and
		 * if so rename it out of the way.
		 */
		if (OidIsValid(old_typoid) && get_typisdefined(old_typoid))
		{
			if (!moveArrayTypeName(old_typoid, multirangeTypeName, multirangeNamespace))
				ereport(ERROR,
						(errcode(ERRCODE_DUPLICATE_OBJECT),
						 errmsg("type \"%s\" already exists", multirangeTypeName)));
		}
	}
	else
	{
		/* Generate multirange name automatically */
		multirangeNamespace = typeNamespace;
		multirangeTypeName = makeMultirangeTypeName(typeName, multirangeNamespace);
	}

	mltrngaddress =
		TypeCreate(multirangeOid,	/* force assignment of this type OID */
				   multirangeTypeName,	/* type name */
				   multirangeNamespace, /* namespace */
				   InvalidOid,	/* relation oid (n/a here) */
				   0,			/* relation kind (ditto) */
				   GetUserId(), /* owner's ID */
				   -1,			/* internal size (always varlena) */
				   TYPTYPE_MULTIRANGE,	/* type-type (multirange type) */
				   TYPCATEGORY_RANGE,	/* type-category (range type) */
				   false,		/* multirange types are never preferred */
				   DEFAULT_TYPDELIM,	/* array element delimiter */
				   F_MULTIRANGE_IN, /* input procedure */
				   F_MULTIRANGE_OUT,	/* output procedure */
				   F_MULTIRANGE_RECV,	/* receive procedure */
				   F_MULTIRANGE_SEND,	/* send procedure */
				   InvalidOid,	/* typmodin procedure - none */
				   InvalidOid,	/* typmodout procedure - none */
				   F_MULTIRANGE_TYPANALYZE, /* analyze procedure */
				   InvalidOid,	/* subscript procedure - none */
				   InvalidOid,	/* element type ID - none */
				   false,		/* this is not an array type */
				   multirangeArrayOid,	/* array type we are about to create */
				   InvalidOid,	/* base type ID (only for domains) */
				   NULL,		/* never a default type value */
				   NULL,		/* no binary form available either */
false, /* never passed by value */ + alignment, /* alignment */ + 'x', /* TOAST strategy (always extended) */ + -1, /* typMod (Domains only) */ + 0, /* Array dimensions of typbasetype */ + false, /* Type NOT NULL */ + InvalidOid); /* type's collation (ranges never have one) */ + Assert(multirangeOid == mltrngaddress.objectId); + + /* Create the entry in pg_range */ + RangeCreate(typoid, rangeSubtype, rangeCollation, rangeSubOpclass, + rangeCanonical, rangeSubtypeDiff, multirangeOid); + + /* + * Create the array type that goes with it. + */ + rangeArrayName = makeArrayTypeName(typeName, typeNamespace); + + TypeCreate(rangeArrayOid, /* force assignment of this type OID */ + rangeArrayName, /* type name */ + typeNamespace, /* namespace */ + InvalidOid, /* relation oid (n/a here) */ + 0, /* relation kind (ditto) */ + GetUserId(), /* owner's ID */ + -1, /* internal size (always varlena) */ + TYPTYPE_BASE, /* type-type (base type) */ + TYPCATEGORY_ARRAY, /* type-category (array) */ + false, /* array types are never preferred */ + DEFAULT_TYPDELIM, /* array element delimiter */ + F_ARRAY_IN, /* input procedure */ + F_ARRAY_OUT, /* output procedure */ + F_ARRAY_RECV, /* receive procedure */ + F_ARRAY_SEND, /* send procedure */ + InvalidOid, /* typmodin procedure - none */ + InvalidOid, /* typmodout procedure - none */ + F_ARRAY_TYPANALYZE, /* analyze procedure */ + F_ARRAY_SUBSCRIPT_HANDLER, /* array subscript procedure */ + typoid, /* element type ID */ + true, /* yes this is an array type */ + InvalidOid, /* no further array type */ + InvalidOid, /* base type ID */ + NULL, /* never a default type value */ + NULL, /* binary default isn't sent either */ + false, /* never passed by value */ + alignment, /* alignment - same as range's */ + TYPSTORAGE_EXTENDED, /* ARRAY is always toastable */ + -1, /* typMod (Domains only) */ + 0, /* Array dimensions of typbasetype */ + false, /* Type NOT NULL */ + InvalidOid); /* typcollation */ + + pfree(rangeArrayName); + + /* Create the 
multirange's array type */ + + multirangeArrayName = makeArrayTypeName(multirangeTypeName, typeNamespace); + + TypeCreate(multirangeArrayOid, /* force assignment of this type OID */ + multirangeArrayName, /* type name */ + multirangeNamespace, /* namespace */ + InvalidOid, /* relation oid (n/a here) */ + 0, /* relation kind (ditto) */ + GetUserId(), /* owner's ID */ + -1, /* internal size (always varlena) */ + TYPTYPE_BASE, /* type-type (base type) */ + TYPCATEGORY_ARRAY, /* type-category (array) */ + false, /* array types are never preferred */ + DEFAULT_TYPDELIM, /* array element delimiter */ + F_ARRAY_IN, /* input procedure */ + F_ARRAY_OUT, /* output procedure */ + F_ARRAY_RECV, /* receive procedure */ + F_ARRAY_SEND, /* send procedure */ + InvalidOid, /* typmodin procedure - none */ + InvalidOid, /* typmodout procedure - none */ + F_ARRAY_TYPANALYZE, /* analyze procedure */ + F_ARRAY_SUBSCRIPT_HANDLER, /* array subscript procedure */ + multirangeOid, /* element type ID */ + true, /* yes this is an array type */ + InvalidOid, /* no further array type */ + InvalidOid, /* base type ID */ + NULL, /* never a default type value */ + NULL, /* binary default isn't sent either */ + false, /* never passed by value */ + alignment, /* alignment - same as range's */ + 'x', /* ARRAY is always toastable */ + -1, /* typMod (Domains only) */ + 0, /* Array dimensions of typbasetype */ + false, /* Type NOT NULL */ + InvalidOid); /* typcollation */ + + /* And create the constructor functions for this range type */ + makeRangeConstructors(typeName, typeNamespace, typoid, rangeSubtype); + makeMultirangeConstructors(multirangeTypeName, typeNamespace, + multirangeOid, typoid, rangeArrayOid, + &castFuncOid); + + /* Create cast from the range type to its multirange type */ + CastCreate(typoid, multirangeOid, castFuncOid, 'e', 'f', DEPENDENCY_INTERNAL); + + pfree(multirangeArrayName); + + return address; +} + +/* + * Because there may exist several range types over the same subtype, the 
 * range type can't be uniquely determined from the subtype.  So it's
 * impossible to define a polymorphic constructor; we have to generate new
 * constructor functions explicitly for each range type.
 *
 * We actually define 2 functions, with 2 and 3 arguments (the 3rd being the
 * optional bounds-flags text argument).  This is just to offer more
 * convenience for the user.
 */
static void
makeRangeConstructors(const char *name, Oid namespace,
					  Oid rangeOid, Oid subtype)
{
	static const char *const prosrc[2] = {"range_constructor2",
		"range_constructor3"};
	static const int pronargs[2] = {2, 3};

	Oid			constructorArgTypes[3];
	ObjectAddress myself,
				referenced;
	int			i;

	constructorArgTypes[0] = subtype;
	constructorArgTypes[1] = subtype;
	constructorArgTypes[2] = TEXTOID;

	referenced.classId = TypeRelationId;
	referenced.objectId = rangeOid;
	referenced.objectSubId = 0;

	for (i = 0; i < lengthof(prosrc); i++)
	{
		oidvector  *constructorArgTypesVector;

		constructorArgTypesVector = buildoidvector(constructorArgTypes,
												   pronargs[i]);

		myself = ProcedureCreate(name,	/* name: same as range type */
								 namespace, /* namespace */
								 false, /* replace */
								 false, /* returns set */
								 rangeOid,	/* return type */
								 BOOTSTRAP_SUPERUSERID, /* proowner */
								 INTERNALlanguageId,	/* language */
								 F_FMGR_INTERNAL_VALIDATOR, /* language validator */
								 prosrc[i], /* prosrc */
								 NULL,	/* probin */
								 NULL,	/* prosqlbody */
								 PROKIND_FUNCTION,
								 false, /* security_definer */
								 false, /* leakproof */
								 false, /* isStrict (NULL arguments are
										 * accepted, unlike the multirange
										 * constructors) */
								 PROVOLATILE_IMMUTABLE, /* volatility */
								 PROPARALLEL_SAFE,	/* parallel safety */
								 constructorArgTypesVector, /* parameterTypes */
								 PointerGetDatum(NULL), /* allParameterTypes */
								 PointerGetDatum(NULL), /* parameterModes */
								 PointerGetDatum(NULL), /* parameterNames */
								 NIL,	/* parameterDefaults */
								 PointerGetDatum(NULL), /* trftypes */
								 PointerGetDatum(NULL), /* proconfig */
								 InvalidOid,	/* prosupport */
								 1.0,	/* procost */
								 0.0);	/* prorows */

		/*
		 * Make the constructors internally-dependent on the range type so
		 * that they go away silently when the type is dropped.  Note that
		 * pg_dump depends on this choice to avoid dumping the constructors.
		 */
		recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL);
	}
}

/*
 * We make a separate multirange constructor for each range type
 * so its name can include the base type, like range constructors do.
 * If we had an anyrangearray polymorphic type we could use it here,
 * but since each type has its own constructor name there's no need.
 *
 * Sets castFuncOid to the oid of the new constructor that can be used
 * to cast from a range to a multirange.
 */
static void
makeMultirangeConstructors(const char *name, Oid namespace,
						   Oid multirangeOid, Oid rangeOid, Oid rangeArrayOid,
						   Oid *castFuncOid)
{
	ObjectAddress myself,
				referenced;
	oidvector  *argtypes;
	Datum		allParamTypes;
	ArrayType  *allParameterTypes;
	Datum		paramModes;
	ArrayType  *parameterModes;

	referenced.classId = TypeRelationId;
	referenced.objectId = multirangeOid;
	referenced.objectSubId = 0;

	/* 0-arg constructor - for empty multiranges */
	argtypes = buildoidvector(NULL, 0);
	myself = ProcedureCreate(name,	/* name: same as multirange type */
							 namespace,
							 false, /* replace */
							 false, /* returns set */
							 multirangeOid, /* return type */
							 BOOTSTRAP_SUPERUSERID, /* proowner */
							 INTERNALlanguageId,	/* language */
							 F_FMGR_INTERNAL_VALIDATOR,
							 "multirange_constructor0", /* prosrc */
							 NULL,	/* probin */
							 NULL,	/* prosqlbody */
							 PROKIND_FUNCTION,
							 false, /* security_definer */
							 false, /* leakproof */
							 true,	/* isStrict */
							 PROVOLATILE_IMMUTABLE, /* volatility */
							 PROPARALLEL_SAFE,	/* parallel safety */
							 argtypes,	/* parameterTypes */
							 PointerGetDatum(NULL), /* allParameterTypes */
							 PointerGetDatum(NULL), /* parameterModes */
							 PointerGetDatum(NULL), /* parameterNames */
							 NIL,	/* parameterDefaults */
							 PointerGetDatum(NULL), /* trftypes */
							 PointerGetDatum(NULL), /* proconfig */
							 InvalidOid,	/* prosupport */
							 1.0,	/* procost */
							 0.0);	/* prorows */

	/*
	 * Make the constructor internally-dependent on the multirange type so
	 * that they go away silently when the type is dropped.  Note that
	 * pg_dump depends on this choice to avoid dumping the constructors.
	 */
	recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL);
	pfree(argtypes);

	/*
	 * 1-arg constructor - for casts
	 *
	 * In theory we shouldn't need both this and the vararg (n-arg)
	 * constructor, but having a separate 1-arg function lets us define casts
	 * against it.
	 */
	argtypes = buildoidvector(&rangeOid, 1);
	myself = ProcedureCreate(name,	/* name: same as multirange type */
							 namespace,
							 false, /* replace */
							 false, /* returns set */
							 multirangeOid, /* return type */
							 BOOTSTRAP_SUPERUSERID, /* proowner */
							 INTERNALlanguageId,	/* language */
							 F_FMGR_INTERNAL_VALIDATOR,
							 "multirange_constructor1", /* prosrc */
							 NULL,	/* probin */
							 NULL,	/* prosqlbody */
							 PROKIND_FUNCTION,
							 false, /* security_definer */
							 false, /* leakproof */
							 true,	/* isStrict */
							 PROVOLATILE_IMMUTABLE, /* volatility */
							 PROPARALLEL_SAFE,	/* parallel safety */
							 argtypes,	/* parameterTypes */
							 PointerGetDatum(NULL), /* allParameterTypes */
							 PointerGetDatum(NULL), /* parameterModes */
							 PointerGetDatum(NULL), /* parameterNames */
							 NIL,	/* parameterDefaults */
							 PointerGetDatum(NULL), /* trftypes */
							 PointerGetDatum(NULL), /* proconfig */
							 InvalidOid,	/* prosupport */
							 1.0,	/* procost */
							 0.0);	/* prorows */
	/* ditto */
	recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL);
	pfree(argtypes);
	*castFuncOid = myself.objectId;

	/* n-arg constructor - vararg */
	argtypes = buildoidvector(&rangeArrayOid, 1);
	allParamTypes = ObjectIdGetDatum(rangeArrayOid);
	allParameterTypes = construct_array(&allParamTypes,
										1, OIDOID,
										sizeof(Oid), true, TYPALIGN_INT);
	paramModes = CharGetDatum(FUNC_PARAM_VARIADIC);
	parameterModes = construct_array(&paramModes, 1, CHAROID,
									 1, true, TYPALIGN_CHAR);
	myself = ProcedureCreate(name,	/* name: same as multirange type */
							 namespace,
							 false, /* replace */
							 false, /* returns set */
							 multirangeOid, /* return type */
							 BOOTSTRAP_SUPERUSERID, /* proowner */
							 INTERNALlanguageId,	/* language */
							 F_FMGR_INTERNAL_VALIDATOR,
							 "multirange_constructor2", /* prosrc */
							 NULL,	/* probin */
							 NULL,	/* prosqlbody */
							 PROKIND_FUNCTION,
							 false, /* security_definer */
							 false, /* leakproof */
							 true,	/* isStrict */
							 PROVOLATILE_IMMUTABLE, /* volatility */
							 PROPARALLEL_SAFE,	/* parallel safety */
							 argtypes,	/* parameterTypes */
							 PointerGetDatum(allParameterTypes),	/* allParameterTypes */
							 PointerGetDatum(parameterModes),	/* parameterModes */
							 PointerGetDatum(NULL), /* parameterNames */
							 NIL,	/* parameterDefaults */
							 PointerGetDatum(NULL), /* trftypes */
							 PointerGetDatum(NULL), /* proconfig */
							 InvalidOid,	/* prosupport */
							 1.0,	/* procost */
							 0.0);	/* prorows */
	/* ditto */
	recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL);
	pfree(argtypes);
	pfree(allParameterTypes);
	pfree(parameterModes);
}

/*
 * Find suitable I/O and other support functions for a type.
 *
 * typeOid is the type's OID (which will already exist, if only as a shell
 * type).
 */

static Oid
findTypeInputFunction(List *procname, Oid typeOid)
{
	Oid			argList[3];
	Oid			procOid;
	Oid			procOid2;

	/*
	 * Input functions can take a single argument of type CSTRING, or three
	 * arguments (string, typioparam OID, typmod).  Whine about ambiguity if
	 * both forms exist.
 */
	argList[0] = CSTRINGOID;
	argList[1] = OIDOID;
	argList[2] = INT4OID;

	procOid = LookupFuncName(procname, 1, argList, true);
	procOid2 = LookupFuncName(procname, 3, argList, true);
	if (OidIsValid(procOid))
	{
		if (OidIsValid(procOid2))
			ereport(ERROR,
					(errcode(ERRCODE_AMBIGUOUS_FUNCTION),
					 errmsg("type input function %s has multiple matches",
							NameListToString(procname))));
	}
	else
	{
		procOid = procOid2;
		/* If not found, reference the 1-argument signature in error msg */
		if (!OidIsValid(procOid))
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_FUNCTION),
					 errmsg("function %s does not exist",
							func_signature_string(procname, 1, NIL, argList))));
	}

	/* Input functions must return the target type. */
	if (get_func_rettype(procOid) != typeOid)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("type input function %s must return type %s",
						NameListToString(procname), format_type_be(typeOid))));

	/*
	 * Print warnings if any of the type's I/O functions are marked volatile.
	 * There is a general assumption that I/O functions are stable or
	 * immutable; this allows us for example to mark record_in/record_out
	 * stable rather than volatile.  Ideally we would throw errors not just
	 * warnings here; but since this check is new as of 9.5, and since the
	 * volatility marking might be just an error-of-omission and not a true
	 * indication of how the function behaves, we'll let it pass as a warning
	 * for now.
	 */
	if (func_volatile(procOid) == PROVOLATILE_VOLATILE)
		ereport(WARNING,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("type input function %s should not be volatile",
						NameListToString(procname))));

	return procOid;
}

/*
 * findTypeOutputFunction
 *		Resolve and validate the output function for a type.
 */
static Oid
findTypeOutputFunction(List *procname, Oid typeOid)
{
	Oid			argList[1];
	Oid			procOid;

	/*
	 * Output functions always take a single argument of the type and return
	 * cstring.
	 */
	argList[0] = typeOid;

	procOid = LookupFuncName(procname, 1, argList, true);
	if (!OidIsValid(procOid))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_FUNCTION),
				 errmsg("function %s does not exist",
						func_signature_string(procname, 1, NIL, argList))));

	if (get_func_rettype(procOid) != CSTRINGOID)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("type output function %s must return type %s",
						NameListToString(procname), "cstring")));

	/* Just a warning for now, per comments in findTypeInputFunction */
	if (func_volatile(procOid) == PROVOLATILE_VOLATILE)
		ereport(WARNING,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("type output function %s should not be volatile",
						NameListToString(procname))));

	return procOid;
}

/*
 * findTypeReceiveFunction
 *		Resolve and validate the binary-receive function for a type.
 */
static Oid
findTypeReceiveFunction(List *procname, Oid typeOid)
{
	Oid			argList[3];
	Oid			procOid;
	Oid			procOid2;

	/*
	 * Receive functions can take a single argument of type INTERNAL, or
	 * three arguments (internal, typioparam OID, typmod).  Whine about
	 * ambiguity if both forms exist.
	 */
	argList[0] = INTERNALOID;
	argList[1] = OIDOID;
	argList[2] = INT4OID;

	procOid = LookupFuncName(procname, 1, argList, true);
	procOid2 = LookupFuncName(procname, 3, argList, true);
	if (OidIsValid(procOid))
	{
		if (OidIsValid(procOid2))
			ereport(ERROR,
					(errcode(ERRCODE_AMBIGUOUS_FUNCTION),
					 errmsg("type receive function %s has multiple matches",
							NameListToString(procname))));
	}
	else
	{
		procOid = procOid2;
		/* If not found, reference the 1-argument signature in error msg */
		if (!OidIsValid(procOid))
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_FUNCTION),
					 errmsg("function %s does not exist",
							func_signature_string(procname, 1, NIL, argList))));
	}

	/* Receive functions must return the target type. */
	if (get_func_rettype(procOid) != typeOid)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("type receive function %s must return type %s",
						NameListToString(procname), format_type_be(typeOid))));

	/* Just a warning for now, per comments in findTypeInputFunction */
	if (func_volatile(procOid) == PROVOLATILE_VOLATILE)
		ereport(WARNING,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("type receive function %s should not be volatile",
						NameListToString(procname))));

	return procOid;
}

/*
 * findTypeSendFunction
 *		Resolve and validate the binary-send function for a type.
 */
static Oid
findTypeSendFunction(List *procname, Oid typeOid)
{
	Oid			argList[1];
	Oid			procOid;

	/*
	 * Send functions always take a single argument of the type and return
	 * bytea.
	 */
	argList[0] = typeOid;

	procOid = LookupFuncName(procname, 1, argList, true);
	if (!OidIsValid(procOid))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_FUNCTION),
				 errmsg("function %s does not exist",
						func_signature_string(procname, 1, NIL, argList))));

	if (get_func_rettype(procOid) != BYTEAOID)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("type send function %s must return type %s",
						NameListToString(procname), "bytea")));

	/* Just a warning for now, per comments in findTypeInputFunction */
	if (func_volatile(procOid) == PROVOLATILE_VOLATILE)
		ereport(WARNING,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("type send function %s should not be volatile",
						NameListToString(procname))));

	return procOid;
}

/*
 * findTypeTypmodinFunction
 *		Resolve and validate a type-modifier input function.
 */
static Oid
findTypeTypmodinFunction(List *procname)
{
	Oid			argList[1];
	Oid			procOid;

	/*
	 * typmodin functions always take one cstring[] argument and return int4.
	 */
	argList[0] = CSTRINGARRAYOID;

	procOid = LookupFuncName(procname, 1, argList, true);
	if (!OidIsValid(procOid))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_FUNCTION),
				 errmsg("function %s does not exist",
						func_signature_string(procname, 1, NIL, argList))));

	if (get_func_rettype(procOid) != INT4OID)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("typmod_in function %s must return type %s",
						NameListToString(procname), "integer")));

	/* Just a warning for now, per comments in findTypeInputFunction */
	if (func_volatile(procOid) == PROVOLATILE_VOLATILE)
		ereport(WARNING,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("type modifier input function %s should not be volatile",
						NameListToString(procname))));

	return procOid;
}

/*
 * findTypeTypmodoutFunction
 *		Resolve and validate a type-modifier output function.
 */
static Oid
findTypeTypmodoutFunction(List *procname)
{
	Oid			argList[1];
	Oid			procOid;

	/*
	 * typmodout functions always take one int4 argument and return cstring.
	 */
	argList[0] = INT4OID;

	procOid = LookupFuncName(procname, 1, argList, true);
	if (!OidIsValid(procOid))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_FUNCTION),
				 errmsg("function %s does not exist",
						func_signature_string(procname, 1, NIL, argList))));

	if (get_func_rettype(procOid) != CSTRINGOID)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("typmod_out function %s must return type %s",
						NameListToString(procname), "cstring")));

	/* Just a warning for now, per comments in findTypeInputFunction */
	if (func_volatile(procOid) == PROVOLATILE_VOLATILE)
		ereport(WARNING,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("type modifier output function %s should not be volatile",
						NameListToString(procname))));

	return procOid;
}

/*
 * findTypeAnalyzeFunction
 *		Resolve and validate a custom ANALYZE function for a type.
 *
 * Note: no volatility warning here; analyze functions aren't I/O functions.
 */
static Oid
findTypeAnalyzeFunction(List *procname, Oid typeOid)
{
	Oid			argList[1];
	Oid			procOid;

	/*
	 * Analyze functions always take one INTERNAL argument and return bool.
	 */
	argList[0] = INTERNALOID;

	procOid = LookupFuncName(procname, 1, argList, true);
	if (!OidIsValid(procOid))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_FUNCTION),
				 errmsg("function %s does not exist",
						func_signature_string(procname, 1, NIL, argList))));

	if (get_func_rettype(procOid) != BOOLOID)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("type analyze function %s must return type %s",
						NameListToString(procname), "boolean")));

	return procOid;
}

/*
 * findTypeSubscriptingFunction
 *		Resolve and validate a subscripting-handler function for a type.
 */
static Oid
findTypeSubscriptingFunction(List *procname, Oid typeOid)
{
	Oid			argList[1];
	Oid			procOid;

	/*
	 * Subscripting support functions always take one INTERNAL argument and
	 * return INTERNAL.  (The argument is not used, but we must have it to
	 * maintain type safety.)
	 */
	argList[0] = INTERNALOID;

	procOid = LookupFuncName(procname, 1, argList, true);
	if (!OidIsValid(procOid))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_FUNCTION),
				 errmsg("function %s does not exist",
						func_signature_string(procname, 1, NIL, argList))));

	if (get_func_rettype(procOid) != INTERNALOID)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("type subscripting function %s must return type %s",
						NameListToString(procname), "internal")));

	/*
	 * We disallow array_subscript_handler() from being selected explicitly,
	 * since that must only be applied to autogenerated array types.
	 */
	if (procOid == F_ARRAY_SUBSCRIPT_HANDLER)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("user-defined types cannot use subscripting function %s",
						NameListToString(procname))));

	return procOid;
}

/*
 * Find suitable support functions and opclasses for a range type.
 */

/*
 * Find named btree opclass for subtype, or default btree opclass if
 * opcname is NIL.
+ */ +static Oid +findRangeSubOpclass(List *opcname, Oid subtype) +{ + Oid opcid; + Oid opInputType; + + if (opcname != NIL) + { + opcid = get_opclass_oid(BTREE_AM_OID, opcname, false); + + /* + * Verify that the operator class accepts this datatype. Note we will + * accept binary compatibility. + */ + opInputType = get_opclass_input_type(opcid); + if (!IsBinaryCoercible(subtype, opInputType)) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("operator class \"%s\" does not accept data type %s", + NameListToString(opcname), + format_type_be(subtype)))); + } + else + { + opcid = GetDefaultOpClass(subtype, BTREE_AM_OID); + if (!OidIsValid(opcid)) + { + /* We spell the error message identically to ResolveOpClass */ + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("data type %s has no default operator class for access method \"%s\"", + format_type_be(subtype), "btree"), + errhint("You must specify an operator class for the range type or define a default operator class for the subtype."))); + } + } + + return opcid; +} + +static Oid +findRangeCanonicalFunction(List *procname, Oid typeOid) +{ + Oid argList[1]; + Oid procOid; + AclResult aclresult; + + /* + * Range canonical functions must take and return the range type, and must + * be immutable. 
+ */ + argList[0] = typeOid; + + procOid = LookupFuncName(procname, 1, argList, true); + + if (!OidIsValid(procOid)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("function %s does not exist", + func_signature_string(procname, 1, NIL, argList)))); + + if (get_func_rettype(procOid) != typeOid) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("range canonical function %s must return range type", + func_signature_string(procname, 1, NIL, argList)))); + + if (func_volatile(procOid) != PROVOLATILE_IMMUTABLE) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("range canonical function %s must be immutable", + func_signature_string(procname, 1, NIL, argList)))); + + /* Also, range type's creator must have permission to call function */ + aclresult = pg_proc_aclcheck(procOid, GetUserId(), ACL_EXECUTE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_FUNCTION, get_func_name(procOid)); + + return procOid; +} + +static Oid +findRangeSubtypeDiffFunction(List *procname, Oid subtype) +{ + Oid argList[2]; + Oid procOid; + AclResult aclresult; + + /* + * Range subtype diff functions must take two arguments of the subtype, + * must return float8, and must be immutable. 
+ */ + argList[0] = subtype; + argList[1] = subtype; + + procOid = LookupFuncName(procname, 2, argList, true); + + if (!OidIsValid(procOid)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("function %s does not exist", + func_signature_string(procname, 2, NIL, argList)))); + + if (get_func_rettype(procOid) != FLOAT8OID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("range subtype diff function %s must return type %s", + func_signature_string(procname, 2, NIL, argList), + "double precision"))); + + if (func_volatile(procOid) != PROVOLATILE_IMMUTABLE) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("range subtype diff function %s must be immutable", + func_signature_string(procname, 2, NIL, argList)))); + + /* Also, range type's creator must have permission to call function */ + aclresult = pg_proc_aclcheck(procOid, GetUserId(), ACL_EXECUTE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_FUNCTION, get_func_name(procOid)); + + return procOid; +} + +/* + * AssignTypeArrayOid + * + * Pre-assign the type's array OID for use in pg_type.typarray + */ +Oid +AssignTypeArrayOid(void) +{ + Oid type_array_oid; + + /* Use binary-upgrade override for pg_type.typarray? 
*/ + if (IsBinaryUpgrade) + { + if (!OidIsValid(binary_upgrade_next_array_pg_type_oid)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("pg_type array OID value not set when in binary upgrade mode"))); + + type_array_oid = binary_upgrade_next_array_pg_type_oid; + binary_upgrade_next_array_pg_type_oid = InvalidOid; + } + else + { + Relation pg_type = table_open(TypeRelationId, AccessShareLock); + + type_array_oid = GetNewOidWithIndex(pg_type, TypeOidIndexId, + Anum_pg_type_oid); + table_close(pg_type, AccessShareLock); + } + + return type_array_oid; +} + +/* + * AssignTypeMultirangeOid + * + * Pre-assign the range type's multirange OID for use in pg_type.oid + */ +Oid +AssignTypeMultirangeOid(void) +{ + Oid type_multirange_oid; + + /* Use binary-upgrade override for pg_type.oid? */ + if (IsBinaryUpgrade) + { + if (!OidIsValid(binary_upgrade_next_mrng_pg_type_oid)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("pg_type multirange OID value not set when in binary upgrade mode"))); + + type_multirange_oid = binary_upgrade_next_mrng_pg_type_oid; + binary_upgrade_next_mrng_pg_type_oid = InvalidOid; + } + else + { + Relation pg_type = table_open(TypeRelationId, AccessShareLock); + + type_multirange_oid = GetNewOidWithIndex(pg_type, TypeOidIndexId, + Anum_pg_type_oid); + table_close(pg_type, AccessShareLock); + } + + return type_multirange_oid; +} + +/* + * AssignTypeMultirangeArrayOid + * + * Pre-assign the range type's multirange array OID for use in pg_type.typarray + */ +Oid +AssignTypeMultirangeArrayOid(void) +{ + Oid type_multirange_array_oid; + + /* Use binary-upgrade override for pg_type.oid? 
*/ + if (IsBinaryUpgrade) + { + if (!OidIsValid(binary_upgrade_next_mrng_array_pg_type_oid)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("pg_type multirange array OID value not set when in binary upgrade mode"))); + + type_multirange_array_oid = binary_upgrade_next_mrng_array_pg_type_oid; + binary_upgrade_next_mrng_array_pg_type_oid = InvalidOid; + } + else + { + Relation pg_type = table_open(TypeRelationId, AccessShareLock); + + type_multirange_array_oid = GetNewOidWithIndex(pg_type, TypeOidIndexId, + Anum_pg_type_oid); + table_close(pg_type, AccessShareLock); + } + + return type_multirange_array_oid; +} + + +/*------------------------------------------------------------------- + * DefineCompositeType + * + * Create a Composite Type relation. + * `DefineRelation' does all the work, we just provide the correct + * arguments! + * + * If the relation already exists, then 'DefineRelation' will abort + * the xact... + * + * Return type is the new type's object address. + *------------------------------------------------------------------- + */ +ObjectAddress +DefineCompositeType(RangeVar *typevar, List *coldeflist) +{ + CreateStmt *createStmt = makeNode(CreateStmt); + Oid old_type_oid; + Oid typeNamespace; + ObjectAddress address; + + /* + * now set the parameters for keys/inheritance etc. All of these are + * uninteresting for composite types... + */ + createStmt->relation = typevar; + createStmt->tableElts = coldeflist; + createStmt->inhRelations = NIL; + createStmt->constraints = NIL; + createStmt->options = NIL; + createStmt->oncommit = ONCOMMIT_NOOP; + createStmt->tablespacename = NULL; + createStmt->if_not_exists = false; + + /* + * Check for collision with an existing type name. If there is one and + * it's an autogenerated array, we can rename it out of the way. This + * check is here mainly to get a better error message about a "type" + * instead of below about a "relation". 
+ */ + typeNamespace = RangeVarGetAndCheckCreationNamespace(createStmt->relation, + NoLock, NULL); + RangeVarAdjustRelationPersistence(createStmt->relation, typeNamespace); + old_type_oid = + GetSysCacheOid2(TYPENAMENSP, Anum_pg_type_oid, + CStringGetDatum(createStmt->relation->relname), + ObjectIdGetDatum(typeNamespace)); + if (OidIsValid(old_type_oid)) + { + if (!moveArrayTypeName(old_type_oid, createStmt->relation->relname, typeNamespace)) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("type \"%s\" already exists", createStmt->relation->relname))); + } + + /* + * Finally create the relation. This also creates the type. + */ + DefineRelation(createStmt, RELKIND_COMPOSITE_TYPE, InvalidOid, &address, + NULL); + + return address; +} + +/* + * AlterDomainDefault + * + * Routine implementing ALTER DOMAIN SET/DROP DEFAULT statements. + * + * Returns ObjectAddress of the modified domain. + */ +ObjectAddress +AlterDomainDefault(List *names, Node *defaultRaw) +{ + TypeName *typename; + Oid domainoid; + HeapTuple tup; + ParseState *pstate; + Relation rel; + char *defaultValue; + Node *defaultExpr = NULL; /* NULL if no default specified */ + Datum new_record[Natts_pg_type]; + bool new_record_nulls[Natts_pg_type]; + bool new_record_repl[Natts_pg_type]; + HeapTuple newtuple; + Form_pg_type typTup; + ObjectAddress address; + + /* Make a TypeName so we can use standard type lookup machinery */ + typename = makeTypeNameFromNameList(names); + domainoid = typenameTypeId(NULL, typename); + + /* Look up the domain in the type table */ + rel = table_open(TypeRelationId, RowExclusiveLock); + + tup = SearchSysCacheCopy1(TYPEOID, ObjectIdGetDatum(domainoid)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for type %u", domainoid); + typTup = (Form_pg_type) GETSTRUCT(tup); + + /* Check it's a domain and check user has permission for ALTER DOMAIN */ + checkDomainOwner(tup); + + /* Setup new tuple */ + MemSet(new_record, (Datum) 0, sizeof(new_record)); + 
MemSet(new_record_nulls, false, sizeof(new_record_nulls)); + MemSet(new_record_repl, false, sizeof(new_record_repl)); + + /* Store the new default into the tuple */ + if (defaultRaw) + { + /* Create a dummy ParseState for transformExpr */ + pstate = make_parsestate(NULL); + + /* + * Cook the colDef->raw_expr into an expression. Note: Name is + * strictly for error message + */ + defaultExpr = cookDefault(pstate, defaultRaw, + typTup->typbasetype, + typTup->typtypmod, + NameStr(typTup->typname), + 0); + + /* + * If the expression is just a NULL constant, we treat the command + * like ALTER ... DROP DEFAULT. (But see note for same test in + * DefineDomain.) + */ + if (defaultExpr == NULL || + (IsA(defaultExpr, Const) && ((Const *) defaultExpr)->constisnull)) + { + /* Default is NULL, drop it */ + defaultExpr = NULL; + new_record_nulls[Anum_pg_type_typdefaultbin - 1] = true; + new_record_repl[Anum_pg_type_typdefaultbin - 1] = true; + new_record_nulls[Anum_pg_type_typdefault - 1] = true; + new_record_repl[Anum_pg_type_typdefault - 1] = true; + } + else + { + /* + * Expression must be stored as a nodeToString result, but we also + * require a valid textual representation (mainly to make life + * easier for pg_dump). + */ + defaultValue = deparse_expression(defaultExpr, + NIL, false, false); + + /* + * Form an updated tuple with the new default and write it back. + */ + new_record[Anum_pg_type_typdefaultbin - 1] = CStringGetTextDatum(nodeToString(defaultExpr)); + + new_record_repl[Anum_pg_type_typdefaultbin - 1] = true; + new_record[Anum_pg_type_typdefault - 1] = CStringGetTextDatum(defaultValue); + new_record_repl[Anum_pg_type_typdefault - 1] = true; + } + } + else + { + /* ALTER ... 
DROP DEFAULT */ + new_record_nulls[Anum_pg_type_typdefaultbin - 1] = true; + new_record_repl[Anum_pg_type_typdefaultbin - 1] = true; + new_record_nulls[Anum_pg_type_typdefault - 1] = true; + new_record_repl[Anum_pg_type_typdefault - 1] = true; + } + + newtuple = heap_modify_tuple(tup, RelationGetDescr(rel), + new_record, new_record_nulls, + new_record_repl); + + CatalogTupleUpdate(rel, &tup->t_self, newtuple); + + /* Rebuild dependencies */ + GenerateTypeDependencies(newtuple, + rel, + defaultExpr, + NULL, /* don't have typacl handy */ + 0, /* relation kind is n/a */ + false, /* a domain isn't an implicit array */ + false, /* nor is it any kind of dependent type */ + false, /* don't touch extension membership */ + true); /* We do need to rebuild dependencies */ + + InvokeObjectPostAlterHook(TypeRelationId, domainoid, 0); + + ObjectAddressSet(address, TypeRelationId, domainoid); + + /* Clean up */ + table_close(rel, RowExclusiveLock); + heap_freetuple(newtuple); + + return address; +} + +/* + * AlterDomainNotNull + * + * Routine implementing ALTER DOMAIN SET/DROP NOT NULL statements. + * + * Returns ObjectAddress of the modified domain. 
+ */ +ObjectAddress +AlterDomainNotNull(List *names, bool notNull) +{ + TypeName *typename; + Oid domainoid; + Relation typrel; + HeapTuple tup; + Form_pg_type typTup; + ObjectAddress address = InvalidObjectAddress; + + /* Make a TypeName so we can use standard type lookup machinery */ + typename = makeTypeNameFromNameList(names); + domainoid = typenameTypeId(NULL, typename); + + /* Look up the domain in the type table */ + typrel = table_open(TypeRelationId, RowExclusiveLock); + + tup = SearchSysCacheCopy1(TYPEOID, ObjectIdGetDatum(domainoid)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for type %u", domainoid); + typTup = (Form_pg_type) GETSTRUCT(tup); + + /* Check it's a domain and check user has permission for ALTER DOMAIN */ + checkDomainOwner(tup); + + /* Is the domain already set to the desired constraint? */ + if (typTup->typnotnull == notNull) + { + table_close(typrel, RowExclusiveLock); + return address; + } + + /* Adding a NOT NULL constraint requires checking existing columns */ + if (notNull) + { + List *rels; + ListCell *rt; + + /* Fetch relation list with attributes based on this domain */ + /* ShareLock is sufficient to prevent concurrent data changes */ + + rels = get_rels_with_domain(domainoid, ShareLock); + + foreach(rt, rels) + { + RelToCheck *rtc = (RelToCheck *) lfirst(rt); + Relation testrel = rtc->rel; + TupleDesc tupdesc = RelationGetDescr(testrel); + TupleTableSlot *slot; + TableScanDesc scan; + Snapshot snapshot; + + /* Scan all tuples in this relation */ + snapshot = RegisterSnapshot(GetLatestSnapshot()); + scan = table_beginscan(testrel, snapshot, 0, NULL); + slot = table_slot_create(testrel, NULL); + while (table_scan_getnextslot(scan, ForwardScanDirection, slot)) + { + int i; + + /* Test attributes that are of the domain */ + for (i = 0; i < rtc->natts; i++) + { + int attnum = rtc->atts[i]; + Form_pg_attribute attr = TupleDescAttr(tupdesc, attnum - 1); + + if (slot_attisnull(slot, attnum)) + { + /* + * In 
principle the auxiliary information for this + * error should be errdatatype(), but errtablecol() + * seems considerably more useful in practice. Since + * this code only executes in an ALTER DOMAIN command, + * the client should already know which domain is in + * question. + */ + ereport(ERROR, + (errcode(ERRCODE_NOT_NULL_VIOLATION), + errmsg("column \"%s\" of table \"%s\" contains null values", + NameStr(attr->attname), + RelationGetRelationName(testrel)), + errtablecol(testrel, attnum))); + } + } + } + ExecDropSingleTupleTableSlot(slot); + table_endscan(scan); + UnregisterSnapshot(snapshot); + + /* Close each rel after processing, but keep lock */ + table_close(testrel, NoLock); + } + } + + /* + * Okay to update pg_type row. We can scribble on typTup because it's a + * copy. + */ + typTup->typnotnull = notNull; + + CatalogTupleUpdate(typrel, &tup->t_self, tup); + + InvokeObjectPostAlterHook(TypeRelationId, domainoid, 0); + + ObjectAddressSet(address, TypeRelationId, domainoid); + + /* Clean up */ + heap_freetuple(tup); + table_close(typrel, RowExclusiveLock); + + return address; +} + +/* + * AlterDomainDropConstraint + * + * Implements the ALTER DOMAIN DROP CONSTRAINT statement + * + * Returns ObjectAddress of the modified domain. 
+ */ +ObjectAddress +AlterDomainDropConstraint(List *names, const char *constrName, + DropBehavior behavior, bool missing_ok) +{ + TypeName *typename; + Oid domainoid; + HeapTuple tup; + Relation rel; + Relation conrel; + SysScanDesc conscan; + ScanKeyData skey[3]; + HeapTuple contup; + bool found = false; + ObjectAddress address; + + /* Make a TypeName so we can use standard type lookup machinery */ + typename = makeTypeNameFromNameList(names); + domainoid = typenameTypeId(NULL, typename); + + /* Look up the domain in the type table */ + rel = table_open(TypeRelationId, RowExclusiveLock); + + tup = SearchSysCacheCopy1(TYPEOID, ObjectIdGetDatum(domainoid)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for type %u", domainoid); + + /* Check it's a domain and check user has permission for ALTER DOMAIN */ + checkDomainOwner(tup); + + /* Grab an appropriate lock on the pg_constraint relation */ + conrel = table_open(ConstraintRelationId, RowExclusiveLock); + + /* Find and remove the target constraint */ + ScanKeyInit(&skey[0], + Anum_pg_constraint_conrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(InvalidOid)); + ScanKeyInit(&skey[1], + Anum_pg_constraint_contypid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(domainoid)); + ScanKeyInit(&skey[2], + Anum_pg_constraint_conname, + BTEqualStrategyNumber, F_NAMEEQ, + CStringGetDatum(constrName)); + + conscan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId, true, + NULL, 3, skey); + + /* There can be at most one matching row */ + if ((contup = systable_getnext(conscan)) != NULL) + { + ObjectAddress conobj; + + conobj.classId = ConstraintRelationId; + conobj.objectId = ((Form_pg_constraint) GETSTRUCT(contup))->oid; + conobj.objectSubId = 0; + + performDeletion(&conobj, behavior, 0); + found = true; + } + + /* Clean up after the scan */ + systable_endscan(conscan); + table_close(conrel, RowExclusiveLock); + + if (!found) + { + if (!missing_ok) + ereport(ERROR, + 
(errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("constraint \"%s\" of domain \"%s\" does not exist", + constrName, TypeNameToString(typename)))); + else + ereport(NOTICE, + (errmsg("constraint \"%s\" of domain \"%s\" does not exist, skipping", + constrName, TypeNameToString(typename)))); + } + + /* + * We must send out an sinval message for the domain, to ensure that any + * dependent plans get rebuilt. Since this command doesn't change the + * domain's pg_type row, that won't happen automatically; do it manually. + */ + CacheInvalidateHeapTuple(rel, tup, NULL); + + ObjectAddressSet(address, TypeRelationId, domainoid); + + /* Clean up */ + table_close(rel, RowExclusiveLock); + + return address; +} + +/* + * AlterDomainAddConstraint + * + * Implements the ALTER DOMAIN .. ADD CONSTRAINT statement. + */ +ObjectAddress +AlterDomainAddConstraint(List *names, Node *newConstraint, + ObjectAddress *constrAddr) +{ + TypeName *typename; + Oid domainoid; + Relation typrel; + HeapTuple tup; + Form_pg_type typTup; + Constraint *constr; + char *ccbin; + ObjectAddress address; + + /* Make a TypeName so we can use standard type lookup machinery */ + typename = makeTypeNameFromNameList(names); + domainoid = typenameTypeId(NULL, typename); + + /* Look up the domain in the type table */ + typrel = table_open(TypeRelationId, RowExclusiveLock); + + tup = SearchSysCacheCopy1(TYPEOID, ObjectIdGetDatum(domainoid)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for type %u", domainoid); + typTup = (Form_pg_type) GETSTRUCT(tup); + + /* Check it's a domain and check user has permission for ALTER DOMAIN */ + checkDomainOwner(tup); + + if (!IsA(newConstraint, Constraint)) + elog(ERROR, "unrecognized node type: %d", + (int) nodeTag(newConstraint)); + + constr = (Constraint *) newConstraint; + + switch (constr->contype) + { + case CONSTR_CHECK: + /* processed below */ + break; + + case CONSTR_UNIQUE: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unique constraints 
not possible for domains"))); + break; + + case CONSTR_PRIMARY: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("primary key constraints not possible for domains"))); + break; + + case CONSTR_EXCLUSION: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("exclusion constraints not possible for domains"))); + break; + + case CONSTR_FOREIGN: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("foreign key constraints not possible for domains"))); + break; + + case CONSTR_ATTR_DEFERRABLE: + case CONSTR_ATTR_NOT_DEFERRABLE: + case CONSTR_ATTR_DEFERRED: + case CONSTR_ATTR_IMMEDIATE: + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("specifying constraint deferrability not supported for domains"))); + break; + + default: + elog(ERROR, "unrecognized constraint subtype: %d", + (int) constr->contype); + break; + } + + /* + * Since all other constraint types throw errors, this must be a check + * constraint. First, process the constraint expression and add an entry + * to pg_constraint. + */ + + ccbin = domainAddConstraint(domainoid, typTup->typnamespace, + typTup->typbasetype, typTup->typtypmod, + constr, NameStr(typTup->typname), constrAddr); + + /* + * If requested to validate the constraint, test all values stored in the + * attributes based on the domain the constraint is being added to. + */ + if (!constr->skip_validation) + validateDomainConstraint(domainoid, ccbin); + + /* + * We must send out an sinval message for the domain, to ensure that any + * dependent plans get rebuilt. Since this command doesn't change the + * domain's pg_type row, that won't happen automatically; do it manually. + */ + CacheInvalidateHeapTuple(typrel, tup, NULL); + + ObjectAddressSet(address, TypeRelationId, domainoid); + + /* Clean up */ + table_close(typrel, RowExclusiveLock); + + return address; +} + +/* + * AlterDomainValidateConstraint + * + * Implements the ALTER DOMAIN .. VALIDATE CONSTRAINT statement. 
+ */ +ObjectAddress +AlterDomainValidateConstraint(List *names, const char *constrName) +{ + TypeName *typename; + Oid domainoid; + Relation typrel; + Relation conrel; + HeapTuple tup; + Form_pg_constraint con; + Form_pg_constraint copy_con; + char *conbin; + SysScanDesc scan; + Datum val; + bool isnull; + HeapTuple tuple; + HeapTuple copyTuple; + ScanKeyData skey[3]; + ObjectAddress address; + + /* Make a TypeName so we can use standard type lookup machinery */ + typename = makeTypeNameFromNameList(names); + domainoid = typenameTypeId(NULL, typename); + + /* Look up the domain in the type table */ + typrel = table_open(TypeRelationId, AccessShareLock); + + tup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(domainoid)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for type %u", domainoid); + + /* Check it's a domain and check user has permission for ALTER DOMAIN */ + checkDomainOwner(tup); + + /* + * Find and check the target constraint + */ + conrel = table_open(ConstraintRelationId, RowExclusiveLock); + + ScanKeyInit(&skey[0], + Anum_pg_constraint_conrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(InvalidOid)); + ScanKeyInit(&skey[1], + Anum_pg_constraint_contypid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(domainoid)); + ScanKeyInit(&skey[2], + Anum_pg_constraint_conname, + BTEqualStrategyNumber, F_NAMEEQ, + CStringGetDatum(constrName)); + + scan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId, true, + NULL, 3, skey); + + /* There can be at most one matching row */ + if (!HeapTupleIsValid(tuple = systable_getnext(scan))) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("constraint \"%s\" of domain \"%s\" does not exist", + constrName, TypeNameToString(typename)))); + + con = (Form_pg_constraint) GETSTRUCT(tuple); + if (con->contype != CONSTRAINT_CHECK) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("constraint \"%s\" of domain \"%s\" is not a check constraint", + constrName, 
TypeNameToString(typename)))); + + val = SysCacheGetAttr(CONSTROID, tuple, + Anum_pg_constraint_conbin, + &isnull); + if (isnull) + elog(ERROR, "null conbin for constraint %u", + con->oid); + conbin = TextDatumGetCString(val); + + validateDomainConstraint(domainoid, conbin); + + /* + * Now update the catalog, while we have the door open. + */ + copyTuple = heap_copytuple(tuple); + copy_con = (Form_pg_constraint) GETSTRUCT(copyTuple); + copy_con->convalidated = true; + CatalogTupleUpdate(conrel, ©Tuple->t_self, copyTuple); + + InvokeObjectPostAlterHook(ConstraintRelationId, con->oid, 0); + + ObjectAddressSet(address, TypeRelationId, domainoid); + + heap_freetuple(copyTuple); + + systable_endscan(scan); + + table_close(typrel, AccessShareLock); + table_close(conrel, RowExclusiveLock); + + ReleaseSysCache(tup); + + return address; +} + +static void +validateDomainConstraint(Oid domainoid, char *ccbin) +{ + Expr *expr = (Expr *) stringToNode(ccbin); + List *rels; + ListCell *rt; + EState *estate; + ExprContext *econtext; + ExprState *exprstate; + + /* Need an EState to run ExecEvalExpr */ + estate = CreateExecutorState(); + econtext = GetPerTupleExprContext(estate); + + /* build execution state for expr */ + exprstate = ExecPrepareExpr(expr, estate); + + /* Fetch relation list with attributes based on this domain */ + /* ShareLock is sufficient to prevent concurrent data changes */ + + rels = get_rels_with_domain(domainoid, ShareLock); + + foreach(rt, rels) + { + RelToCheck *rtc = (RelToCheck *) lfirst(rt); + Relation testrel = rtc->rel; + TupleDesc tupdesc = RelationGetDescr(testrel); + TupleTableSlot *slot; + TableScanDesc scan; + Snapshot snapshot; + + /* Scan all tuples in this relation */ + snapshot = RegisterSnapshot(GetLatestSnapshot()); + scan = table_beginscan(testrel, snapshot, 0, NULL); + slot = table_slot_create(testrel, NULL); + while (table_scan_getnextslot(scan, ForwardScanDirection, slot)) + { + int i; + + /* Test attributes that are of the domain */ + 
for (i = 0; i < rtc->natts; i++) + { + int attnum = rtc->atts[i]; + Datum d; + bool isNull; + Datum conResult; + Form_pg_attribute attr = TupleDescAttr(tupdesc, attnum - 1); + + d = slot_getattr(slot, attnum, &isNull); + + econtext->domainValue_datum = d; + econtext->domainValue_isNull = isNull; + + conResult = ExecEvalExprSwitchContext(exprstate, + econtext, + &isNull); + + if (!isNull && !DatumGetBool(conResult)) + { + /* + * In principle the auxiliary information for this error + * should be errdomainconstraint(), but errtablecol() + * seems considerably more useful in practice. Since this + * code only executes in an ALTER DOMAIN command, the + * client should already know which domain is in question, + * and which constraint too. + */ + ereport(ERROR, + (errcode(ERRCODE_CHECK_VIOLATION), + errmsg("column \"%s\" of table \"%s\" contains values that violate the new constraint", + NameStr(attr->attname), + RelationGetRelationName(testrel)), + errtablecol(testrel, attnum))); + } + } + + ResetExprContext(econtext); + } + ExecDropSingleTupleTableSlot(slot); + table_endscan(scan); + UnregisterSnapshot(snapshot); + + /* Hold relation lock till commit (XXX bad for concurrency) */ + table_close(testrel, NoLock); + } + + FreeExecutorState(estate); +} + +/* + * get_rels_with_domain + * + * Fetch all relations / attributes which are using the domain + * + * The result is a list of RelToCheck structs, one for each distinct + * relation, each containing one or more attribute numbers that are of + * the domain type. We have opened each rel and acquired the specified lock + * type on it. + * + * We support nested domains by including attributes that are of derived + * domain types. Current callers do not need to distinguish between attributes + * that are of exactly the given domain and those that are of derived domains. + * + * XXX this is completely broken because there is no way to lock the domain + * to prevent columns from being added or dropped while our command runs. 
+ * We can partially protect against column drops by locking relations as we + * come across them, but there is still a race condition (the window between + * seeing a pg_depend entry and acquiring lock on the relation it references). + * Also, holding locks on all these relations simultaneously creates a non- + * trivial risk of deadlock. We can minimize but not eliminate the deadlock + * risk by using the weakest suitable lock (ShareLock for most callers). + * + * XXX the API for this is not sufficient to support checking domain values + * that are inside container types, such as composite types, arrays, or + * ranges. Currently we just error out if a container type containing the + * target domain is stored anywhere. + * + * Generally used for retrieving a list of tests when adding + * new constraints to a domain. + */ +static List * +get_rels_with_domain(Oid domainOid, LOCKMODE lockmode) +{ + List *result = NIL; + char *domainTypeName = format_type_be(domainOid); + Relation depRel; + ScanKeyData key[2]; + SysScanDesc depScan; + HeapTuple depTup; + + Assert(lockmode != NoLock); + + /* since this function recurses, it could be driven to stack overflow */ + check_stack_depth(); + + /* + * We scan pg_depend to find those things that depend on the domain. (We + * assume we can ignore refobjsubid for a domain.) 
+ */ + depRel = table_open(DependRelationId, AccessShareLock); + + ScanKeyInit(&key[0], + Anum_pg_depend_refclassid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(TypeRelationId)); + ScanKeyInit(&key[1], + Anum_pg_depend_refobjid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(domainOid)); + + depScan = systable_beginscan(depRel, DependReferenceIndexId, true, + NULL, 2, key); + + while (HeapTupleIsValid(depTup = systable_getnext(depScan))) + { + Form_pg_depend pg_depend = (Form_pg_depend) GETSTRUCT(depTup); + RelToCheck *rtc = NULL; + ListCell *rellist; + Form_pg_attribute pg_att; + int ptr; + + /* Check for directly dependent types */ + if (pg_depend->classid == TypeRelationId) + { + if (get_typtype(pg_depend->objid) == TYPTYPE_DOMAIN) + { + /* + * This is a sub-domain, so recursively add dependent columns + * to the output list. This is a bit inefficient since we may + * fail to combine RelToCheck entries when attributes of the + * same rel have different derived domain types, but it's + * probably not worth improving. + */ + result = list_concat(result, + get_rels_with_domain(pg_depend->objid, + lockmode)); + } + else + { + /* + * Otherwise, it is some container type using the domain, so + * fail if there are any columns of this type. 
+ */ + find_composite_type_dependencies(pg_depend->objid, + NULL, + domainTypeName); + } + continue; + } + + /* Else, ignore dependees that aren't user columns of relations */ + /* (we assume system columns are never of domain types) */ + if (pg_depend->classid != RelationRelationId || + pg_depend->objsubid <= 0) + continue; + + /* See if we already have an entry for this relation */ + foreach(rellist, result) + { + RelToCheck *rt = (RelToCheck *) lfirst(rellist); + + if (RelationGetRelid(rt->rel) == pg_depend->objid) + { + rtc = rt; + break; + } + } + + if (rtc == NULL) + { + /* First attribute found for this relation */ + Relation rel; + + /* Acquire requested lock on relation */ + rel = relation_open(pg_depend->objid, lockmode); + + /* + * Check to see if rowtype is stored anyplace as a composite-type + * column; if so we have to fail, for now anyway. + */ + if (OidIsValid(rel->rd_rel->reltype)) + find_composite_type_dependencies(rel->rd_rel->reltype, + NULL, + domainTypeName); + + /* + * Otherwise, we can ignore relations except those with both + * storage and user-chosen column types. + * + * XXX If an index-only scan could satisfy "col::some_domain" from + * a suitable expression index, this should also check expression + * index columns. + */ + if (rel->rd_rel->relkind != RELKIND_RELATION && + rel->rd_rel->relkind != RELKIND_MATVIEW) + { + relation_close(rel, lockmode); + continue; + } + + /* Build the RelToCheck entry with enough space for all atts */ + rtc = (RelToCheck *) palloc(sizeof(RelToCheck)); + rtc->rel = rel; + rtc->natts = 0; + rtc->atts = (int *) palloc(sizeof(int) * RelationGetNumberOfAttributes(rel)); + result = lappend(result, rtc); + } + + /* + * Confirm column has not been dropped, and is of the expected type. + * This defends against an ALTER DROP COLUMN occurring just before we + * acquired lock ... but if the whole table were dropped, we'd still + * have a problem. 
+ */ + if (pg_depend->objsubid > RelationGetNumberOfAttributes(rtc->rel)) + continue; + pg_att = TupleDescAttr(rtc->rel->rd_att, pg_depend->objsubid - 1); + if (pg_att->attisdropped || pg_att->atttypid != domainOid) + continue; + + /* + * Okay, add column to result. We store the columns in column-number + * order; this is just a hack to improve predictability of regression + * test output ... + */ + Assert(rtc->natts < RelationGetNumberOfAttributes(rtc->rel)); + + ptr = rtc->natts++; + while (ptr > 0 && rtc->atts[ptr - 1] > pg_depend->objsubid) + { + rtc->atts[ptr] = rtc->atts[ptr - 1]; + ptr--; + } + rtc->atts[ptr] = pg_depend->objsubid; + } + + systable_endscan(depScan); + + relation_close(depRel, AccessShareLock); + + return result; +} + +/* + * checkDomainOwner + * + * Check that the type is actually a domain and that the current user + * has permission to do ALTER DOMAIN on it. Throw an error if not. + */ +void +checkDomainOwner(HeapTuple tup) +{ + Form_pg_type typTup = (Form_pg_type) GETSTRUCT(tup); + + /* Check that this is actually a domain */ + if (typTup->typtype != TYPTYPE_DOMAIN) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("%s is not a domain", + format_type_be(typTup->oid)))); + + /* Permission check: must own type */ + if (!pg_type_ownercheck(typTup->oid, GetUserId())) + aclcheck_error_type(ACLCHECK_NOT_OWNER, typTup->oid); +} + +/* + * domainAddConstraint - code shared between CREATE and ALTER DOMAIN + */ +static char * +domainAddConstraint(Oid domainOid, Oid domainNamespace, Oid baseTypeOid, + int typMod, Constraint *constr, + const char *domainName, ObjectAddress *constrAddr) +{ + Node *expr; + char *ccbin; + ParseState *pstate; + CoerceToDomainValue *domVal; + Oid ccoid; + + /* + * Assign or validate constraint name + */ + if (constr->conname) + { + if (ConstraintNameIsUsed(CONSTRAINT_DOMAIN, + domainOid, + constr->conname)) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("constraint \"%s\" for domain \"%s\" 
already exists", + constr->conname, domainName))); + } + else + constr->conname = ChooseConstraintName(domainName, + NULL, + "check", + domainNamespace, + NIL); + + /* + * Convert the A_EXPR in raw_expr into an EXPR + */ + pstate = make_parsestate(NULL); + + /* + * Set up a CoerceToDomainValue to represent the occurrence of VALUE in + * the expression. Note that it will appear to have the type of the base + * type, not the domain. This seems correct since within the check + * expression, we should not assume the input value can be considered a + * member of the domain. + */ + domVal = makeNode(CoerceToDomainValue); + domVal->typeId = baseTypeOid; + domVal->typeMod = typMod; + domVal->collation = get_typcollation(baseTypeOid); + domVal->location = -1; /* will be set when/if used */ + + pstate->p_pre_columnref_hook = replace_domain_constraint_value; + pstate->p_ref_hook_state = (void *) domVal; + + expr = transformExpr(pstate, constr->raw_expr, EXPR_KIND_DOMAIN_CHECK); + + /* + * Make sure it yields a boolean result. + */ + expr = coerce_to_boolean(pstate, expr, "CHECK"); + + /* + * Fix up collation information. + */ + assign_expr_collations(pstate, expr); + + /* + * Domains don't allow variables (this is probably dead code now that + * add_missing_from is history, but let's be sure). + */ + if (list_length(pstate->p_rtable) != 0 || + contain_var_clause(expr)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("cannot use table references in domain check constraint"))); + + /* + * Convert to string form for storage. 
+ */ + ccbin = nodeToString(expr); + + /* + * Store the constraint in pg_constraint + */ + ccoid = + CreateConstraintEntry(constr->conname, /* Constraint Name */ + domainNamespace, /* namespace */ + CONSTRAINT_CHECK, /* Constraint Type */ + false, /* Is Deferrable */ + false, /* Is Deferred */ + !constr->skip_validation, /* Is Validated */ + InvalidOid, /* no parent constraint */ + InvalidOid, /* not a relation constraint */ + NULL, + 0, + 0, + domainOid, /* domain constraint */ + InvalidOid, /* no associated index */ + InvalidOid, /* Foreign key fields */ + NULL, + NULL, + NULL, + NULL, + 0, + ' ', + ' ', + NULL, + 0, + ' ', + NULL, /* not an exclusion constraint */ + expr, /* Tree form of check constraint */ + ccbin, /* Binary form of check constraint */ + true, /* is local */ + 0, /* inhcount */ + false, /* connoinherit */ + false); /* is_internal */ + if (constrAddr) + ObjectAddressSet(*constrAddr, ConstraintRelationId, ccoid); + + /* + * Return the compiled constraint expression so the calling routine can + * perform any additional required tests. + */ + return ccbin; +} + +/* Parser pre_columnref_hook for domain CHECK constraint parsing */ +static Node * +replace_domain_constraint_value(ParseState *pstate, ColumnRef *cref) +{ + /* + * Check for a reference to "value", and if that's what it is, replace + * with a CoerceToDomainValue as prepared for us by domainAddConstraint. + * (We handle VALUE as a name, not a keyword, to avoid breaking a lot of + * applications that have used VALUE as a column name in the past.) 
+ */ + if (list_length(cref->fields) == 1) + { + Node *field1 = (Node *) linitial(cref->fields); + char *colname; + + Assert(IsA(field1, String)); + colname = strVal(field1); + if (strcmp(colname, "value") == 0) + { + CoerceToDomainValue *domVal = copyObject(pstate->p_ref_hook_state); + + /* Propagate location knowledge, if any */ + domVal->location = cref->location; + return (Node *) domVal; + } + } + return NULL; +} + + +/* + * Execute ALTER TYPE RENAME + */ +ObjectAddress +RenameType(RenameStmt *stmt) +{ + List *names = castNode(List, stmt->object); + const char *newTypeName = stmt->newname; + TypeName *typename; + Oid typeOid; + Relation rel; + HeapTuple tup; + Form_pg_type typTup; + ObjectAddress address; + + /* Make a TypeName so we can use standard type lookup machinery */ + typename = makeTypeNameFromNameList(names); + typeOid = typenameTypeId(NULL, typename); + + /* Look up the type in the type table */ + rel = table_open(TypeRelationId, RowExclusiveLock); + + tup = SearchSysCacheCopy1(TYPEOID, ObjectIdGetDatum(typeOid)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for type %u", typeOid); + typTup = (Form_pg_type) GETSTRUCT(tup); + + /* check permissions on type */ + if (!pg_type_ownercheck(typeOid, GetUserId())) + aclcheck_error_type(ACLCHECK_NOT_OWNER, typeOid); + + /* ALTER DOMAIN used on a non-domain? */ + if (stmt->renameType == OBJECT_DOMAIN && typTup->typtype != TYPTYPE_DOMAIN) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("%s is not a domain", + format_type_be(typeOid)))); + + /* + * If it's a composite type, we need to check that it really is a + * free-standing composite type, and not a table's rowtype. We want people + * to use ALTER TABLE not ALTER TYPE for that case. 
+ */ + if (typTup->typtype == TYPTYPE_COMPOSITE && + get_rel_relkind(typTup->typrelid) != RELKIND_COMPOSITE_TYPE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("%s is a table's row type", + format_type_be(typeOid)), + errhint("Use ALTER TABLE instead."))); + + /* don't allow direct alteration of array types, either */ + if (IsTrueArrayType(typTup)) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot alter array type %s", + format_type_be(typeOid)), + errhint("You can alter type %s, which will alter the array type as well.", + format_type_be(typTup->typelem)))); + + /* + * If type is composite we need to rename associated pg_class entry too. + * RenameRelationInternal will call RenameTypeInternal automatically. + */ + if (typTup->typtype == TYPTYPE_COMPOSITE) + RenameRelationInternal(typTup->typrelid, newTypeName, false, false); + else + RenameTypeInternal(typeOid, newTypeName, + typTup->typnamespace); + + ObjectAddressSet(address, TypeRelationId, typeOid); + /* Clean up */ + table_close(rel, RowExclusiveLock); + + return address; +} + +/* + * Change the owner of a type. 
+ */
+ObjectAddress
+AlterTypeOwner(List *names, Oid newOwnerId, ObjectType objecttype)
+{
+	TypeName   *typename;
+	Oid			typeOid;
+	Relation	rel;
+	HeapTuple	tup;
+	HeapTuple	newtup;
+	Form_pg_type typTup;
+	AclResult	aclresult;
+	ObjectAddress address;
+
+	rel = table_open(TypeRelationId, RowExclusiveLock);
+
+	/* Make a TypeName so we can use standard type lookup machinery */
+	typename = makeTypeNameFromNameList(names);
+
+	/* Use LookupTypeName here so that shell types can be processed */
+	tup = LookupTypeName(NULL, typename, NULL, false);
+	if (tup == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("type \"%s\" does not exist",
+						TypeNameToString(typename))));
+	typeOid = typeTypeId(tup);
+
+	/* Copy the syscache entry so we can scribble on it below */
+	newtup = heap_copytuple(tup);
+	ReleaseSysCache(tup);
+	tup = newtup;
+	typTup = (Form_pg_type) GETSTRUCT(tup);
+
+	/* Don't allow ALTER DOMAIN to be applied to a non-domain type */
+	if (objecttype == OBJECT_DOMAIN && typTup->typtype != TYPTYPE_DOMAIN)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("%s is not a domain",
+						format_type_be(typeOid))));
+
+	/*
+	 * If it's a composite type, we need to check that it really is a
+	 * free-standing composite type, and not a table's rowtype. We want people
+	 * to use ALTER TABLE not ALTER TYPE for that case.
+ */ + if (typTup->typtype == TYPTYPE_COMPOSITE && + get_rel_relkind(typTup->typrelid) != RELKIND_COMPOSITE_TYPE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("%s is a table's row type", + format_type_be(typeOid)), + errhint("Use ALTER TABLE instead."))); + + /* don't allow direct alteration of array types, either */ + if (IsTrueArrayType(typTup)) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot alter array type %s", + format_type_be(typeOid)), + errhint("You can alter type %s, which will alter the array type as well.", + format_type_be(typTup->typelem)))); + + /* + * If the new owner is the same as the existing owner, consider the + * command to have succeeded. This is for dump restoration purposes. + */ + if (typTup->typowner != newOwnerId) + { + /* Superusers can always do it */ + if (!superuser()) + { + /* Otherwise, must be owner of the existing object */ + if (!pg_type_ownercheck(typTup->oid, GetUserId())) + aclcheck_error_type(ACLCHECK_NOT_OWNER, typTup->oid); + + /* Must be able to become new owner */ + check_is_member_of_role(GetUserId(), newOwnerId); + + /* New owner must have CREATE privilege on namespace */ + aclresult = pg_namespace_aclcheck(typTup->typnamespace, + newOwnerId, + ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_SCHEMA, + get_namespace_name(typTup->typnamespace)); + } + + AlterTypeOwner_oid(typeOid, newOwnerId, true); + } + + ObjectAddressSet(address, TypeRelationId, typeOid); + + /* Clean up */ + table_close(rel, RowExclusiveLock); + + return address; +} + +/* + * AlterTypeOwner_oid - change type owner unconditionally + * + * This function recurses to handle a pg_class entry, if necessary. It + * invokes any necessary access object hooks. If hasDependEntry is true, this + * function modifies the pg_shdepend entry appropriately (this should be + * passed as false only for table rowtypes and array types). 
+ *
+ * This is used by ALTER TABLE/TYPE OWNER commands, as well as by REASSIGN
+ * OWNED BY. It assumes the caller has done all needed checks.
+ */
+void
+AlterTypeOwner_oid(Oid typeOid, Oid newOwnerId, bool hasDependEntry)
+{
+	Relation	rel;
+	HeapTuple	tup;
+	Form_pg_type typTup;
+
+	rel = table_open(TypeRelationId, RowExclusiveLock);
+
+	tup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typeOid));
+	if (!HeapTupleIsValid(tup))
+		elog(ERROR, "cache lookup failed for type %u", typeOid);
+	typTup = (Form_pg_type) GETSTRUCT(tup);
+
+	/*
+	 * If it's a composite type, invoke ATExecChangeOwner so that we fix up
+	 * the pg_class entry properly. That will call back to
+	 * AlterTypeOwnerInternal to take care of the pg_type entry(s).
+	 */
+	if (typTup->typtype == TYPTYPE_COMPOSITE)
+		ATExecChangeOwner(typTup->typrelid, newOwnerId, true, AccessExclusiveLock);
+	else
+		AlterTypeOwnerInternal(typeOid, newOwnerId);
+
+	/* Update owner dependency reference */
+	if (hasDependEntry)
+		changeDependencyOnOwner(TypeRelationId, typeOid, newOwnerId);
+
+	/*
+	 * Notify any interested post-alter hooks here, once per command;
+	 * AlterTypeOwnerInternal deliberately does not invoke the hook itself.
+	 */
+	InvokeObjectPostAlterHook(TypeRelationId, typeOid, 0);
+
+	ReleaseSysCache(tup);
+	table_close(rel, RowExclusiveLock);
+}
+
+/*
+ * AlterTypeOwnerInternal - bare-bones type owner change.
+ *
+ * This routine simply modifies the owner of a pg_type entry, and recurses
+ * to handle a possible array type.
+ */ +void +AlterTypeOwnerInternal(Oid typeOid, Oid newOwnerId) +{ + Relation rel; + HeapTuple tup; + Form_pg_type typTup; + Datum repl_val[Natts_pg_type]; + bool repl_null[Natts_pg_type]; + bool repl_repl[Natts_pg_type]; + Acl *newAcl; + Datum aclDatum; + bool isNull; + + rel = table_open(TypeRelationId, RowExclusiveLock); + + tup = SearchSysCacheCopy1(TYPEOID, ObjectIdGetDatum(typeOid)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for type %u", typeOid); + typTup = (Form_pg_type) GETSTRUCT(tup); + + memset(repl_null, false, sizeof(repl_null)); + memset(repl_repl, false, sizeof(repl_repl)); + + repl_repl[Anum_pg_type_typowner - 1] = true; + repl_val[Anum_pg_type_typowner - 1] = ObjectIdGetDatum(newOwnerId); + + aclDatum = heap_getattr(tup, + Anum_pg_type_typacl, + RelationGetDescr(rel), + &isNull); + /* Null ACLs do not require changes */ + if (!isNull) + { + newAcl = aclnewowner(DatumGetAclP(aclDatum), + typTup->typowner, newOwnerId); + repl_repl[Anum_pg_type_typacl - 1] = true; + repl_val[Anum_pg_type_typacl - 1] = PointerGetDatum(newAcl); + } + + tup = heap_modify_tuple(tup, RelationGetDescr(rel), repl_val, repl_null, + repl_repl); + + CatalogTupleUpdate(rel, &tup->t_self, tup); + + /* If it has an array type, update that too */ + if (OidIsValid(typTup->typarray)) + AlterTypeOwnerInternal(typTup->typarray, newOwnerId); + + /* Clean up */ + table_close(rel, RowExclusiveLock); +} + +/* + * Execute ALTER TYPE SET SCHEMA + */ +ObjectAddress +AlterTypeNamespace(List *names, const char *newschema, ObjectType objecttype, + Oid *oldschema) +{ + TypeName *typename; + Oid typeOid; + Oid nspOid; + Oid oldNspOid; + ObjectAddresses *objsMoved; + ObjectAddress myself; + + /* Make a TypeName so we can use standard type lookup machinery */ + typename = makeTypeNameFromNameList(names); + typeOid = typenameTypeId(NULL, typename); + + /* Don't allow ALTER DOMAIN on a type */ + if (objecttype == OBJECT_DOMAIN && get_typtype(typeOid) != TYPTYPE_DOMAIN) + 
ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("%s is not a domain", + format_type_be(typeOid)))); + + /* get schema OID and check its permissions */ + nspOid = LookupCreationNamespace(newschema); + + objsMoved = new_object_addresses(); + oldNspOid = AlterTypeNamespace_oid(typeOid, nspOid, objsMoved); + free_object_addresses(objsMoved); + + if (oldschema) + *oldschema = oldNspOid; + + ObjectAddressSet(myself, TypeRelationId, typeOid); + + return myself; +} + +Oid +AlterTypeNamespace_oid(Oid typeOid, Oid nspOid, ObjectAddresses *objsMoved) +{ + Oid elemOid; + + /* check permissions on type */ + if (!pg_type_ownercheck(typeOid, GetUserId())) + aclcheck_error_type(ACLCHECK_NOT_OWNER, typeOid); + + /* don't allow direct alteration of array types */ + elemOid = get_element_type(typeOid); + if (OidIsValid(elemOid) && get_array_type(elemOid) == typeOid) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot alter array type %s", + format_type_be(typeOid)), + errhint("You can alter type %s, which will alter the array type as well.", + format_type_be(elemOid)))); + + /* and do the work */ + return AlterTypeNamespaceInternal(typeOid, nspOid, false, true, objsMoved); +} + +/* + * Move specified type to new namespace. + * + * Caller must have already checked privileges. + * + * The function automatically recurses to process the type's array type, + * if any. isImplicitArray should be true only when doing this internal + * recursion (outside callers must never try to move an array type directly). + * + * If errorOnTableType is true, the function errors out if the type is + * a table type. ALTER TABLE has to be used to move a table to a new + * namespace. + * + * Returns the type's old namespace OID. 
+ */
+Oid
+AlterTypeNamespaceInternal(Oid typeOid, Oid nspOid,
+						   bool isImplicitArray,
+						   bool errorOnTableType,
+						   ObjectAddresses *objsMoved)
+{
+	Relation	rel;
+	HeapTuple	tup;
+	Form_pg_type typform;
+	Oid			oldNspOid;
+	Oid			arrayOid;
+	bool		isCompositeType;
+	ObjectAddress thisobj;
+
+	/*
+	 * Make sure we haven't moved this object previously.
+	 */
+	thisobj.classId = TypeRelationId;
+	thisobj.objectId = typeOid;
+	thisobj.objectSubId = 0;
+
+	if (object_address_present(&thisobj, objsMoved))
+		return InvalidOid;	/* already moved in this command; nothing to do */
+
+	rel = table_open(TypeRelationId, RowExclusiveLock);
+
+	tup = SearchSysCacheCopy1(TYPEOID, ObjectIdGetDatum(typeOid));
+	if (!HeapTupleIsValid(tup))
+		elog(ERROR, "cache lookup failed for type %u", typeOid);
+	typform = (Form_pg_type) GETSTRUCT(tup);
+
+	oldNspOid = typform->typnamespace;
+	arrayOid = typform->typarray;
+
+	/* If the type is already there, we can skip these next few checks. */
+	if (oldNspOid != nspOid)
+	{
+		/* common checks on switching namespaces */
+		CheckSetNamespace(oldNspOid, nspOid);
+
+		/* check for duplicate name (more friendly than unique-index failure) */
+		if (SearchSysCacheExists2(TYPENAMENSP,
+								  NameGetDatum(&typform->typname),
+								  ObjectIdGetDatum(nspOid)))
+			ereport(ERROR,
+					(errcode(ERRCODE_DUPLICATE_OBJECT),
+					 errmsg("type \"%s\" already exists in schema \"%s\"",
+							NameStr(typform->typname),
+							get_namespace_name(nspOid))));
+	}
+
+	/* Detect whether type is a composite type (but not a table rowtype) */
+	isCompositeType =
+		(typform->typtype == TYPTYPE_COMPOSITE &&
+		 get_rel_relkind(typform->typrelid) == RELKIND_COMPOSITE_TYPE);
+
+	/* Enforce not-table-type if requested */
+	if (typform->typtype == TYPTYPE_COMPOSITE && !isCompositeType &&
+		errorOnTableType)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("%s is a table's row type",
+						format_type_be(typeOid)),
+				 errhint("Use ALTER TABLE instead.")));
+
+	if (oldNspOid != nspOid)
+	{
+		/* OK, modify the pg_type row */
+
+		/* tup is a copy, so we can scribble directly on it */
+		typform->typnamespace = nspOid;
+
+		CatalogTupleUpdate(rel, &tup->t_self, tup);
+	}
+
+	/*
+	 * Composite types have pg_class entries.
+	 *
+	 * We need to modify the pg_class tuple as well to reflect the change of
+	 * schema.
+	 */
+	if (isCompositeType)
+	{
+		Relation	classRel;
+
+		classRel = table_open(RelationRelationId, RowExclusiveLock);
+
+		AlterRelationNamespaceInternal(classRel, typform->typrelid,
+									   oldNspOid, nspOid,
+									   false, objsMoved);
+
+		table_close(classRel, RowExclusiveLock);
+
+		/*
+		 * Check for constraints associated with the composite type (we don't
+		 * currently support this, but probably will someday).
+		 */
+		AlterConstraintNamespaces(typform->typrelid, oldNspOid,
+								  nspOid, false, objsMoved);
+	}
+	else
+	{
+		/* If it's a domain, it might have constraints */
+		if (typform->typtype == TYPTYPE_DOMAIN)
+			AlterConstraintNamespaces(typeOid, oldNspOid, nspOid, true,
+									  objsMoved);
+	}
+
+	/*
+	 * Update dependency on schema, if any --- a table rowtype has not got
+	 * one, and neither does an implicit array.
+	 */
+	if (oldNspOid != nspOid &&
+		(isCompositeType || typform->typtype != TYPTYPE_COMPOSITE) &&
+		!isImplicitArray)
+		if (changeDependencyFor(TypeRelationId, typeOid,
+								NamespaceRelationId, oldNspOid, nspOid) != 1)
+			elog(ERROR, "failed to change schema dependency for type %s",
+				 format_type_be(typeOid));
+
+	InvokeObjectPostAlterHook(TypeRelationId, typeOid, 0);
+
+	heap_freetuple(tup);
+
+	table_close(rel, RowExclusiveLock);
+
+	add_exact_object_address(&thisobj, objsMoved);
+
+	/* Recursively alter the associated array type, if any */
+	if (OidIsValid(arrayOid))
+		AlterTypeNamespaceInternal(arrayOid, nspOid, true, true, objsMoved);
+
+	return oldNspOid;
+}
+
+/*
+ * AlterType
+ *		ALTER TYPE SET (option = ...)
+ *
+ * NOTE: the set of changes that can be allowed here is constrained by many
+ * non-obvious implementation restrictions.  Tread carefully when considering
+ * adding new flexibility.
+ */ +ObjectAddress +AlterType(AlterTypeStmt *stmt) +{ + ObjectAddress address; + Relation catalog; + TypeName *typename; + HeapTuple tup; + Oid typeOid; + Form_pg_type typForm; + bool requireSuper = false; + AlterTypeRecurseParams atparams; + ListCell *pl; + + catalog = table_open(TypeRelationId, RowExclusiveLock); + + /* Make a TypeName so we can use standard type lookup machinery */ + typename = makeTypeNameFromNameList(stmt->typeName); + tup = typenameType(NULL, typename, NULL); + + typeOid = typeTypeId(tup); + typForm = (Form_pg_type) GETSTRUCT(tup); + + /* Process options */ + memset(&atparams, 0, sizeof(atparams)); + foreach(pl, stmt->options) + { + DefElem *defel = (DefElem *) lfirst(pl); + + if (strcmp(defel->defname, "storage") == 0) + { + char *a = defGetString(defel); + + if (pg_strcasecmp(a, "plain") == 0) + atparams.storage = TYPSTORAGE_PLAIN; + else if (pg_strcasecmp(a, "external") == 0) + atparams.storage = TYPSTORAGE_EXTERNAL; + else if (pg_strcasecmp(a, "extended") == 0) + atparams.storage = TYPSTORAGE_EXTENDED; + else if (pg_strcasecmp(a, "main") == 0) + atparams.storage = TYPSTORAGE_MAIN; + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("storage \"%s\" not recognized", a))); + + /* + * Validate the storage request. If the type isn't varlena, it + * certainly doesn't support non-PLAIN storage. + */ + if (atparams.storage != TYPSTORAGE_PLAIN && typForm->typlen != -1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("fixed-size types must have storage PLAIN"))); + + /* + * Switching from PLAIN to non-PLAIN is allowed, but it requires + * superuser, since we can't validate that the type's C functions + * will support it. Switching from non-PLAIN to PLAIN is + * disallowed outright, because it's not practical to ensure that + * no tables have toasted values of the type. 
Switching among + * different non-PLAIN settings is OK, since it just constitutes a + * change in the strategy requested for columns created in the + * future. + */ + if (atparams.storage != TYPSTORAGE_PLAIN && + typForm->typstorage == TYPSTORAGE_PLAIN) + requireSuper = true; + else if (atparams.storage == TYPSTORAGE_PLAIN && + typForm->typstorage != TYPSTORAGE_PLAIN) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("cannot change type's storage to PLAIN"))); + + atparams.updateStorage = true; + } + else if (strcmp(defel->defname, "receive") == 0) + { + if (defel->arg != NULL) + atparams.receiveOid = + findTypeReceiveFunction(defGetQualifiedName(defel), + typeOid); + else + atparams.receiveOid = InvalidOid; /* NONE, remove function */ + atparams.updateReceive = true; + /* Replacing an I/O function requires superuser. */ + requireSuper = true; + } + else if (strcmp(defel->defname, "send") == 0) + { + if (defel->arg != NULL) + atparams.sendOid = + findTypeSendFunction(defGetQualifiedName(defel), + typeOid); + else + atparams.sendOid = InvalidOid; /* NONE, remove function */ + atparams.updateSend = true; + /* Replacing an I/O function requires superuser. */ + requireSuper = true; + } + else if (strcmp(defel->defname, "typmod_in") == 0) + { + if (defel->arg != NULL) + atparams.typmodinOid = + findTypeTypmodinFunction(defGetQualifiedName(defel)); + else + atparams.typmodinOid = InvalidOid; /* NONE, remove function */ + atparams.updateTypmodin = true; + /* Replacing an I/O function requires superuser. */ + requireSuper = true; + } + else if (strcmp(defel->defname, "typmod_out") == 0) + { + if (defel->arg != NULL) + atparams.typmodoutOid = + findTypeTypmodoutFunction(defGetQualifiedName(defel)); + else + atparams.typmodoutOid = InvalidOid; /* NONE, remove function */ + atparams.updateTypmodout = true; + /* Replacing an I/O function requires superuser. 
*/ + requireSuper = true; + } + else if (strcmp(defel->defname, "analyze") == 0) + { + if (defel->arg != NULL) + atparams.analyzeOid = + findTypeAnalyzeFunction(defGetQualifiedName(defel), + typeOid); + else + atparams.analyzeOid = InvalidOid; /* NONE, remove function */ + atparams.updateAnalyze = true; + /* Replacing an analyze function requires superuser. */ + requireSuper = true; + } + else if (strcmp(defel->defname, "subscript") == 0) + { + if (defel->arg != NULL) + atparams.subscriptOid = + findTypeSubscriptingFunction(defGetQualifiedName(defel), + typeOid); + else + atparams.subscriptOid = InvalidOid; /* NONE, remove function */ + atparams.updateSubscript = true; + /* Replacing a subscript function requires superuser. */ + requireSuper = true; + } + + /* + * The rest of the options that CREATE accepts cannot be changed. + * Check for them so that we can give a meaningful error message. + */ + else if (strcmp(defel->defname, "input") == 0 || + strcmp(defel->defname, "output") == 0 || + strcmp(defel->defname, "internallength") == 0 || + strcmp(defel->defname, "passedbyvalue") == 0 || + strcmp(defel->defname, "alignment") == 0 || + strcmp(defel->defname, "like") == 0 || + strcmp(defel->defname, "category") == 0 || + strcmp(defel->defname, "preferred") == 0 || + strcmp(defel->defname, "default") == 0 || + strcmp(defel->defname, "element") == 0 || + strcmp(defel->defname, "delimiter") == 0 || + strcmp(defel->defname, "collatable") == 0) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("type attribute \"%s\" cannot be changed", + defel->defname))); + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("type attribute \"%s\" not recognized", + defel->defname))); + } + + /* + * Permissions check. Require superuser if we decided the command + * requires that, else must own the type. 
+ */ + if (requireSuper) + { + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to alter a type"))); + } + else + { + if (!pg_type_ownercheck(typeOid, GetUserId())) + aclcheck_error_type(ACLCHECK_NOT_OWNER, typeOid); + } + + /* + * We disallow all forms of ALTER TYPE SET on types that aren't plain base + * types. It would for example be highly unsafe, not to mention + * pointless, to change the send/receive functions for a composite type. + * Moreover, pg_dump has no support for changing these properties on + * non-base types. We might weaken this someday, but not now. + * + * Note: if you weaken this enough to allow composite types, be sure to + * adjust the GenerateTypeDependencies call in AlterTypeRecurse. + */ + if (typForm->typtype != TYPTYPE_BASE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("%s is not a base type", + format_type_be(typeOid)))); + + /* + * For the same reasons, don't allow direct alteration of array types. + */ + if (IsTrueArrayType(typForm)) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("%s is not a base type", + format_type_be(typeOid)))); + + /* OK, recursively update this type and any arrays/domains over it */ + AlterTypeRecurse(typeOid, false, tup, catalog, &atparams); + + /* Clean up */ + ReleaseSysCache(tup); + + table_close(catalog, RowExclusiveLock); + + ObjectAddressSet(address, TypeRelationId, typeOid); + + return address; +} + +/* + * AlterTypeRecurse: one recursion step for AlterType() + * + * Apply the changes specified by "atparams" to the type identified by + * "typeOid", whose existing pg_type tuple is "tup". If necessary, + * recursively update its array type as well. Then search for any domains + * over this type, and recursively apply (most of) the same changes to those + * domains. + * + * We need this because the system generally assumes that a domain inherits + * many properties from its base type. 
See DefineDomain() above for details + * of what is inherited. Arrays inherit a smaller number of properties, + * but not none. + * + * There's a race condition here, in that some other transaction could + * concurrently add another domain atop this base type; we'd miss updating + * that one. Hence, be wary of allowing ALTER TYPE to change properties for + * which it'd be really fatal for a domain to be out of sync with its base + * type (typlen, for example). In practice, races seem unlikely to be an + * issue for plausible use-cases for ALTER TYPE. If one does happen, it could + * be fixed by re-doing the same ALTER TYPE once all prior transactions have + * committed. + */ +static void +AlterTypeRecurse(Oid typeOid, bool isImplicitArray, + HeapTuple tup, Relation catalog, + AlterTypeRecurseParams *atparams) +{ + Datum values[Natts_pg_type]; + bool nulls[Natts_pg_type]; + bool replaces[Natts_pg_type]; + HeapTuple newtup; + SysScanDesc scan; + ScanKeyData key[1]; + HeapTuple domainTup; + + /* Since this function recurses, it could be driven to stack overflow */ + check_stack_depth(); + + /* Update the current type's tuple */ + memset(values, 0, sizeof(values)); + memset(nulls, 0, sizeof(nulls)); + memset(replaces, 0, sizeof(replaces)); + + if (atparams->updateStorage) + { + replaces[Anum_pg_type_typstorage - 1] = true; + values[Anum_pg_type_typstorage - 1] = CharGetDatum(atparams->storage); + } + if (atparams->updateReceive) + { + replaces[Anum_pg_type_typreceive - 1] = true; + values[Anum_pg_type_typreceive - 1] = ObjectIdGetDatum(atparams->receiveOid); + } + if (atparams->updateSend) + { + replaces[Anum_pg_type_typsend - 1] = true; + values[Anum_pg_type_typsend - 1] = ObjectIdGetDatum(atparams->sendOid); + } + if (atparams->updateTypmodin) + { + replaces[Anum_pg_type_typmodin - 1] = true; + values[Anum_pg_type_typmodin - 1] = ObjectIdGetDatum(atparams->typmodinOid); + } + if (atparams->updateTypmodout) + { + replaces[Anum_pg_type_typmodout - 1] = true; + 
values[Anum_pg_type_typmodout - 1] = ObjectIdGetDatum(atparams->typmodoutOid); + } + if (atparams->updateAnalyze) + { + replaces[Anum_pg_type_typanalyze - 1] = true; + values[Anum_pg_type_typanalyze - 1] = ObjectIdGetDatum(atparams->analyzeOid); + } + if (atparams->updateSubscript) + { + replaces[Anum_pg_type_typsubscript - 1] = true; + values[Anum_pg_type_typsubscript - 1] = ObjectIdGetDatum(atparams->subscriptOid); + } + + newtup = heap_modify_tuple(tup, RelationGetDescr(catalog), + values, nulls, replaces); + + CatalogTupleUpdate(catalog, &newtup->t_self, newtup); + + /* Rebuild dependencies for this type */ + GenerateTypeDependencies(newtup, + catalog, + NULL, /* don't have defaultExpr handy */ + NULL, /* don't have typacl handy */ + 0, /* we rejected composite types above */ + isImplicitArray, /* it might be an array */ + isImplicitArray, /* dependent iff it's array */ + false, /* don't touch extension membership */ + true); + + InvokeObjectPostAlterHook(TypeRelationId, typeOid, 0); + + /* + * Arrays inherit their base type's typmodin and typmodout, but none of + * the other properties we're concerned with here. Recurse to the array + * type if needed. + */ + if (!isImplicitArray && + (atparams->updateTypmodin || atparams->updateTypmodout)) + { + Oid arrtypoid = ((Form_pg_type) GETSTRUCT(newtup))->typarray; + + if (OidIsValid(arrtypoid)) + { + HeapTuple arrtup; + AlterTypeRecurseParams arrparams; + + arrtup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(arrtypoid)); + if (!HeapTupleIsValid(arrtup)) + elog(ERROR, "cache lookup failed for type %u", arrtypoid); + + memset(&arrparams, 0, sizeof(arrparams)); + arrparams.updateTypmodin = atparams->updateTypmodin; + arrparams.updateTypmodout = atparams->updateTypmodout; + arrparams.typmodinOid = atparams->typmodinOid; + arrparams.typmodoutOid = atparams->typmodoutOid; + + AlterTypeRecurse(arrtypoid, true, arrtup, catalog, &arrparams); + + ReleaseSysCache(arrtup); + } + } + + /* + * Now we need to recurse to domains. 
However, some properties are not + * inherited by domains, so clear the update flags for those. + */ + atparams->updateReceive = false; /* domains use F_DOMAIN_RECV */ + atparams->updateTypmodin = false; /* domains don't have typmods */ + atparams->updateTypmodout = false; + atparams->updateSubscript = false; /* domains don't have subscriptors */ + + /* Skip the scan if nothing remains to be done */ + if (!(atparams->updateStorage || + atparams->updateSend || + atparams->updateAnalyze)) + return; + + /* Search pg_type for possible domains over this type */ + ScanKeyInit(&key[0], + Anum_pg_type_typbasetype, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(typeOid)); + + scan = systable_beginscan(catalog, InvalidOid, false, + NULL, 1, key); + + while ((domainTup = systable_getnext(scan)) != NULL) + { + Form_pg_type domainForm = (Form_pg_type) GETSTRUCT(domainTup); + + /* + * Shouldn't have a nonzero typbasetype in a non-domain, but let's + * check + */ + if (domainForm->typtype != TYPTYPE_DOMAIN) + continue; + + AlterTypeRecurse(domainForm->oid, false, domainTup, catalog, atparams); + } + + systable_endscan(scan); +} diff --git a/src/backend/commands/user.c b/src/backend/commands/user.c new file mode 100644 index 0000000..cba8e19 --- /dev/null +++ b/src/backend/commands/user.c @@ -0,0 +1,1645 @@ +/*------------------------------------------------------------------------- + * + * user.c + * Commands for manipulating roles (formerly called users). 
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/backend/commands/user.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/htup_details.h"
+#include "access/table.h"
+#include "access/xact.h"
+#include "catalog/binary_upgrade.h"
+#include "catalog/catalog.h"
+#include "catalog/dependency.h"
+#include "catalog/indexing.h"
+#include "catalog/objectaccess.h"
+#include "catalog/pg_auth_members.h"
+#include "catalog/pg_authid.h"
+#include "catalog/pg_database.h"
+#include "catalog/pg_db_role_setting.h"
+#include "commands/comment.h"
+#include "commands/dbcommands.h"
+#include "commands/defrem.h"
+#include "commands/seclabel.h"
+#include "commands/user.h"
+#include "libpq/crypt.h"
+#include "miscadmin.h"
+#include "storage/lmgr.h"
+#include "utils/acl.h"
+#include "utils/builtins.h"
+#include "utils/fmgroids.h"
+#include "utils/syscache.h"
+#include "utils/timestamp.h"
+
+/* Potentially set by pg_upgrade_support functions */
+Oid			binary_upgrade_next_pg_authid_oid = InvalidOid;
+
+
+/*
+ * GUC parameter: hashing method applied when a role's password is set
+ * (passed to encrypt_password() in CreateRole()/AlterRole()).
+ */
+int			Password_encryption = PASSWORD_TYPE_SCRAM_SHA_256;
+
+/* Hook to check passwords in CreateRole() and AlterRole() */
+check_password_hook_type check_password_hook = NULL;
+
+/* Membership add/remove workers; definitions appear later in this file */
+static void AddRoleMems(const char *rolename, Oid roleid,
+						List *memberSpecs, List *memberIds,
+						Oid grantorId, bool admin_opt);
+static void DelRoleMems(const char *rolename, Oid roleid,
+						List *memberSpecs, List *memberIds,
+						bool admin_opt);
+
+
+/* Check if current user has createrole privileges */
+static bool
+have_createrole_privilege(void)
+{
+	return has_createrole_privilege(GetUserId());
+}
+
+
+/*
+ * CREATE ROLE
+ *
+ * Insert a new role into pg_authid and establish any requested memberships;
+ * returns the OID of the newly created role.
+ */
+Oid
+CreateRole(ParseState *pstate, CreateRoleStmt *stmt)
+{
+	Relation	pg_authid_rel;
+	TupleDesc	pg_authid_dsc;
+	HeapTuple	tuple;
+	Datum
new_record[Natts_pg_authid]; + bool new_record_nulls[Natts_pg_authid]; + Oid roleid; + ListCell *item; + ListCell *option; + char *password = NULL; /* user password */ + bool issuper = false; /* Make the user a superuser? */ + bool inherit = true; /* Auto inherit privileges? */ + bool createrole = false; /* Can this user create roles? */ + bool createdb = false; /* Can the user create databases? */ + bool canlogin = false; /* Can this user login? */ + bool isreplication = false; /* Is this a replication role? */ + bool bypassrls = false; /* Is this a row security enabled role? */ + int connlimit = -1; /* maximum connections allowed */ + List *addroleto = NIL; /* roles to make this a member of */ + List *rolemembers = NIL; /* roles to be members of this role */ + List *adminmembers = NIL; /* roles to be admins of this role */ + char *validUntil = NULL; /* time the login is valid until */ + Datum validUntil_datum; /* same, as timestamptz Datum */ + bool validUntil_null; + DefElem *dpassword = NULL; + DefElem *dissuper = NULL; + DefElem *dinherit = NULL; + DefElem *dcreaterole = NULL; + DefElem *dcreatedb = NULL; + DefElem *dcanlogin = NULL; + DefElem *disreplication = NULL; + DefElem *dconnlimit = NULL; + DefElem *daddroleto = NULL; + DefElem *drolemembers = NULL; + DefElem *dadminmembers = NULL; + DefElem *dvalidUntil = NULL; + DefElem *dbypassRLS = NULL; + + /* The defaults can vary depending on the original statement type */ + switch (stmt->stmt_type) + { + case ROLESTMT_ROLE: + break; + case ROLESTMT_USER: + canlogin = true; + /* may eventually want inherit to default to false here */ + break; + case ROLESTMT_GROUP: + break; + } + + /* Extract options from the statement node tree */ + foreach(option, stmt->options) + { + DefElem *defel = (DefElem *) lfirst(option); + + if (strcmp(defel->defname, "password") == 0) + { + if (dpassword) + errorConflictingDefElem(defel, pstate); + dpassword = defel; + } + else if (strcmp(defel->defname, "sysid") == 0) + { + 
ereport(NOTICE, + (errmsg("SYSID can no longer be specified"))); + } + else if (strcmp(defel->defname, "superuser") == 0) + { + if (dissuper) + errorConflictingDefElem(defel, pstate); + dissuper = defel; + } + else if (strcmp(defel->defname, "inherit") == 0) + { + if (dinherit) + errorConflictingDefElem(defel, pstate); + dinherit = defel; + } + else if (strcmp(defel->defname, "createrole") == 0) + { + if (dcreaterole) + errorConflictingDefElem(defel, pstate); + dcreaterole = defel; + } + else if (strcmp(defel->defname, "createdb") == 0) + { + if (dcreatedb) + errorConflictingDefElem(defel, pstate); + dcreatedb = defel; + } + else if (strcmp(defel->defname, "canlogin") == 0) + { + if (dcanlogin) + errorConflictingDefElem(defel, pstate); + dcanlogin = defel; + } + else if (strcmp(defel->defname, "isreplication") == 0) + { + if (disreplication) + errorConflictingDefElem(defel, pstate); + disreplication = defel; + } + else if (strcmp(defel->defname, "connectionlimit") == 0) + { + if (dconnlimit) + errorConflictingDefElem(defel, pstate); + dconnlimit = defel; + } + else if (strcmp(defel->defname, "addroleto") == 0) + { + if (daddroleto) + errorConflictingDefElem(defel, pstate); + daddroleto = defel; + } + else if (strcmp(defel->defname, "rolemembers") == 0) + { + if (drolemembers) + errorConflictingDefElem(defel, pstate); + drolemembers = defel; + } + else if (strcmp(defel->defname, "adminmembers") == 0) + { + if (dadminmembers) + errorConflictingDefElem(defel, pstate); + dadminmembers = defel; + } + else if (strcmp(defel->defname, "validUntil") == 0) + { + if (dvalidUntil) + errorConflictingDefElem(defel, pstate); + dvalidUntil = defel; + } + else if (strcmp(defel->defname, "bypassrls") == 0) + { + if (dbypassRLS) + errorConflictingDefElem(defel, pstate); + dbypassRLS = defel; + } + else + elog(ERROR, "option \"%s\" not recognized", + defel->defname); + } + + if (dpassword && dpassword->arg) + password = strVal(dpassword->arg); + if (dissuper) + issuper = 
boolVal(dissuper->arg); + if (dinherit) + inherit = boolVal(dinherit->arg); + if (dcreaterole) + createrole = boolVal(dcreaterole->arg); + if (dcreatedb) + createdb = boolVal(dcreatedb->arg); + if (dcanlogin) + canlogin = boolVal(dcanlogin->arg); + if (disreplication) + isreplication = boolVal(disreplication->arg); + if (dconnlimit) + { + connlimit = intVal(dconnlimit->arg); + if (connlimit < -1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid connection limit: %d", connlimit))); + } + if (daddroleto) + addroleto = (List *) daddroleto->arg; + if (drolemembers) + rolemembers = (List *) drolemembers->arg; + if (dadminmembers) + adminmembers = (List *) dadminmembers->arg; + if (dvalidUntil) + validUntil = strVal(dvalidUntil->arg); + if (dbypassRLS) + bypassrls = boolVal(dbypassRLS->arg); + + /* Check some permissions first */ + if (issuper) + { + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to create superusers"))); + } + else if (isreplication) + { + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to create replication users"))); + } + else if (bypassrls) + { + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to create bypassrls users"))); + } + else + { + if (!have_createrole_privilege()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to create role"))); + } + + /* + * Check that the user is not trying to create a role in the reserved + * "pg_" namespace. + */ + if (IsReservedName(stmt->role)) + ereport(ERROR, + (errcode(ERRCODE_RESERVED_NAME), + errmsg("role name \"%s\" is reserved", + stmt->role), + errdetail("Role names starting with \"pg_\" are reserved."))); + + /* + * If built with appropriate switch, whine when regression-testing + * conventions for role names are violated. 
+ */ +#ifdef ENFORCE_REGRESSION_TEST_NAME_RESTRICTIONS + if (strncmp(stmt->role, "regress_", 8) != 0) + elog(WARNING, "roles created by regression test cases should have names starting with \"regress_\""); +#endif + + /* + * Check the pg_authid relation to be certain the role doesn't already + * exist. + */ + pg_authid_rel = table_open(AuthIdRelationId, RowExclusiveLock); + pg_authid_dsc = RelationGetDescr(pg_authid_rel); + + if (OidIsValid(get_role_oid(stmt->role, true))) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("role \"%s\" already exists", + stmt->role))); + + /* Convert validuntil to internal form */ + if (validUntil) + { + validUntil_datum = DirectFunctionCall3(timestamptz_in, + CStringGetDatum(validUntil), + ObjectIdGetDatum(InvalidOid), + Int32GetDatum(-1)); + validUntil_null = false; + } + else + { + validUntil_datum = (Datum) 0; + validUntil_null = true; + } + + /* + * Call the password checking hook if there is one defined + */ + if (check_password_hook && password) + (*check_password_hook) (stmt->role, + password, + get_password_type(password), + validUntil_datum, + validUntil_null); + + /* + * Build a tuple to insert + */ + MemSet(new_record, 0, sizeof(new_record)); + MemSet(new_record_nulls, false, sizeof(new_record_nulls)); + + new_record[Anum_pg_authid_rolname - 1] = + DirectFunctionCall1(namein, CStringGetDatum(stmt->role)); + + new_record[Anum_pg_authid_rolsuper - 1] = BoolGetDatum(issuper); + new_record[Anum_pg_authid_rolinherit - 1] = BoolGetDatum(inherit); + new_record[Anum_pg_authid_rolcreaterole - 1] = BoolGetDatum(createrole); + new_record[Anum_pg_authid_rolcreatedb - 1] = BoolGetDatum(createdb); + new_record[Anum_pg_authid_rolcanlogin - 1] = BoolGetDatum(canlogin); + new_record[Anum_pg_authid_rolreplication - 1] = BoolGetDatum(isreplication); + new_record[Anum_pg_authid_rolconnlimit - 1] = Int32GetDatum(connlimit); + + if (password) + { + char *shadow_pass; + const char *logdetail = NULL; + + /* + * Don't allow an 
empty password. Libpq treats an empty password the + * same as no password at all, and won't even try to authenticate. But + * other clients might, so allowing it would be confusing. By clearing + * the password when an empty string is specified, the account is + * consistently locked for all clients. + * + * Note that this only covers passwords stored in the database itself. + * There are also checks in the authentication code, to forbid an + * empty password from being used with authentication methods that + * fetch the password from an external system, like LDAP or PAM. + */ + if (password[0] == '\0' || + plain_crypt_verify(stmt->role, password, "", &logdetail) == STATUS_OK) + { + ereport(NOTICE, + (errmsg("empty string is not a valid password, clearing password"))); + new_record_nulls[Anum_pg_authid_rolpassword - 1] = true; + } + else + { + /* Encrypt the password to the requested format. */ + shadow_pass = encrypt_password(Password_encryption, stmt->role, + password); + new_record[Anum_pg_authid_rolpassword - 1] = + CStringGetTextDatum(shadow_pass); + } + } + else + new_record_nulls[Anum_pg_authid_rolpassword - 1] = true; + + new_record[Anum_pg_authid_rolvaliduntil - 1] = validUntil_datum; + new_record_nulls[Anum_pg_authid_rolvaliduntil - 1] = validUntil_null; + + new_record[Anum_pg_authid_rolbypassrls - 1] = BoolGetDatum(bypassrls); + + /* + * pg_largeobject_metadata contains pg_authid.oid's, so we use the + * binary-upgrade override. 
+ */
+	if (IsBinaryUpgrade)
+	{
+		/* pg_upgrade must have pre-assigned the OID via its support function */
+		if (!OidIsValid(binary_upgrade_next_pg_authid_oid))
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("pg_authid OID value not set when in binary upgrade mode")));
+
+		roleid = binary_upgrade_next_pg_authid_oid;
+		/* consume the override so it cannot be reused for another role */
+		binary_upgrade_next_pg_authid_oid = InvalidOid;
+	}
+	else
+	{
+		roleid = GetNewOidWithIndex(pg_authid_rel, AuthIdOidIndexId,
+									Anum_pg_authid_oid);
+	}
+
+	new_record[Anum_pg_authid_oid - 1] = ObjectIdGetDatum(roleid);
+
+	tuple = heap_form_tuple(pg_authid_dsc, new_record, new_record_nulls);
+
+	/*
+	 * Insert new record in the pg_authid table
+	 */
+	CatalogTupleInsert(pg_authid_rel, tuple);
+
+	/*
+	 * Advance command counter so we can see new record; else tests in
+	 * AddRoleMems may fail.
+	 */
+	if (addroleto || adminmembers || rolemembers)
+		CommandCounterIncrement();
+
+	/*
+	 * Add the new role to the specified existing roles.
+	 */
+	if (addroleto)
+	{
+		/* Build a one-element member list naming the role just created */
+		RoleSpec   *thisrole = makeNode(RoleSpec);
+		List	   *thisrole_list = list_make1(thisrole);
+		List	   *thisrole_oidlist = list_make1_oid(roleid);
+
+		thisrole->roletype = ROLESPEC_CSTRING;
+		thisrole->rolename = stmt->role;
+		thisrole->location = -1;
+
+		foreach(item, addroleto)
+		{
+			RoleSpec   *oldrole = lfirst(item);
+			HeapTuple	oldroletup = get_rolespec_tuple(oldrole);
+			Form_pg_authid oldroleform = (Form_pg_authid) GETSTRUCT(oldroletup);
+			Oid			oldroleid = oldroleform->oid;
+			char	   *oldrolename = NameStr(oldroleform->rolname);
+
+			/* the new role becomes a member of each listed role */
+			AddRoleMems(oldrolename, oldroleid,
+						thisrole_list,
+						thisrole_oidlist,
+						GetUserId(), false);
+
+			ReleaseSysCache(oldroletup);
+		}
+	}
+
+	/*
+	 * Add the specified members to this new role.  adminmembers get the admin
+	 * option, rolemembers don't.
+ */ + AddRoleMems(stmt->role, roleid, + adminmembers, roleSpecsToIds(adminmembers), + GetUserId(), true); + AddRoleMems(stmt->role, roleid, + rolemembers, roleSpecsToIds(rolemembers), + GetUserId(), false); + + /* Post creation hook for new role */ + InvokeObjectPostCreateHook(AuthIdRelationId, roleid, 0); + + /* + * Close pg_authid, but keep lock till commit. + */ + table_close(pg_authid_rel, NoLock); + + return roleid; +} + + +/* + * ALTER ROLE + * + * Note: the rolemembers option accepted here is intended to support the + * backwards-compatible ALTER GROUP syntax. Although it will work to say + * "ALTER ROLE role ROLE rolenames", we don't document it. + */ +Oid +AlterRole(ParseState *pstate, AlterRoleStmt *stmt) +{ + Datum new_record[Natts_pg_authid]; + bool new_record_nulls[Natts_pg_authid]; + bool new_record_repl[Natts_pg_authid]; + Relation pg_authid_rel; + TupleDesc pg_authid_dsc; + HeapTuple tuple, + new_tuple; + Form_pg_authid authform; + ListCell *option; + char *rolename; + char *password = NULL; /* user password */ + int connlimit = -1; /* maximum connections allowed */ + char *validUntil = NULL; /* time the login is valid until */ + Datum validUntil_datum; /* same, as timestamptz Datum */ + bool validUntil_null; + DefElem *dpassword = NULL; + DefElem *dissuper = NULL; + DefElem *dinherit = NULL; + DefElem *dcreaterole = NULL; + DefElem *dcreatedb = NULL; + DefElem *dcanlogin = NULL; + DefElem *disreplication = NULL; + DefElem *dconnlimit = NULL; + DefElem *drolemembers = NULL; + DefElem *dvalidUntil = NULL; + DefElem *dbypassRLS = NULL; + Oid roleid; + + check_rolespec_name(stmt->role, + _("Cannot alter reserved roles.")); + + /* Extract options from the statement node tree */ + foreach(option, stmt->options) + { + DefElem *defel = (DefElem *) lfirst(option); + + if (strcmp(defel->defname, "password") == 0) + { + if (dpassword) + errorConflictingDefElem(defel, pstate); + dpassword = defel; + } + else if (strcmp(defel->defname, "superuser") == 0) + { + 
if (dissuper) + errorConflictingDefElem(defel, pstate); + dissuper = defel; + } + else if (strcmp(defel->defname, "inherit") == 0) + { + if (dinherit) + errorConflictingDefElem(defel, pstate); + dinherit = defel; + } + else if (strcmp(defel->defname, "createrole") == 0) + { + if (dcreaterole) + errorConflictingDefElem(defel, pstate); + dcreaterole = defel; + } + else if (strcmp(defel->defname, "createdb") == 0) + { + if (dcreatedb) + errorConflictingDefElem(defel, pstate); + dcreatedb = defel; + } + else if (strcmp(defel->defname, "canlogin") == 0) + { + if (dcanlogin) + errorConflictingDefElem(defel, pstate); + dcanlogin = defel; + } + else if (strcmp(defel->defname, "isreplication") == 0) + { + if (disreplication) + errorConflictingDefElem(defel, pstate); + disreplication = defel; + } + else if (strcmp(defel->defname, "connectionlimit") == 0) + { + if (dconnlimit) + errorConflictingDefElem(defel, pstate); + dconnlimit = defel; + } + else if (strcmp(defel->defname, "rolemembers") == 0 && + stmt->action != 0) + { + if (drolemembers) + errorConflictingDefElem(defel, pstate); + drolemembers = defel; + } + else if (strcmp(defel->defname, "validUntil") == 0) + { + if (dvalidUntil) + errorConflictingDefElem(defel, pstate); + dvalidUntil = defel; + } + else if (strcmp(defel->defname, "bypassrls") == 0) + { + if (dbypassRLS) + errorConflictingDefElem(defel, pstate); + dbypassRLS = defel; + } + else + elog(ERROR, "option \"%s\" not recognized", + defel->defname); + } + + if (dpassword && dpassword->arg) + password = strVal(dpassword->arg); + if (dconnlimit) + { + connlimit = intVal(dconnlimit->arg); + if (connlimit < -1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid connection limit: %d", connlimit))); + } + if (dvalidUntil) + validUntil = strVal(dvalidUntil->arg); + + /* + * Scan the pg_authid relation to be certain the user exists. 
+ */ + pg_authid_rel = table_open(AuthIdRelationId, RowExclusiveLock); + pg_authid_dsc = RelationGetDescr(pg_authid_rel); + + tuple = get_rolespec_tuple(stmt->role); + authform = (Form_pg_authid) GETSTRUCT(tuple); + rolename = pstrdup(NameStr(authform->rolname)); + roleid = authform->oid; + + /* + * To mess with a superuser or replication role in any way you gotta be + * superuser. We also insist on superuser to change the BYPASSRLS + * property. Otherwise, if you don't have createrole, you're only allowed + * to change your own password. + */ + if (authform->rolsuper || dissuper) + { + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to alter superuser roles or change superuser attribute"))); + } + else if (authform->rolreplication || disreplication) + { + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to alter replication roles or change replication attribute"))); + } + else if (dbypassRLS) + { + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to change bypassrls attribute"))); + } + else if (!have_createrole_privilege()) + { + /* check the rest */ + if (dinherit || dcreaterole || dcreatedb || dcanlogin || dconnlimit || + drolemembers || dvalidUntil || !dpassword || roleid != GetUserId()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied"))); + } + + /* Convert validuntil to internal form */ + if (dvalidUntil) + { + validUntil_datum = DirectFunctionCall3(timestamptz_in, + CStringGetDatum(validUntil), + ObjectIdGetDatum(InvalidOid), + Int32GetDatum(-1)); + validUntil_null = false; + } + else + { + /* fetch existing setting in case hook needs it */ + validUntil_datum = SysCacheGetAttr(AUTHNAME, tuple, + Anum_pg_authid_rolvaliduntil, + &validUntil_null); + } + + /* + * Call the password checking hook if there is one defined + */ + if (check_password_hook 
&& password) + (*check_password_hook) (rolename, + password, + get_password_type(password), + validUntil_datum, + validUntil_null); + + /* + * Build an updated tuple, perusing the information just obtained + */ + MemSet(new_record, 0, sizeof(new_record)); + MemSet(new_record_nulls, false, sizeof(new_record_nulls)); + MemSet(new_record_repl, false, sizeof(new_record_repl)); + + /* + * issuper/createrole/etc + */ + if (dissuper) + { + new_record[Anum_pg_authid_rolsuper - 1] = BoolGetDatum(boolVal(dissuper->arg)); + new_record_repl[Anum_pg_authid_rolsuper - 1] = true; + } + + if (dinherit) + { + new_record[Anum_pg_authid_rolinherit - 1] = BoolGetDatum(boolVal(dinherit->arg)); + new_record_repl[Anum_pg_authid_rolinherit - 1] = true; + } + + if (dcreaterole) + { + new_record[Anum_pg_authid_rolcreaterole - 1] = BoolGetDatum(boolVal(dcreaterole->arg)); + new_record_repl[Anum_pg_authid_rolcreaterole - 1] = true; + } + + if (dcreatedb) + { + new_record[Anum_pg_authid_rolcreatedb - 1] = BoolGetDatum(boolVal(dcreatedb->arg)); + new_record_repl[Anum_pg_authid_rolcreatedb - 1] = true; + } + + if (dcanlogin) + { + new_record[Anum_pg_authid_rolcanlogin - 1] = BoolGetDatum(boolVal(dcanlogin->arg)); + new_record_repl[Anum_pg_authid_rolcanlogin - 1] = true; + } + + if (disreplication) + { + new_record[Anum_pg_authid_rolreplication - 1] = BoolGetDatum(boolVal(disreplication->arg)); + new_record_repl[Anum_pg_authid_rolreplication - 1] = true; + } + + if (dconnlimit) + { + new_record[Anum_pg_authid_rolconnlimit - 1] = Int32GetDatum(connlimit); + new_record_repl[Anum_pg_authid_rolconnlimit - 1] = true; + } + + /* password */ + if (password) + { + char *shadow_pass; + const char *logdetail = NULL; + + /* Like in CREATE USER, don't allow an empty password. 
*/ + if (password[0] == '\0' || + plain_crypt_verify(rolename, password, "", &logdetail) == STATUS_OK) + { + ereport(NOTICE, + (errmsg("empty string is not a valid password, clearing password"))); + new_record_nulls[Anum_pg_authid_rolpassword - 1] = true; + } + else + { + /* Encrypt the password to the requested format. */ + shadow_pass = encrypt_password(Password_encryption, rolename, + password); + new_record[Anum_pg_authid_rolpassword - 1] = + CStringGetTextDatum(shadow_pass); + } + new_record_repl[Anum_pg_authid_rolpassword - 1] = true; + } + + /* unset password */ + if (dpassword && dpassword->arg == NULL) + { + new_record_repl[Anum_pg_authid_rolpassword - 1] = true; + new_record_nulls[Anum_pg_authid_rolpassword - 1] = true; + } + + /* valid until */ + new_record[Anum_pg_authid_rolvaliduntil - 1] = validUntil_datum; + new_record_nulls[Anum_pg_authid_rolvaliduntil - 1] = validUntil_null; + new_record_repl[Anum_pg_authid_rolvaliduntil - 1] = true; + + if (dbypassRLS) + { + new_record[Anum_pg_authid_rolbypassrls - 1] = BoolGetDatum(boolVal(dbypassRLS->arg)); + new_record_repl[Anum_pg_authid_rolbypassrls - 1] = true; + } + + new_tuple = heap_modify_tuple(tuple, pg_authid_dsc, new_record, + new_record_nulls, new_record_repl); + CatalogTupleUpdate(pg_authid_rel, &tuple->t_self, new_tuple); + + InvokeObjectPostAlterHook(AuthIdRelationId, roleid, 0); + + ReleaseSysCache(tuple); + heap_freetuple(new_tuple); + + /* + * Advance command counter so we can see new record; else tests in + * AddRoleMems may fail. + */ + if (drolemembers) + { + List *rolemembers = (List *) drolemembers->arg; + + CommandCounterIncrement(); + + if (stmt->action == +1) /* add members to role */ + AddRoleMems(rolename, roleid, + rolemembers, roleSpecsToIds(rolemembers), + GetUserId(), false); + else if (stmt->action == -1) /* drop members from role */ + DelRoleMems(rolename, roleid, + rolemembers, roleSpecsToIds(rolemembers), + false); + } + + /* + * Close pg_authid, but keep lock till commit. 
+ */ + table_close(pg_authid_rel, NoLock); + + return roleid; +} + + +/* + * ALTER ROLE ... SET + */ +Oid +AlterRoleSet(AlterRoleSetStmt *stmt) +{ + HeapTuple roletuple; + Form_pg_authid roleform; + Oid databaseid = InvalidOid; + Oid roleid = InvalidOid; + + if (stmt->role) + { + check_rolespec_name(stmt->role, + _("Cannot alter reserved roles.")); + + roletuple = get_rolespec_tuple(stmt->role); + roleform = (Form_pg_authid) GETSTRUCT(roletuple); + roleid = roleform->oid; + + /* + * Obtain a lock on the role and make sure it didn't go away in the + * meantime. + */ + shdepLockAndCheckObject(AuthIdRelationId, roleid); + + /* + * To mess with a superuser you gotta be superuser; else you need + * createrole, or just want to change your own settings + */ + if (roleform->rolsuper) + { + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to alter superusers"))); + } + else + { + if (!have_createrole_privilege() && roleid != GetUserId()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied"))); + } + + ReleaseSysCache(roletuple); + } + + /* look up and lock the database, if specified */ + if (stmt->database != NULL) + { + databaseid = get_database_oid(stmt->database, false); + shdepLockAndCheckObject(DatabaseRelationId, databaseid); + + if (!stmt->role) + { + /* + * If no role is specified, then this is effectively the same as + * ALTER DATABASE ... SET, so use the same permission check. + */ + if (!pg_database_ownercheck(databaseid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE, + stmt->database); + } + } + + if (!stmt->role && !stmt->database) + { + /* Must be superuser to alter settings globally. 
*/ + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to alter settings globally"))); + } + + AlterSetting(databaseid, roleid, stmt->setstmt); + + return roleid; +} + + +/* + * DROP ROLE + */ +void +DropRole(DropRoleStmt *stmt) +{ + Relation pg_authid_rel, + pg_auth_members_rel; + ListCell *item; + + if (!have_createrole_privilege()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to drop role"))); + + /* + * Scan the pg_authid relation to find the Oid of the role(s) to be + * deleted. + */ + pg_authid_rel = table_open(AuthIdRelationId, RowExclusiveLock); + pg_auth_members_rel = table_open(AuthMemRelationId, RowExclusiveLock); + + foreach(item, stmt->roles) + { + RoleSpec *rolspec = lfirst(item); + char *role; + HeapTuple tuple, + tmp_tuple; + Form_pg_authid roleform; + ScanKeyData scankey; + char *detail; + char *detail_log; + SysScanDesc sscan; + Oid roleid; + + if (rolspec->roletype != ROLESPEC_CSTRING) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot use special role specifier in DROP ROLE"))); + role = rolspec->rolename; + + tuple = SearchSysCache1(AUTHNAME, PointerGetDatum(role)); + if (!HeapTupleIsValid(tuple)) + { + if (!stmt->missing_ok) + { + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("role \"%s\" does not exist", role))); + } + else + { + ereport(NOTICE, + (errmsg("role \"%s\" does not exist, skipping", + role))); + } + + continue; + } + + roleform = (Form_pg_authid) GETSTRUCT(tuple); + roleid = roleform->oid; + + if (roleid == GetUserId()) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_IN_USE), + errmsg("current user cannot be dropped"))); + if (roleid == GetOuterUserId()) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_IN_USE), + errmsg("current user cannot be dropped"))); + if (roleid == GetSessionUserId()) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_IN_USE), + errmsg("session user cannot be dropped"))); + + /* + * 
For safety's sake, we allow createrole holders to drop ordinary
+		 * roles but not superuser roles.  This is mainly to avoid the
+		 * scenario where you accidentally drop the last superuser.
+		 */
+		if (roleform->rolsuper && !superuser())
+			ereport(ERROR,
+					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+					 errmsg("must be superuser to drop superusers")));
+
+		/* DROP hook for the role being removed */
+		InvokeObjectDropHook(AuthIdRelationId, roleid, 0);
+
+		/*
+		 * Lock the role, so nobody can add dependencies to her while we drop
+		 * her.  We keep the lock until the end of transaction.
+		 */
+		LockSharedObject(AuthIdRelationId, roleid, 0, AccessExclusiveLock);
+
+		/*
+		 * Check for pg_shdepend entries depending on this role; if any
+		 * objects still depend on it, refuse the drop and report the
+		 * dependent objects in the error detail.
+		 */
+		if (checkSharedDependencies(AuthIdRelationId, roleid,
+									&detail, &detail_log))
+			ereport(ERROR,
+					(errcode(ERRCODE_DEPENDENT_OBJECTS_STILL_EXIST),
+					 errmsg("role \"%s\" cannot be dropped because some objects depend on it",
+							role),
+					 errdetail_internal("%s", detail),
+					 errdetail_log("%s", detail_log)));
+
+		/*
+		 * Remove the role from the pg_authid table
+		 */
+		CatalogTupleDelete(pg_authid_rel, &tuple->t_self);
+
+		ReleaseSysCache(tuple);
+
+		/*
+		 * Remove role from the pg_auth_members table.  We have to remove all
+		 * tuples that show it as either a role or a member.
+		 *
+		 * XXX what about grantor entries?  Maybe we should do one heap scan.
+ */ + ScanKeyInit(&scankey, + Anum_pg_auth_members_roleid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(roleid)); + + sscan = systable_beginscan(pg_auth_members_rel, AuthMemRoleMemIndexId, + true, NULL, 1, &scankey); + + while (HeapTupleIsValid(tmp_tuple = systable_getnext(sscan))) + { + CatalogTupleDelete(pg_auth_members_rel, &tmp_tuple->t_self); + } + + systable_endscan(sscan); + + ScanKeyInit(&scankey, + Anum_pg_auth_members_member, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(roleid)); + + sscan = systable_beginscan(pg_auth_members_rel, AuthMemMemRoleIndexId, + true, NULL, 1, &scankey); + + while (HeapTupleIsValid(tmp_tuple = systable_getnext(sscan))) + { + CatalogTupleDelete(pg_auth_members_rel, &tmp_tuple->t_self); + } + + systable_endscan(sscan); + + /* + * Remove any comments or security labels on this role. + */ + DeleteSharedComments(roleid, AuthIdRelationId); + DeleteSharedSecurityLabel(roleid, AuthIdRelationId); + + /* + * Remove settings for this role. + */ + DropSetting(InvalidOid, roleid); + + /* + * Advance command counter so that later iterations of this loop will + * see the changes already made. This is essential if, for example, + * we are trying to drop both a role and one of its direct members --- + * we'll get an error if we try to delete the linking pg_auth_members + * tuple twice. (We do not need a CCI between the two delete loops + * above, because it's not allowed for a role to directly contain + * itself.) + */ + CommandCounterIncrement(); + } + + /* + * Now we can clean up; but keep locks until commit. 
+ */ + table_close(pg_auth_members_rel, NoLock); + table_close(pg_authid_rel, NoLock); +} + +/* + * Rename role + */ +ObjectAddress +RenameRole(const char *oldname, const char *newname) +{ + HeapTuple oldtuple, + newtuple; + TupleDesc dsc; + Relation rel; + Datum datum; + bool isnull; + Datum repl_val[Natts_pg_authid]; + bool repl_null[Natts_pg_authid]; + bool repl_repl[Natts_pg_authid]; + int i; + Oid roleid; + ObjectAddress address; + Form_pg_authid authform; + + rel = table_open(AuthIdRelationId, RowExclusiveLock); + dsc = RelationGetDescr(rel); + + oldtuple = SearchSysCache1(AUTHNAME, CStringGetDatum(oldname)); + if (!HeapTupleIsValid(oldtuple)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("role \"%s\" does not exist", oldname))); + + /* + * XXX Client applications probably store the session user somewhere, so + * renaming it could cause confusion. On the other hand, there may not be + * an actual problem besides a little confusion, so think about this and + * decide. Same for SET ROLE ... we don't restrict renaming the current + * effective userid, though. + */ + + authform = (Form_pg_authid) GETSTRUCT(oldtuple); + roleid = authform->oid; + + if (roleid == GetSessionUserId()) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("session user cannot be renamed"))); + if (roleid == GetOuterUserId()) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("current user cannot be renamed"))); + + /* + * Check that the user is not trying to rename a system role and not + * trying to rename a role into the reserved "pg_" namespace. 
+ */ + if (IsReservedName(NameStr(authform->rolname))) + ereport(ERROR, + (errcode(ERRCODE_RESERVED_NAME), + errmsg("role name \"%s\" is reserved", + NameStr(authform->rolname)), + errdetail("Role names starting with \"pg_\" are reserved."))); + + if (IsReservedName(newname)) + ereport(ERROR, + (errcode(ERRCODE_RESERVED_NAME), + errmsg("role name \"%s\" is reserved", + newname), + errdetail("Role names starting with \"pg_\" are reserved."))); + + /* + * If built with appropriate switch, whine when regression-testing + * conventions for role names are violated. + */ +#ifdef ENFORCE_REGRESSION_TEST_NAME_RESTRICTIONS + if (strncmp(newname, "regress_", 8) != 0) + elog(WARNING, "roles created by regression test cases should have names starting with \"regress_\""); +#endif + + /* make sure the new name doesn't exist */ + if (SearchSysCacheExists1(AUTHNAME, CStringGetDatum(newname))) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("role \"%s\" already exists", newname))); + + /* + * createrole is enough privilege unless you want to mess with a superuser + */ + if (((Form_pg_authid) GETSTRUCT(oldtuple))->rolsuper) + { + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to rename superusers"))); + } + else + { + if (!have_createrole_privilege()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to rename role"))); + } + + /* OK, construct the modified tuple */ + for (i = 0; i < Natts_pg_authid; i++) + repl_repl[i] = false; + + repl_repl[Anum_pg_authid_rolname - 1] = true; + repl_val[Anum_pg_authid_rolname - 1] = DirectFunctionCall1(namein, + CStringGetDatum(newname)); + repl_null[Anum_pg_authid_rolname - 1] = false; + + datum = heap_getattr(oldtuple, Anum_pg_authid_rolpassword, dsc, &isnull); + + if (!isnull && get_password_type(TextDatumGetCString(datum)) == PASSWORD_TYPE_MD5) + { + /* MD5 uses the username as salt, so just clear it on a rename */ + 
repl_repl[Anum_pg_authid_rolpassword - 1] = true; + repl_null[Anum_pg_authid_rolpassword - 1] = true; + + ereport(NOTICE, + (errmsg("MD5 password cleared because of role rename"))); + } + + newtuple = heap_modify_tuple(oldtuple, dsc, repl_val, repl_null, repl_repl); + CatalogTupleUpdate(rel, &oldtuple->t_self, newtuple); + + InvokeObjectPostAlterHook(AuthIdRelationId, roleid, 0); + + ObjectAddressSet(address, AuthIdRelationId, roleid); + + ReleaseSysCache(oldtuple); + + /* + * Close pg_authid, but keep lock till commit. + */ + table_close(rel, NoLock); + + return address; +} + +/* + * GrantRoleStmt + * + * Grant/Revoke roles to/from roles + */ +void +GrantRole(GrantRoleStmt *stmt) +{ + Relation pg_authid_rel; + Oid grantor; + List *grantee_ids; + ListCell *item; + + if (stmt->grantor) + grantor = get_rolespec_oid(stmt->grantor, false); + else + grantor = GetUserId(); + + grantee_ids = roleSpecsToIds(stmt->grantee_roles); + + /* AccessShareLock is enough since we aren't modifying pg_authid */ + pg_authid_rel = table_open(AuthIdRelationId, AccessShareLock); + + /* + * Step through all of the granted roles and add/remove entries for the + * grantees, or, if admin_opt is set, then just add/remove the admin + * option. 
+ * + * Note: Permissions checking is done by AddRoleMems/DelRoleMems + */ + foreach(item, stmt->granted_roles) + { + AccessPriv *priv = (AccessPriv *) lfirst(item); + char *rolename = priv->priv_name; + Oid roleid; + + /* Must reject priv(columns) and ALL PRIVILEGES(columns) */ + if (rolename == NULL || priv->cols != NIL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_GRANT_OPERATION), + errmsg("column names cannot be included in GRANT/REVOKE ROLE"))); + + roleid = get_role_oid(rolename, false); + if (stmt->is_grant) + AddRoleMems(rolename, roleid, + stmt->grantee_roles, grantee_ids, + grantor, stmt->admin_opt); + else + DelRoleMems(rolename, roleid, + stmt->grantee_roles, grantee_ids, + stmt->admin_opt); + } + + /* + * Close pg_authid, but keep lock till commit. + */ + table_close(pg_authid_rel, NoLock); +} + +/* + * DropOwnedObjects + * + * Drop the objects owned by a given list of roles. + */ +void +DropOwnedObjects(DropOwnedStmt *stmt) +{ + List *role_ids = roleSpecsToIds(stmt->roles); + ListCell *cell; + + /* Check privileges */ + foreach(cell, role_ids) + { + Oid roleid = lfirst_oid(cell); + + if (!has_privs_of_role(GetUserId(), roleid)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to drop objects"))); + } + + /* Ok, do it */ + shdepDropOwned(role_ids, stmt->behavior); +} + +/* + * ReassignOwnedObjects + * + * Give the objects owned by a given list of roles away to another user. 
+ */ +void +ReassignOwnedObjects(ReassignOwnedStmt *stmt) +{ + List *role_ids = roleSpecsToIds(stmt->roles); + ListCell *cell; + Oid newrole; + + /* Check privileges */ + foreach(cell, role_ids) + { + Oid roleid = lfirst_oid(cell); + + if (!has_privs_of_role(GetUserId(), roleid)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to reassign objects"))); + } + + /* Must have privileges on the receiving side too */ + newrole = get_rolespec_oid(stmt->newrole, false); + + if (!has_privs_of_role(GetUserId(), newrole)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to reassign objects"))); + + /* Ok, do it */ + shdepReassignOwned(role_ids, newrole); +} + +/* + * roleSpecsToIds + * + * Given a list of RoleSpecs, generate a list of role OIDs in the same order. + * + * ROLESPEC_PUBLIC is not allowed. + */ +List * +roleSpecsToIds(List *memberNames) +{ + List *result = NIL; + ListCell *l; + + foreach(l, memberNames) + { + RoleSpec *rolespec = lfirst_node(RoleSpec, l); + Oid roleid; + + roleid = get_rolespec_oid(rolespec, false); + result = lappend_oid(result, roleid); + } + return result; +} + +/* + * AddRoleMems -- Add given members to the specified role + * + * rolename: name of role to add to (used only for error messages) + * roleid: OID of role to add to + * memberSpecs: list of RoleSpec of roles to add (used only for error messages) + * memberIds: OIDs of roles to add + * grantorId: who is granting the membership + * admin_opt: granting admin option? 
+ */ +static void +AddRoleMems(const char *rolename, Oid roleid, + List *memberSpecs, List *memberIds, + Oid grantorId, bool admin_opt) +{ + Relation pg_authmem_rel; + TupleDesc pg_authmem_dsc; + ListCell *specitem; + ListCell *iditem; + + Assert(list_length(memberSpecs) == list_length(memberIds)); + + /* Skip permission check if nothing to do */ + if (!memberIds) + return; + + /* + * Check permissions: must have createrole or admin option on the role to + * be changed. To mess with a superuser role, you gotta be superuser. + */ + if (superuser_arg(roleid)) + { + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to alter superusers"))); + } + else + { + if (!have_createrole_privilege() && + !is_admin_of_role(grantorId, roleid)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must have admin option on role \"%s\"", + rolename))); + } + + /* + * The charter of pg_database_owner is to have exactly one, implicit, + * situation-dependent member. There's no technical need for this + * restriction. (One could lift it and take the further step of making + * pg_database_ownercheck() equivalent to has_privs_of_role(roleid, + * ROLE_PG_DATABASE_OWNER), in which case explicit, situation-independent + * members could act as the owner of any database.) + */ + if (roleid == ROLE_PG_DATABASE_OWNER) + ereport(ERROR, + errmsg("role \"%s\" cannot have explicit members", rolename)); + + /* + * The role membership grantor of record has little significance at + * present. Nonetheless, inasmuch as users might look to it for a crude + * audit trail, let only superusers impute the grant to a third party. 
+ */ + if (grantorId != GetUserId() && !superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to set grantor"))); + + pg_authmem_rel = table_open(AuthMemRelationId, RowExclusiveLock); + pg_authmem_dsc = RelationGetDescr(pg_authmem_rel); + + forboth(specitem, memberSpecs, iditem, memberIds) + { + RoleSpec *memberRole = lfirst_node(RoleSpec, specitem); + Oid memberid = lfirst_oid(iditem); + HeapTuple authmem_tuple; + HeapTuple tuple; + Datum new_record[Natts_pg_auth_members]; + bool new_record_nulls[Natts_pg_auth_members]; + bool new_record_repl[Natts_pg_auth_members]; + + /* + * pg_database_owner is never a role member. Lifting this restriction + * would require a policy decision about membership loops. One could + * prevent loops, which would include making "ALTER DATABASE x OWNER + * TO proposed_datdba" fail if is_member_of_role(pg_database_owner, + * proposed_datdba). Hence, gaining a membership could reduce what a + * role could do. Alternately, one could allow these memberships to + * complete loops. A role could then have actual WITH ADMIN OPTION on + * itself, prompting a decision about is_admin_of_role() treatment of + * the case. + * + * Lifting this restriction also has policy implications for ownership + * of shared objects (databases and tablespaces). We allow such + * ownership, but we might find cause to ban it in the future. + * Designing such a ban would be more troublesome if the design had to + * address pg_database_owner being a member of role FOO that owns a + * shared object. (The effect of such ownership is that any owner of + * another database can act as the owner of affected shared objects.) + */ + if (memberid == ROLE_PG_DATABASE_OWNER) + ereport(ERROR, + errmsg("role \"%s\" cannot be a member of any role", + get_rolespec_name(memberRole))); + + /* + * Refuse creation of membership loops, including the trivial case + * where a role is made a member of itself.
We do this by checking to + * see if the target role is already a member of the proposed member + * role. We have to ignore possible superuserness, however, else we + * could never grant membership in a superuser-privileged role. + */ + if (is_member_of_role_nosuper(roleid, memberid)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_GRANT_OPERATION), + errmsg("role \"%s\" is a member of role \"%s\"", + rolename, get_rolespec_name(memberRole)))); + + /* + * Check if entry for this role/member already exists; if so, give + * warning unless we are adding admin option. + */ + authmem_tuple = SearchSysCache2(AUTHMEMROLEMEM, + ObjectIdGetDatum(roleid), + ObjectIdGetDatum(memberid)); + if (HeapTupleIsValid(authmem_tuple) && + (!admin_opt || + ((Form_pg_auth_members) GETSTRUCT(authmem_tuple))->admin_option)) + { + ereport(NOTICE, + (errmsg("role \"%s\" is already a member of role \"%s\"", + get_rolespec_name(memberRole), rolename))); + ReleaseSysCache(authmem_tuple); + continue; + } + + /* Build a tuple to insert or update */ + MemSet(new_record, 0, sizeof(new_record)); + MemSet(new_record_nulls, false, sizeof(new_record_nulls)); + MemSet(new_record_repl, false, sizeof(new_record_repl)); + + new_record[Anum_pg_auth_members_roleid - 1] = ObjectIdGetDatum(roleid); + new_record[Anum_pg_auth_members_member - 1] = ObjectIdGetDatum(memberid); + new_record[Anum_pg_auth_members_grantor - 1] = ObjectIdGetDatum(grantorId); + new_record[Anum_pg_auth_members_admin_option - 1] = BoolGetDatum(admin_opt); + + if (HeapTupleIsValid(authmem_tuple)) + { + new_record_repl[Anum_pg_auth_members_grantor - 1] = true; + new_record_repl[Anum_pg_auth_members_admin_option - 1] = true; + tuple = heap_modify_tuple(authmem_tuple, pg_authmem_dsc, + new_record, + new_record_nulls, new_record_repl); + CatalogTupleUpdate(pg_authmem_rel, &tuple->t_self, tuple); + ReleaseSysCache(authmem_tuple); + } + else + { + tuple = heap_form_tuple(pg_authmem_dsc, + new_record, new_record_nulls); + 
CatalogTupleInsert(pg_authmem_rel, tuple); + } + + /* CCI after each change, in case there are duplicates in list */ + CommandCounterIncrement(); + } + + /* + * Close pg_authmem, but keep lock till commit. + */ + table_close(pg_authmem_rel, NoLock); +} + +/* + * DelRoleMems -- Remove given members from the specified role + * + * rolename: name of role to del from (used only for error messages) + * roleid: OID of role to del from + * memberSpecs: list of RoleSpec of roles to del (used only for error messages) + * memberIds: OIDs of roles to del + * admin_opt: remove admin option only? + */ +static void +DelRoleMems(const char *rolename, Oid roleid, + List *memberSpecs, List *memberIds, + bool admin_opt) +{ + Relation pg_authmem_rel; + TupleDesc pg_authmem_dsc; + ListCell *specitem; + ListCell *iditem; + + Assert(list_length(memberSpecs) == list_length(memberIds)); + + /* Skip permission check if nothing to do */ + if (!memberIds) + return; + + /* + * Check permissions: must have createrole or admin option on the role to + * be changed. To mess with a superuser role, you gotta be superuser. 
+ */ + if (superuser_arg(roleid)) + { + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to alter superusers"))); + } + else + { + if (!have_createrole_privilege() && + !is_admin_of_role(GetUserId(), roleid)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must have admin option on role \"%s\"", + rolename))); + } + + pg_authmem_rel = table_open(AuthMemRelationId, RowExclusiveLock); + pg_authmem_dsc = RelationGetDescr(pg_authmem_rel); + + forboth(specitem, memberSpecs, iditem, memberIds) + { + RoleSpec *memberRole = lfirst(specitem); + Oid memberid = lfirst_oid(iditem); + HeapTuple authmem_tuple; + + /* + * Find entry for this role/member + */ + authmem_tuple = SearchSysCache2(AUTHMEMROLEMEM, + ObjectIdGetDatum(roleid), + ObjectIdGetDatum(memberid)); + if (!HeapTupleIsValid(authmem_tuple)) + { + ereport(WARNING, + (errmsg("role \"%s\" is not a member of role \"%s\"", + get_rolespec_name(memberRole), rolename))); + continue; + } + + if (!admin_opt) + { + /* Remove the entry altogether */ + CatalogTupleDelete(pg_authmem_rel, &authmem_tuple->t_self); + } + else + { + /* Just turn off the admin option */ + HeapTuple tuple; + Datum new_record[Natts_pg_auth_members]; + bool new_record_nulls[Natts_pg_auth_members]; + bool new_record_repl[Natts_pg_auth_members]; + + /* Build a tuple to update with */ + MemSet(new_record, 0, sizeof(new_record)); + MemSet(new_record_nulls, false, sizeof(new_record_nulls)); + MemSet(new_record_repl, false, sizeof(new_record_repl)); + + new_record[Anum_pg_auth_members_admin_option - 1] = BoolGetDatum(false); + new_record_repl[Anum_pg_auth_members_admin_option - 1] = true; + + tuple = heap_modify_tuple(authmem_tuple, pg_authmem_dsc, + new_record, + new_record_nulls, new_record_repl); + CatalogTupleUpdate(pg_authmem_rel, &tuple->t_self, tuple); + } + + ReleaseSysCache(authmem_tuple); + + /* CCI after each change, in case there are duplicates in list */ + 
CommandCounterIncrement(); + } + + /* + * Close pg_authmem, but keep lock till commit. + */ + table_close(pg_authmem_rel, NoLock); +} diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c new file mode 100644 index 0000000..75b0ca9 --- /dev/null +++ b/src/backend/commands/vacuum.c @@ -0,0 +1,2465 @@ +/*------------------------------------------------------------------------- + * + * vacuum.c + * The postgres vacuum cleaner. + * + * This file includes (a) control and dispatch code for VACUUM and ANALYZE + * commands, (b) code to compute various vacuum thresholds, and (c) index + * vacuum code. + * + * VACUUM for heap AM is implemented in vacuumlazy.c, parallel vacuum in + * vacuumparallel.c, ANALYZE in analyze.c, and VACUUM FULL is a variant of + * CLUSTER, handled in cluster.c. + * + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/vacuum.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <math.h> + +#include "access/clog.h" +#include "access/commit_ts.h" +#include "access/genam.h" +#include "access/heapam.h" +#include "access/htup_details.h" +#include "access/multixact.h" +#include "access/tableam.h" +#include "access/transam.h" +#include "access/xact.h" +#include "catalog/namespace.h" +#include "catalog/index.h" +#include "catalog/pg_database.h" +#include "catalog/pg_inherits.h" +#include "catalog/pg_namespace.h" +#include "commands/cluster.h" +#include "commands/defrem.h" +#include "commands/vacuum.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "pgstat.h" +#include "postmaster/autovacuum.h" +#include "postmaster/bgworker_internals.h" +#include "storage/bufmgr.h" +#include "storage/lmgr.h" +#include "storage/pmsignal.h" +#include "storage/proc.h" +#include "storage/procarray.h" +#include "utils/acl.h" +#include 
"utils/fmgroids.h" +#include "utils/guc.h" +#include "utils/memutils.h" +#include "utils/pg_rusage.h" +#include "utils/snapmgr.h" +#include "utils/syscache.h" + + +/* + * GUC parameters + */ +int vacuum_freeze_min_age; +int vacuum_freeze_table_age; +int vacuum_multixact_freeze_min_age; +int vacuum_multixact_freeze_table_age; +int vacuum_failsafe_age; +int vacuum_multixact_failsafe_age; + + +/* A few variables that don't seem worth passing around as parameters */ +static MemoryContext vac_context = NULL; +static BufferAccessStrategy vac_strategy; + + +/* + * Variables for cost-based parallel vacuum. See comments atop + * compute_parallel_delay to understand how it works. + */ +pg_atomic_uint32 *VacuumSharedCostBalance = NULL; +pg_atomic_uint32 *VacuumActiveNWorkers = NULL; +int VacuumCostBalanceLocal = 0; + +/* non-export function prototypes */ +static List *expand_vacuum_rel(VacuumRelation *vrel, int options); +static List *get_all_vacuum_rels(int options); +static void vac_truncate_clog(TransactionId frozenXID, + MultiXactId minMulti, + TransactionId lastSaneFrozenXid, + MultiXactId lastSaneMinMulti); +static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params); +static double compute_parallel_delay(void); +static VacOptValue get_vacoptval_from_boolean(DefElem *def); +static bool vac_tid_reaped(ItemPointer itemptr, void *state); +static int vac_cmp_itemptr(const void *left, const void *right); + +/* + * Primary entry point for manual VACUUM and ANALYZE commands + * + * This is mainly a preparation wrapper for the real operations that will + * happen in vacuum(). 
+ */ +void +ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel) +{ + VacuumParams params; + bool verbose = false; + bool skip_locked = false; + bool analyze = false; + bool freeze = false; + bool full = false; + bool disable_page_skipping = false; + bool process_toast = true; + ListCell *lc; + + /* index_cleanup and truncate values unspecified for now */ + params.index_cleanup = VACOPTVALUE_UNSPECIFIED; + params.truncate = VACOPTVALUE_UNSPECIFIED; + + /* By default parallel vacuum is enabled */ + params.nworkers = 0; + + /* Parse options list */ + foreach(lc, vacstmt->options) + { + DefElem *opt = (DefElem *) lfirst(lc); + + /* Parse common options for VACUUM and ANALYZE */ + if (strcmp(opt->defname, "verbose") == 0) + verbose = defGetBoolean(opt); + else if (strcmp(opt->defname, "skip_locked") == 0) + skip_locked = defGetBoolean(opt); + else if (!vacstmt->is_vacuumcmd) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unrecognized ANALYZE option \"%s\"", opt->defname), + parser_errposition(pstate, opt->location))); + + /* Parse options available on VACUUM */ + else if (strcmp(opt->defname, "analyze") == 0) + analyze = defGetBoolean(opt); + else if (strcmp(opt->defname, "freeze") == 0) + freeze = defGetBoolean(opt); + else if (strcmp(opt->defname, "full") == 0) + full = defGetBoolean(opt); + else if (strcmp(opt->defname, "disable_page_skipping") == 0) + disable_page_skipping = defGetBoolean(opt); + else if (strcmp(opt->defname, "index_cleanup") == 0) + { + /* Interpret no string as the default, which is 'auto' */ + if (!opt->arg) + params.index_cleanup = VACOPTVALUE_AUTO; + else + { + char *sval = defGetString(opt); + + /* Try matching on 'auto' string, or fall back on boolean */ + if (pg_strcasecmp(sval, "auto") == 0) + params.index_cleanup = VACOPTVALUE_AUTO; + else + params.index_cleanup = get_vacoptval_from_boolean(opt); + } + } + else if (strcmp(opt->defname, "process_toast") == 0) + process_toast = defGetBoolean(opt); + else if 
(strcmp(opt->defname, "truncate") == 0) + params.truncate = get_vacoptval_from_boolean(opt); + else if (strcmp(opt->defname, "parallel") == 0) + { + if (opt->arg == NULL) + { + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("parallel option requires a value between 0 and %d", + MAX_PARALLEL_WORKER_LIMIT), + parser_errposition(pstate, opt->location))); + } + else + { + int nworkers; + + nworkers = defGetInt32(opt); + if (nworkers < 0 || nworkers > MAX_PARALLEL_WORKER_LIMIT) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("parallel workers for vacuum must be between 0 and %d", + MAX_PARALLEL_WORKER_LIMIT), + parser_errposition(pstate, opt->location))); + + /* + * Disable parallel vacuum, if user has specified parallel + * degree as zero. + */ + if (nworkers == 0) + params.nworkers = -1; + else + params.nworkers = nworkers; + } + } + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unrecognized VACUUM option \"%s\"", opt->defname), + parser_errposition(pstate, opt->location))); + } + + /* Set vacuum options */ + params.options = + (vacstmt->is_vacuumcmd ? VACOPT_VACUUM : VACOPT_ANALYZE) | + (verbose ? VACOPT_VERBOSE : 0) | + (skip_locked ? VACOPT_SKIP_LOCKED : 0) | + (analyze ? VACOPT_ANALYZE : 0) | + (freeze ? VACOPT_FREEZE : 0) | + (full ? VACOPT_FULL : 0) | + (disable_page_skipping ? VACOPT_DISABLE_PAGE_SKIPPING : 0) | + (process_toast ? VACOPT_PROCESS_TOAST : 0); + + /* sanity checks on options */ + Assert(params.options & (VACOPT_VACUUM | VACOPT_ANALYZE)); + Assert((params.options & VACOPT_VACUUM) || + !(params.options & (VACOPT_FULL | VACOPT_FREEZE))); + + if ((params.options & VACOPT_FULL) && params.nworkers > 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("VACUUM FULL cannot be performed in parallel"))); + + /* + * Make sure VACOPT_ANALYZE is specified if any column lists are present. 
+ */ + if (!(params.options & VACOPT_ANALYZE)) + { + ListCell *lc; + + foreach(lc, vacstmt->rels) + { + VacuumRelation *vrel = lfirst_node(VacuumRelation, lc); + + if (vrel->va_cols != NIL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("ANALYZE option must be specified when a column list is provided"))); + } + } + + /* + * All freeze ages are zero if the FREEZE option is given; otherwise pass + * them as -1 which means to use the default values. + */ + if (params.options & VACOPT_FREEZE) + { + params.freeze_min_age = 0; + params.freeze_table_age = 0; + params.multixact_freeze_min_age = 0; + params.multixact_freeze_table_age = 0; + } + else + { + params.freeze_min_age = -1; + params.freeze_table_age = -1; + params.multixact_freeze_min_age = -1; + params.multixact_freeze_table_age = -1; + } + + /* user-invoked vacuum is never "for wraparound" */ + params.is_wraparound = false; + + /* user-invoked vacuum uses VACOPT_VERBOSE instead of log_min_duration */ + params.log_min_duration = -1; + + /* Now go through the common routine */ + vacuum(vacstmt->rels, ¶ms, NULL, isTopLevel); +} + +/* + * Internal entry point for VACUUM and ANALYZE commands. + * + * relations, if not NIL, is a list of VacuumRelation to process; otherwise, + * we process all relevant tables in the database. For each VacuumRelation, + * if a valid OID is supplied, the table with that OID is what to process; + * otherwise, the VacuumRelation's RangeVar indicates what to process. + * + * params contains a set of parameters that can be used to customize the + * behavior. + * + * bstrategy is normally given as NULL, but in autovacuum it can be passed + * in to use the same buffer strategy object across multiple vacuum() calls. + * + * isTopLevel should be passed down from ProcessUtility. + * + * It is the caller's responsibility that all parameters are allocated in a + * memory context that will not disappear at transaction commit. 
+ */ +void +vacuum(List *relations, VacuumParams *params, + BufferAccessStrategy bstrategy, bool isTopLevel) +{ + static bool in_vacuum = false; + + const char *stmttype; + volatile bool in_outer_xact, + use_own_xacts; + + Assert(params != NULL); + + stmttype = (params->options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE"; + + /* + * We cannot run VACUUM inside a user transaction block; if we were inside + * a transaction, then our commit- and start-transaction-command calls + * would not have the intended effect! There are numerous other subtle + * dependencies on this, too. + * + * ANALYZE (without VACUUM) can run either way. + */ + if (params->options & VACOPT_VACUUM) + { + PreventInTransactionBlock(isTopLevel, stmttype); + in_outer_xact = false; + } + else + in_outer_xact = IsInTransactionBlock(isTopLevel); + + /* + * Due to static variables vac_context, anl_context and vac_strategy, + * vacuum() is not reentrant. This matters when VACUUM FULL or ANALYZE + * calls a hostile index expression that itself calls ANALYZE. + */ + if (in_vacuum) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("%s cannot be executed from VACUUM or ANALYZE", + stmttype))); + + /* + * Sanity check DISABLE_PAGE_SKIPPING option. + */ + if ((params->options & VACOPT_FULL) != 0 && + (params->options & VACOPT_DISABLE_PAGE_SKIPPING) != 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("VACUUM option DISABLE_PAGE_SKIPPING cannot be used with FULL"))); + + /* sanity check for PROCESS_TOAST */ + if ((params->options & VACOPT_FULL) != 0 && + (params->options & VACOPT_PROCESS_TOAST) == 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("PROCESS_TOAST required with VACUUM FULL"))); + + /* + * Create special memory context for cross-transaction storage. + * + * Since it is a child of PortalContext, it will go away eventually even + * if we suffer an error; there's no need for special abort cleanup logic. 
+ */ + vac_context = AllocSetContextCreate(PortalContext, + "Vacuum", + ALLOCSET_DEFAULT_SIZES); + + /* + * If caller didn't give us a buffer strategy object, make one in the + * cross-transaction memory context. + */ + if (bstrategy == NULL) + { + MemoryContext old_context = MemoryContextSwitchTo(vac_context); + + bstrategy = GetAccessStrategy(BAS_VACUUM); + MemoryContextSwitchTo(old_context); + } + vac_strategy = bstrategy; + + /* + * Build list of relation(s) to process, putting any new data in + * vac_context for safekeeping. + */ + if (relations != NIL) + { + List *newrels = NIL; + ListCell *lc; + + foreach(lc, relations) + { + VacuumRelation *vrel = lfirst_node(VacuumRelation, lc); + List *sublist; + MemoryContext old_context; + + sublist = expand_vacuum_rel(vrel, params->options); + old_context = MemoryContextSwitchTo(vac_context); + newrels = list_concat(newrels, sublist); + MemoryContextSwitchTo(old_context); + } + relations = newrels; + } + else + relations = get_all_vacuum_rels(params->options); + + /* + * Decide whether we need to start/commit our own transactions. + * + * For VACUUM (with or without ANALYZE): always do so, so that we can + * release locks as soon as possible. (We could possibly use the outer + * transaction for a one-table VACUUM, but handling TOAST tables would be + * problematic.) + * + * For ANALYZE (no VACUUM): if inside a transaction block, we cannot + * start/commit our own transactions. Also, there's no need to do so if + * only processing one relation. For multiple relations when not within a + * transaction block, and also in an autovacuum worker, use own + * transactions so we can release locks sooner. 
+ */ + if (params->options & VACOPT_VACUUM) + use_own_xacts = true; + else + { + Assert(params->options & VACOPT_ANALYZE); + if (IsAutoVacuumWorkerProcess()) + use_own_xacts = true; + else if (in_outer_xact) + use_own_xacts = false; + else if (list_length(relations) > 1) + use_own_xacts = true; + else + use_own_xacts = false; + } + + /* + * vacuum_rel expects to be entered with no transaction active; it will + * start and commit its own transaction. But we are called by an SQL + * command, and so we are executing inside a transaction already. We + * commit the transaction started in PostgresMain() here, and start + * another one before exiting to match the commit waiting for us back in + * PostgresMain(). + */ + if (use_own_xacts) + { + Assert(!in_outer_xact); + + /* ActiveSnapshot is not set by autovacuum */ + if (ActiveSnapshotSet()) + PopActiveSnapshot(); + + /* matches the StartTransaction in PostgresMain() */ + CommitTransactionCommand(); + } + + /* Turn vacuum cost accounting on or off, and set/clear in_vacuum */ + PG_TRY(); + { + ListCell *cur; + + in_vacuum = true; + VacuumCostActive = (VacuumCostDelay > 0); + VacuumCostBalance = 0; + VacuumPageHit = 0; + VacuumPageMiss = 0; + VacuumPageDirty = 0; + VacuumCostBalanceLocal = 0; + VacuumSharedCostBalance = NULL; + VacuumActiveNWorkers = NULL; + + /* + * Loop to process each selected relation. + */ + foreach(cur, relations) + { + VacuumRelation *vrel = lfirst_node(VacuumRelation, cur); + + if (params->options & VACOPT_VACUUM) + { + if (!vacuum_rel(vrel->oid, vrel->relation, params)) + continue; + } + + if (params->options & VACOPT_ANALYZE) + { + /* + * If using separate xacts, start one for analyze. Otherwise, + * we can use the outer transaction. 
+ */ + if (use_own_xacts) + { + StartTransactionCommand(); + /* functions in indexes may want a snapshot set */ + PushActiveSnapshot(GetTransactionSnapshot()); + } + + analyze_rel(vrel->oid, vrel->relation, params, + vrel->va_cols, in_outer_xact, vac_strategy); + + if (use_own_xacts) + { + PopActiveSnapshot(); + CommitTransactionCommand(); + } + else + { + /* + * If we're not using separate xacts, better separate the + * ANALYZE actions with CCIs. This avoids trouble if user + * says "ANALYZE t, t". + */ + CommandCounterIncrement(); + } + } + } + } + PG_FINALLY(); + { + in_vacuum = false; + VacuumCostActive = false; + } + PG_END_TRY(); + + /* + * Finish up processing. + */ + if (use_own_xacts) + { + /* here, we are not in a transaction */ + + /* + * This matches the CommitTransaction waiting for us in + * PostgresMain(). + */ + StartTransactionCommand(); + } + + if ((params->options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess()) + { + /* + * Update pg_database.datfrozenxid, and truncate pg_xact if possible. + * (autovacuum.c does this for itself.) + */ + vac_update_datfrozenxid(); + } + + /* + * Clean up working storage --- note we must do this after + * StartTransactionCommand, else we might be trying to delete the active + * context! + */ + MemoryContextDelete(vac_context); + vac_context = NULL; +} + +/* + * Check if a given relation can be safely vacuumed or analyzed. If the + * user is not the relation owner, issue a WARNING log message and return + * false to let the caller decide what to do with this relation. This + * routine is used to decide if a relation can be processed for VACUUM or + * ANALYZE. + */ +bool +vacuum_is_relation_owner(Oid relid, Form_pg_class reltuple, bits32 options) +{ + char *relname; + + Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0); + + /* + * Check permissions. 
+ * + * We allow the user to vacuum or analyze a table if he is superuser, the + * table owner, or the database owner (but in the latter case, only if + * it's not a shared relation). pg_class_ownercheck includes the + * superuser case. + * + * Note we choose to treat permissions failure as a WARNING and keep + * trying to vacuum or analyze the rest of the DB --- is this appropriate? + */ + if (pg_class_ownercheck(relid, GetUserId()) || + (pg_database_ownercheck(MyDatabaseId, GetUserId()) && !reltuple->relisshared)) + return true; + + relname = NameStr(reltuple->relname); + + if ((options & VACOPT_VACUUM) != 0) + { + if (reltuple->relisshared) + ereport(WARNING, + (errmsg("skipping \"%s\" --- only superuser can vacuum it", + relname))); + else if (reltuple->relnamespace == PG_CATALOG_NAMESPACE) + ereport(WARNING, + (errmsg("skipping \"%s\" --- only superuser or database owner can vacuum it", + relname))); + else + ereport(WARNING, + (errmsg("skipping \"%s\" --- only table or database owner can vacuum it", + relname))); + + /* + * For VACUUM ANALYZE, both logs could show up, but just generate + * information for VACUUM as that would be the first one to be + * processed. + */ + return false; + } + + if ((options & VACOPT_ANALYZE) != 0) + { + if (reltuple->relisshared) + ereport(WARNING, + (errmsg("skipping \"%s\" --- only superuser can analyze it", + relname))); + else if (reltuple->relnamespace == PG_CATALOG_NAMESPACE) + ereport(WARNING, + (errmsg("skipping \"%s\" --- only superuser or database owner can analyze it", + relname))); + else + ereport(WARNING, + (errmsg("skipping \"%s\" --- only table or database owner can analyze it", + relname))); + } + + return false; +} + + +/* + * vacuum_open_relation + * + * This routine is used for attempting to open and lock a relation which + * is going to be vacuumed or analyzed. If the relation cannot be opened + * or locked, a log is emitted if possible. 
+ */
+Relation
+vacuum_open_relation(Oid relid, RangeVar *relation, bits32 options,
+                     bool verbose, LOCKMODE lmode)
+{
+    Relation    rel;
+    bool        rel_lock = true;
+    int         elevel;
+
+    Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
+
+    /*
+     * Open the relation and get the appropriate lock on it.
+     *
+     * There's a race condition here: the relation may have gone away since
+     * the last time we saw it.  If so, we don't need to vacuum or analyze it.
+     *
+     * If we've been asked not to wait for the relation lock, acquire it first
+     * in non-blocking mode, before calling try_relation_open().
+     */
+    if (!(options & VACOPT_SKIP_LOCKED))
+        rel = try_relation_open(relid, lmode);
+    else if (ConditionalLockRelationOid(relid, lmode))
+        /* lock was just acquired above, so open without locking again */
+        rel = try_relation_open(relid, NoLock);
+    else
+    {
+        /* could not get the lock without blocking */
+        rel = NULL;
+        rel_lock = false;
+    }
+
+    /* if relation is opened, leave */
+    if (rel)
+        return rel;
+
+    /*
+     * Relation could not be opened, hence generate if possible a log
+     * informing on the situation.
+     *
+     * If the RangeVar is not defined, we do not have enough information to
+     * provide a meaningful log statement.  Chances are that the caller has
+     * intentionally not provided this information so that this logging is
+     * skipped, anyway.
+     */
+    if (relation == NULL)
+        return NULL;
+
+    /*
+     * Determine the log level.
+     *
+     * For manual VACUUM or ANALYZE, we emit a WARNING to match the log
+     * statements in the permission checks; otherwise, only log if the caller
+     * so requested.
+     */
+    if (!IsAutoVacuumWorkerProcess())
+        elevel = WARNING;
+    else if (verbose)
+        elevel = LOG;
+    else
+        return NULL;
+
+    if ((options & VACOPT_VACUUM) != 0)
+    {
+        if (!rel_lock)
+            ereport(elevel,
+                    (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
+                     errmsg("skipping vacuum of \"%s\" --- lock not available",
+                            relation->relname)));
+        else
+            ereport(elevel,
+                    (errcode(ERRCODE_UNDEFINED_TABLE),
+                     errmsg("skipping vacuum of \"%s\" --- relation no longer exists",
+                            relation->relname)));
+
+        /*
+         * For VACUUM ANALYZE, both logs could show up, but just generate
+         * information for VACUUM as that would be the first one to be
+         * processed.
+         */
+        return NULL;
+    }
+
+    if ((options & VACOPT_ANALYZE) != 0)
+    {
+        if (!rel_lock)
+            ereport(elevel,
+                    (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
+                     errmsg("skipping analyze of \"%s\" --- lock not available",
+                            relation->relname)));
+        else
+            ereport(elevel,
+                    (errcode(ERRCODE_UNDEFINED_TABLE),
+                     errmsg("skipping analyze of \"%s\" --- relation no longer exists",
+                            relation->relname)));
+    }
+
+    return NULL;
+}
+
+
+/*
+ * Given a VacuumRelation, fill in the table OID if it wasn't specified,
+ * and optionally add VacuumRelations for partitions of the table.
+ *
+ * If a VacuumRelation does not have an OID supplied and is a partitioned
+ * table, an extra entry will be added to the output for each partition.
+ * Presently, only autovacuum supplies OIDs when calling vacuum(), and
+ * it does not want us to expand partitioned tables.
+ *
+ * We take care not to modify the input data structure, but instead build
+ * new VacuumRelation(s) to return.  (But note that they will reference
+ * unmodified parts of the input, eg column lists.)  New data structures
+ * are made in vac_context.
+ */
+static List *
+expand_vacuum_rel(VacuumRelation *vrel, int options)
+{
+    List       *vacrels = NIL;
+    MemoryContext oldcontext;
+
+    /* If caller supplied OID, there's nothing we need do here.
 */
+    if (OidIsValid(vrel->oid))
+    {
+        /* List cells live in vac_context so they survive xact boundaries. */
+        oldcontext = MemoryContextSwitchTo(vac_context);
+        vacrels = lappend(vacrels, vrel);
+        MemoryContextSwitchTo(oldcontext);
+    }
+    else
+    {
+        /* Process a specific relation, and possibly partitions thereof */
+        Oid         relid;
+        HeapTuple   tuple;
+        Form_pg_class classForm;
+        bool        include_parts;
+        int         rvr_opts;
+
+        /*
+         * Since autovacuum workers supply OIDs when calling vacuum(), no
+         * autovacuum worker should reach this code.
+         */
+        Assert(!IsAutoVacuumWorkerProcess());
+
+        /*
+         * We transiently take AccessShareLock to protect the syscache lookup
+         * below, as well as find_all_inheritors's expectation that the caller
+         * holds some lock on the starting relation.
+         */
+        rvr_opts = (options & VACOPT_SKIP_LOCKED) ? RVR_SKIP_LOCKED : 0;
+        relid = RangeVarGetRelidExtended(vrel->relation,
+                                         AccessShareLock,
+                                         rvr_opts,
+                                         NULL, NULL);
+
+        /*
+         * If the lock is unavailable, emit the same log statement that
+         * vacuum_rel() and analyze_rel() would.
+         */
+        if (!OidIsValid(relid))
+        {
+            if (options & VACOPT_VACUUM)
+                ereport(WARNING,
+                        (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
+                         errmsg("skipping vacuum of \"%s\" --- lock not available",
+                                vrel->relation->relname)));
+            else
+                ereport(WARNING,
+                        (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
+                         errmsg("skipping analyze of \"%s\" --- lock not available",
+                                vrel->relation->relname)));
+            return vacrels;
+        }
+
+        /*
+         * To check whether the relation is a partitioned table and its
+         * ownership, fetch its syscache entry.
+         */
+        tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
+        if (!HeapTupleIsValid(tuple))
+            elog(ERROR, "cache lookup failed for relation %u", relid);
+        classForm = (Form_pg_class) GETSTRUCT(tuple);
+
+        /*
+         * Make a returnable VacuumRelation for this rel if user is a proper
+         * owner.
+         */
+        if (vacuum_is_relation_owner(relid, classForm, options))
+        {
+            oldcontext = MemoryContextSwitchTo(vac_context);
+            vacrels = lappend(vacrels, makeVacuumRelation(vrel->relation,
+                                                          relid,
+                                                          vrel->va_cols));
+            MemoryContextSwitchTo(oldcontext);
+        }
+
+
+        include_parts = (classForm->relkind == RELKIND_PARTITIONED_TABLE);
+        ReleaseSysCache(tuple);
+
+        /*
+         * If it is, make relation list entries for its partitions.  Note that
+         * the list returned by find_all_inheritors() includes the passed-in
+         * OID, so we have to skip that.  There's no point in taking locks on
+         * the individual partitions yet, and doing so would just add
+         * unnecessary deadlock risk.  For this last reason we do not check
+         * yet the ownership of the partitions, which get added to the list to
+         * process.  Ownership will be checked later on anyway.
+         */
+        if (include_parts)
+        {
+            List       *part_oids = find_all_inheritors(relid, NoLock, NULL);
+            ListCell   *part_lc;
+
+            foreach(part_lc, part_oids)
+            {
+                Oid         part_oid = lfirst_oid(part_lc);
+
+                if (part_oid == relid)
+                    continue;   /* ignore original table */
+
+                /*
+                 * We omit a RangeVar since it wouldn't be appropriate to
+                 * complain about failure to open one of these relations
+                 * later.
+                 */
+                oldcontext = MemoryContextSwitchTo(vac_context);
+                vacrels = lappend(vacrels, makeVacuumRelation(NULL,
+                                                              part_oid,
+                                                              vrel->va_cols));
+                MemoryContextSwitchTo(oldcontext);
+            }
+        }
+
+        /*
+         * Release lock again.  This means that by the time we actually try to
+         * process the table, it might be gone or renamed.  In the former case
+         * we'll silently ignore it; in the latter case we'll process it
+         * anyway, but we must beware that the RangeVar doesn't necessarily
+         * identify it anymore.  This isn't ideal, perhaps, but there's little
+         * practical alternative, since we're typically going to commit this
+         * transaction and begin a new one between now and then.  Moreover,
+         * holding locks on multiple relations would create significant risk
+         * of deadlock.
+         */
+        UnlockRelationOid(relid, AccessShareLock);
+    }
+
+    return vacrels;
+}
+
+/*
+ * Construct a list of VacuumRelations for all vacuumable rels in
+ * the current database.  The list is built in vac_context.
+ */
+static List *
+get_all_vacuum_rels(int options)
+{
+    List       *vacrels = NIL;
+    Relation    pgclass;
+    TableScanDesc scan;
+    HeapTuple   tuple;
+
+    pgclass = table_open(RelationRelationId, AccessShareLock);
+
+    scan = table_beginscan_catalog(pgclass, 0, NULL);
+
+    while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+    {
+        Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
+        MemoryContext oldcontext;
+        Oid         relid = classForm->oid;
+
+        /* check permissions of relation */
+        if (!vacuum_is_relation_owner(relid, classForm, options))
+            continue;
+
+        /*
+         * We include partitioned tables here; depending on which operation is
+         * to be performed, caller will decide whether to process or ignore
+         * them.
+         */
+        if (classForm->relkind != RELKIND_RELATION &&
+            classForm->relkind != RELKIND_MATVIEW &&
+            classForm->relkind != RELKIND_PARTITIONED_TABLE)
+            continue;
+
+        /*
+         * Build VacuumRelation(s) specifying the table OIDs to be processed.
+         * We omit a RangeVar since it wouldn't be appropriate to complain
+         * about failure to open one of these relations later.
+         */
+        oldcontext = MemoryContextSwitchTo(vac_context);
+        vacrels = lappend(vacrels, makeVacuumRelation(NULL,
+                                                      relid,
+                                                      NIL));
+        MemoryContextSwitchTo(oldcontext);
+    }
+
+    table_endscan(scan);
+    table_close(pgclass, AccessShareLock);
+
+    return vacrels;
+}
+
+/*
+ * vacuum_set_xid_limits() -- compute oldestXmin and freeze cutoff points
+ *
+ * Input parameters are the target relation, applicable freeze age settings.
+ *
+ * The output parameters are:
+ * - oldestXmin is the Xid below which tuples deleted by any xact (that
+ *   committed) should be considered DEAD, not just RECENTLY_DEAD.
+ * - oldestMxact is the Mxid below which MultiXacts are definitely not
+ *   seen as visible by any running transaction.
+ * - freezeLimit is the Xid below which all Xids are definitely replaced by
+ *   FrozenTransactionId during aggressive vacuums.
+ * - multiXactCutoff is the value below which all MultiXactIds are definitely
+ *   removed from Xmax during aggressive vacuums.
+ *
+ * Return value indicates if vacuumlazy.c caller should make its VACUUM
+ * operation aggressive.  An aggressive VACUUM must advance relfrozenxid up to
+ * FreezeLimit (at a minimum), and relminmxid up to multiXactCutoff (at a
+ * minimum).
+ *
+ * oldestXmin and oldestMxact are the most recent values that can ever be
+ * passed to vac_update_relstats() as frozenxid and minmulti arguments by our
+ * vacuumlazy.c caller later on.  These values should be passed when it turns
+ * out that VACUUM will leave no unfrozen XIDs/XMIDs behind in the table.
+ */
+bool
+vacuum_set_xid_limits(Relation rel,
+                      int freeze_min_age,
+                      int freeze_table_age,
+                      int multixact_freeze_min_age,
+                      int multixact_freeze_table_age,
+                      TransactionId *oldestXmin,
+                      MultiXactId *oldestMxact,
+                      TransactionId *freezeLimit,
+                      MultiXactId *multiXactCutoff)
+{
+    int         freezemin;
+    int         mxid_freezemin;
+    int         effective_multixact_freeze_max_age;
+    TransactionId limit;
+    TransactionId safeLimit;
+    MultiXactId mxactLimit;
+    MultiXactId safeMxactLimit;
+    int         freezetable;
+
+    /*
+     * We can always ignore processes running lazy vacuum.  This is because we
+     * use these values only for deciding which tuples we must keep in the
+     * tables.  Since lazy vacuum doesn't write its XID anywhere (usually no
+     * XID assigned), it's safe to ignore it.  In theory it could be
+     * problematic to ignore lazy vacuums in a full vacuum, but keep in mind
+     * that only one vacuum process can be working on a particular table at
+     * any time, and that each vacuum is always an independent transaction.
+     */
+    *oldestXmin = GetOldestNonRemovableTransactionId(rel);
+
+    if (OldSnapshotThresholdActive())
+    {
+        TransactionId limit_xmin;
+        TimestampTz limit_ts;
+
+        if (TransactionIdLimitedForOldSnapshots(*oldestXmin, rel,
+                                                &limit_xmin, &limit_ts))
+        {
+            /*
+             * TODO: We should only set the threshold if we are pruning on the
+             * basis of the increased limits.  Not as crucial here as it is
+             * for opportunistic pruning (which often happens at a much higher
+             * frequency), but would still be a significant improvement.
+             */
+            SetOldSnapshotThresholdTimestamp(limit_ts, limit_xmin);
+            *oldestXmin = limit_xmin;
+        }
+    }
+
+    Assert(TransactionIdIsNormal(*oldestXmin));
+
+    /*
+     * Determine the minimum freeze age to use: as specified by the caller, or
+     * vacuum_freeze_min_age, but in any case not more than half
+     * autovacuum_freeze_max_age, so that autovacuums to prevent XID
+     * wraparound won't occur too frequently.
+     */
+    freezemin = freeze_min_age;
+    if (freezemin < 0)
+        freezemin = vacuum_freeze_min_age;
+    freezemin = Min(freezemin, autovacuum_freeze_max_age / 2);
+    Assert(freezemin >= 0);
+
+    /*
+     * Compute the cutoff XID, being careful not to generate a "permanent" XID
+     */
+    limit = *oldestXmin - freezemin;
+    if (!TransactionIdIsNormal(limit))
+        limit = FirstNormalTransactionId;
+
+    /*
+     * If oldestXmin is very far back (in practice, more than
+     * autovacuum_freeze_max_age / 2 XIDs old), complain and force a minimum
+     * freeze age of zero.
+     */
+    safeLimit = ReadNextTransactionId() - autovacuum_freeze_max_age;
+    if (!TransactionIdIsNormal(safeLimit))
+        safeLimit = FirstNormalTransactionId;
+
+    if (TransactionIdPrecedes(limit, safeLimit))
+    {
+        ereport(WARNING,
+                (errmsg("oldest xmin is far in the past"),
+                 errhint("Close open transactions soon to avoid wraparound problems.\n"
+                         "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
+        limit = *oldestXmin;
+    }
+
+    /* Final XID freeze cutoff (guaranteed to be a normal XID) */
+    *freezeLimit = limit;
+
+    /*
+     * Compute the multixact age for which freezing is urgent.  This is
+     * normally autovacuum_multixact_freeze_max_age, but may be less if we are
+     * short of multixact member space.
+     */
+    effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
+
+    /*
+     * Determine the minimum multixact freeze age to use: as specified by
+     * caller, or vacuum_multixact_freeze_min_age, but in any case not more
+     * than half effective_multixact_freeze_max_age, so that autovacuums to
+     * prevent MultiXact wraparound won't occur too frequently.
+     */
+    mxid_freezemin = multixact_freeze_min_age;
+    if (mxid_freezemin < 0)
+        mxid_freezemin = vacuum_multixact_freeze_min_age;
+    mxid_freezemin = Min(mxid_freezemin,
+                         effective_multixact_freeze_max_age / 2);
+    Assert(mxid_freezemin >= 0);
+
+    /* Remember for caller */
+    *oldestMxact = GetOldestMultiXactId();
+
+    /* compute the cutoff multi, being careful to generate a valid value */
+    mxactLimit = *oldestMxact - mxid_freezemin;
+    if (mxactLimit < FirstMultiXactId)
+        mxactLimit = FirstMultiXactId;
+
+    safeMxactLimit =
+        ReadNextMultiXactId() - effective_multixact_freeze_max_age;
+    if (safeMxactLimit < FirstMultiXactId)
+        safeMxactLimit = FirstMultiXactId;
+
+    if (MultiXactIdPrecedes(mxactLimit, safeMxactLimit))
+    {
+        ereport(WARNING,
+                (errmsg("oldest multixact is far in the past"),
+                 errhint("Close open transactions with multixacts soon to avoid wraparound problems.")));
+        /* Use the safe limit, unless an older mxact is still running */
+        if (MultiXactIdPrecedes(*oldestMxact, safeMxactLimit))
+            mxactLimit = *oldestMxact;
+        else
+            mxactLimit = safeMxactLimit;
+    }
+
+    /* Final MultiXactId freeze cutoff */
+    *multiXactCutoff = mxactLimit;
+
+    /*
+     * Done setting output parameters; just need to figure out if caller needs
+     * to do an aggressive VACUUM or not.
+     *
+     * Determine the table freeze age to use: as specified by the caller, or
+     * vacuum_freeze_table_age, but in any case not more than
+     * autovacuum_freeze_max_age * 0.95, so that if you have e.g nightly
+     * VACUUM schedule, the nightly VACUUM gets a chance to freeze tuples
+     * before anti-wraparound autovacuum is launched.
+     */
+    freezetable = freeze_table_age;
+    if (freezetable < 0)
+        freezetable = vacuum_freeze_table_age;
+    freezetable = Min(freezetable, autovacuum_freeze_max_age * 0.95);
+    Assert(freezetable >= 0);
+
+    /*
+     * Compute XID limit causing an aggressive vacuum, being careful not to
+     * generate a "permanent" XID
+     */
+    limit = ReadNextTransactionId() - freezetable;
+    if (!TransactionIdIsNormal(limit))
+        limit = FirstNormalTransactionId;
+    if (TransactionIdPrecedesOrEquals(rel->rd_rel->relfrozenxid,
+                                      limit))
+        return true;
+
+    /*
+     * Similar to the above, determine the table freeze age to use for
+     * multixacts: as specified by the caller, or
+     * vacuum_multixact_freeze_table_age, but in any case not more than
+     * autovacuum_multixact_freeze_table_age * 0.95, so that if you have e.g.
+     * nightly VACUUM schedule, the nightly VACUUM gets a chance to freeze
+     * multixacts before anti-wraparound autovacuum is launched.
+     */
+    freezetable = multixact_freeze_table_age;
+    if (freezetable < 0)
+        freezetable = vacuum_multixact_freeze_table_age;
+    freezetable = Min(freezetable,
+                      effective_multixact_freeze_max_age * 0.95);
+    Assert(freezetable >= 0);
+
+    /*
+     * Compute MultiXact limit causing an aggressive vacuum, being careful to
+     * generate a valid MultiXact value
+     */
+    mxactLimit = ReadNextMultiXactId() - freezetable;
+    if (mxactLimit < FirstMultiXactId)
+        mxactLimit = FirstMultiXactId;
+    if (MultiXactIdPrecedesOrEquals(rel->rd_rel->relminmxid,
+                                    mxactLimit))
+        return true;
+
+    return false;
+}
+
+/*
+ * vacuum_xid_failsafe_check() -- Used by VACUUM's wraparound failsafe
+ * mechanism to determine if its table's relfrozenxid and relminmxid are now
+ * dangerously far in the past.
+ *
+ * Input parameters are the target relation's relfrozenxid and relminmxid.
+ *
+ * When we return true, VACUUM caller triggers the failsafe.
+ */
+bool
+vacuum_xid_failsafe_check(TransactionId relfrozenxid, MultiXactId relminmxid)
+{
+    TransactionId xid_skip_limit;
+    MultiXactId multi_skip_limit;
+    int         skip_index_vacuum;
+
+    Assert(TransactionIdIsNormal(relfrozenxid));
+    Assert(MultiXactIdIsValid(relminmxid));
+
+    /*
+     * Determine the index skipping age to use.  In any case no less than
+     * autovacuum_freeze_max_age * 1.05.
+     */
+    skip_index_vacuum = Max(vacuum_failsafe_age, autovacuum_freeze_max_age * 1.05);
+
+    /* Convert the age into a cutoff XID, avoiding the "permanent" XIDs */
+    xid_skip_limit = ReadNextTransactionId() - skip_index_vacuum;
+    if (!TransactionIdIsNormal(xid_skip_limit))
+        xid_skip_limit = FirstNormalTransactionId;
+
+    if (TransactionIdPrecedes(relfrozenxid, xid_skip_limit))
+    {
+        /* The table's relfrozenxid is too old */
+        return true;
+    }
+
+    /*
+     * Similar to above, determine the index skipping age to use for
+     * multixact.  In any case no less than autovacuum_multixact_freeze_max_age *
+     * 1.05.
+     */
+    skip_index_vacuum = Max(vacuum_multixact_failsafe_age,
+                            autovacuum_multixact_freeze_max_age * 1.05);
+
+    multi_skip_limit = ReadNextMultiXactId() - skip_index_vacuum;
+    if (multi_skip_limit < FirstMultiXactId)
+        multi_skip_limit = FirstMultiXactId;
+
+    if (MultiXactIdPrecedes(relminmxid, multi_skip_limit))
+    {
+        /* The table's relminmxid is too old */
+        return true;
+    }
+
+    return false;
+}
+
+/*
+ * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
+ *
+ * If we scanned the whole relation then we should just use the count of
+ * live tuples seen; but if we did not, we should not blindly extrapolate
+ * from that number, since VACUUM may have scanned a quite nonrandom
+ * subset of the table.  When we have only partial information, we take
+ * the old value of pg_class.reltuples/pg_class.relpages as a measurement
+ * of the tuple density in the unscanned pages.
+ *
+ * Note: scanned_tuples should count only *live* tuples, since
+ * pg_class.reltuples is defined that way.
+ */
+double
+vac_estimate_reltuples(Relation relation,
+                       BlockNumber total_pages,
+                       BlockNumber scanned_pages,
+                       double scanned_tuples)
+{
+    /* previous values recorded in this rel's pg_class entry */
+    BlockNumber old_rel_pages = relation->rd_rel->relpages;
+    double      old_rel_tuples = relation->rd_rel->reltuples;
+    double      old_density;
+    double      unscanned_pages;
+    double      total_tuples;
+
+    /* If we did scan the whole table, just use the count as-is */
+    if (scanned_pages >= total_pages)
+        return scanned_tuples;
+
+    /*
+     * When successive VACUUM commands scan the same few pages again and
+     * again, without anything from the table really changing, there is a risk
+     * that our beliefs about tuple density will gradually become distorted.
+     * This might be caused by vacuumlazy.c implementation details, such as
+     * its tendency to always scan the last heap page.  Handle that here.
+     *
+     * If the relation is _exactly_ the same size according to the existing
+     * pg_class entry, and only a few of its pages (less than 2%) were
+     * scanned, keep the existing value of reltuples.  Also keep the existing
+     * value when only a subset of rel's pages <= a single page were scanned.
+     *
+     * (Note: we might be returning -1 here.)
+     */
+    if (old_rel_pages == total_pages &&
+        scanned_pages < (double) total_pages * 0.02)
+        return old_rel_tuples;
+    if (scanned_pages <= 1)
+        return old_rel_tuples;
+
+    /*
+     * If old density is unknown, we can't do much except scale up
+     * scanned_tuples to match total_pages.
+     */
+    if (old_rel_tuples < 0 || old_rel_pages == 0)
+        return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
+
+    /*
+     * Okay, we've covered the corner cases.  The normal calculation is to
+     * convert the old measurement to a density (tuples per page), then
+     * estimate the number of tuples in the unscanned pages using that figure,
+     * and finally add on the number of tuples in the scanned pages.
+     */
+    old_density = old_rel_tuples / old_rel_pages;
+    unscanned_pages = (double) total_pages - (double) scanned_pages;
+    total_tuples = old_density * unscanned_pages + scanned_tuples;
+    return floor(total_tuples + 0.5);
+}
+
+
+/*
+ * vac_update_relstats() -- update statistics for one relation
+ *
+ * Update the whole-relation statistics that are kept in its pg_class
+ * row.  There are additional stats that will be updated if we are
+ * doing ANALYZE, but we always update these stats.  This routine works
+ * for both index and heap relation entries in pg_class.
+ *
+ * We violate transaction semantics here by overwriting the rel's
+ * existing pg_class tuple with the new values.  This is reasonably
+ * safe as long as we're sure that the new values are correct whether or
+ * not this transaction commits.  The reason for doing this is that if
+ * we updated these tuples in the usual way, vacuuming pg_class itself
+ * wouldn't work very well --- by the time we got done with a vacuum
+ * cycle, most of the tuples in pg_class would've been obsoleted.  Of
+ * course, this only works for fixed-size not-null columns, but these are.
+ *
+ * Another reason for doing it this way is that when we are in a lazy
+ * VACUUM and have PROC_IN_VACUUM set, we mustn't do any regular updates.
+ * Somebody vacuuming pg_class might think they could delete a tuple
+ * marked with xmin = our xid.
+ *
+ * In addition to fundamentally nontransactional statistics such as
+ * relpages and relallvisible, we try to maintain certain lazily-updated
+ * DDL flags such as relhasindex, by clearing them if no longer correct.
+ * It's safe to do this in VACUUM, which can't run in parallel with
+ * CREATE INDEX/RULE/TRIGGER and can't be part of a transaction block.
+ * However, it's *not* safe to do it in an ANALYZE that's within an
+ * outer transaction, because for example the current transaction might
+ * have dropped the last index; then we'd think relhasindex should be
+ * cleared, but if the transaction later rolls back this would be wrong.
+ * So we refrain from updating the DDL flags if we're inside an outer
+ * transaction.  This is OK since postponing the flag maintenance is
+ * always allowable.
+ *
+ * Note: num_tuples should count only *live* tuples, since
+ * pg_class.reltuples is defined that way.
+ *
+ * This routine is shared by VACUUM and ANALYZE.
+ */
+void
+vac_update_relstats(Relation relation,
+                    BlockNumber num_pages, double num_tuples,
+                    BlockNumber num_all_visible_pages,
+                    bool hasindex, TransactionId frozenxid,
+                    MultiXactId minmulti,
+                    bool *frozenxid_updated, bool *minmulti_updated,
+                    bool in_outer_xact)
+{
+    Oid         relid = RelationGetRelid(relation);
+    Relation    rd;
+    HeapTuple   ctup;
+    Form_pg_class pgcform;
+    bool        dirty,
+                futurexid,
+                futuremxid;
+    TransactionId oldfrozenxid;
+    MultiXactId oldminmulti;
+
+    rd = table_open(RelationRelationId, RowExclusiveLock);
+
+    /* Fetch a copy of the tuple to scribble on */
+    ctup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
+    if (!HeapTupleIsValid(ctup))
+        elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
+             relid);
+    pgcform = (Form_pg_class) GETSTRUCT(ctup);
+
+    /* Apply statistical updates, if any, to copied tuple */
+
+    dirty = false;
+    if (pgcform->relpages != (int32) num_pages)
+    {
+        pgcform->relpages = (int32) num_pages;
+        dirty = true;
+    }
+    if (pgcform->reltuples != (float4) num_tuples)
+    {
+        pgcform->reltuples = (float4) num_tuples;
+        dirty = true;
+    }
+    if (pgcform->relallvisible != (int32) num_all_visible_pages)
+    {
+        pgcform->relallvisible = (int32) num_all_visible_pages;
+        dirty = true;
+    }
+
+    /* Apply DDL updates, but not inside an outer transaction (see above) */
+
+    if (!in_outer_xact)
+    {
+        /*
+         * If we didn't find any indexes, reset relhasindex.
+         */
+        if (pgcform->relhasindex && !hasindex)
+        {
+            pgcform->relhasindex = false;
+            dirty = true;
+        }
+
+        /* We also clear relhasrules and relhastriggers if needed */
+        if (pgcform->relhasrules && relation->rd_rules == NULL)
+        {
+            pgcform->relhasrules = false;
+            dirty = true;
+        }
+        if (pgcform->relhastriggers && relation->trigdesc == NULL)
+        {
+            pgcform->relhastriggers = false;
+            dirty = true;
+        }
+    }
+
+    /*
+     * Update relfrozenxid, unless caller passed InvalidTransactionId
+     * indicating it has no new data.
+     *
+     * Ordinarily, we don't let relfrozenxid go backwards.  However, if the
+     * stored relfrozenxid is "in the future" then it seems best to assume
+     * it's corrupt, and overwrite with the oldest remaining XID in the table.
+     * This should match vac_update_datfrozenxid() concerning what we consider
+     * to be "in the future".
+     */
+    oldfrozenxid = pgcform->relfrozenxid;
+    futurexid = false;
+    if (frozenxid_updated)
+        *frozenxid_updated = false;
+    if (TransactionIdIsNormal(frozenxid) && oldfrozenxid != frozenxid)
+    {
+        bool        update = false;
+
+        if (TransactionIdPrecedes(oldfrozenxid, frozenxid))
+            update = true;
+        else if (TransactionIdPrecedes(ReadNextTransactionId(), oldfrozenxid))
+            futurexid = update = true;
+
+        if (update)
+        {
+            pgcform->relfrozenxid = frozenxid;
+            dirty = true;
+            if (frozenxid_updated)
+                *frozenxid_updated = true;
+        }
+    }
+
+    /* Similarly for relminmxid */
+    oldminmulti = pgcform->relminmxid;
+    futuremxid = false;
+    if (minmulti_updated)
+        *minmulti_updated = false;
+    if (MultiXactIdIsValid(minmulti) && oldminmulti != minmulti)
+    {
+        bool        update = false;
+
+        if (MultiXactIdPrecedes(oldminmulti, minmulti))
+            update = true;
+        else if (MultiXactIdPrecedes(ReadNextMultiXactId(), oldminmulti))
+            futuremxid = update = true;
+
+        if (update)
+        {
+            pgcform->relminmxid = minmulti;
+            dirty = true;
+            if (minmulti_updated)
+                *minmulti_updated = true;
+        }
+    }
+
+    /* If anything changed, write out the tuple. */
+    if (dirty)
+        /* non-transactional in-place overwrite; see header comment for why */
+        heap_inplace_update(rd, ctup);
+
+    table_close(rd, RowExclusiveLock);
+
+    if (futurexid)
+        ereport(WARNING,
+                (errcode(ERRCODE_DATA_CORRUPTED),
+                 errmsg_internal("overwrote invalid relfrozenxid value %u with new value %u for table \"%s\"",
+                                 oldfrozenxid, frozenxid,
+                                 RelationGetRelationName(relation))));
+    if (futuremxid)
+        ereport(WARNING,
+                (errcode(ERRCODE_DATA_CORRUPTED),
+                 errmsg_internal("overwrote invalid relminmxid value %u with new value %u for table \"%s\"",
+                                 oldminmulti, minmulti,
+                                 RelationGetRelationName(relation))));
+}
+
+
+/*
+ * vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
+ *
+ * Update pg_database's datfrozenxid entry for our database to be the
+ * minimum of the pg_class.relfrozenxid values.
+ *
+ * Similarly, update our datminmxid to be the minimum of the
+ * pg_class.relminmxid values.
+ *
+ * If we are able to advance either pg_database value, also try to
+ * truncate pg_xact and pg_multixact.
+ *
+ * We violate transaction semantics here by overwriting the database's
+ * existing pg_database tuple with the new values.  This is reasonably
+ * safe since the new values are correct whether or not this transaction
+ * commits.  As with vac_update_relstats, this avoids leaving dead tuples
+ * behind after a VACUUM.
+ */
+void
+vac_update_datfrozenxid(void)
+{
+    HeapTuple   tuple;
+    Form_pg_database dbform;
+    Relation    relation;
+    SysScanDesc scan;
+    HeapTuple   classTup;
+    TransactionId newFrozenXid;
+    MultiXactId newMinMulti;
+    TransactionId lastSaneFrozenXid;
+    MultiXactId lastSaneMinMulti;
+    bool        bogus = false;
+    bool        dirty = false;
+    ScanKeyData key[1];
+
+    /*
+     * Restrict this task to one backend per database.  This avoids race
+     * conditions that would move datfrozenxid or datminmxid backward.  It
+     * avoids calling vac_truncate_clog() with a datfrozenxid preceding a
+     * datfrozenxid passed to an earlier vac_truncate_clog() call.
+ */
+    LockDatabaseFrozenIds(ExclusiveLock);
+
+    /*
+     * Initialize the "min" calculation with
+     * GetOldestNonRemovableTransactionId(), which is a reasonable
+     * approximation to the minimum relfrozenxid for not-yet-committed
+     * pg_class entries for new tables; see AddNewRelationTuple().  So we
+     * cannot produce a wrong minimum by starting with this.
+     */
+    newFrozenXid = GetOldestNonRemovableTransactionId(NULL);
+
+    /*
+     * Similarly, initialize the MultiXact "min" with the value that would be
+     * used on pg_class for new tables.  See AddNewRelationTuple().
+     */
+    newMinMulti = GetOldestMultiXactId();
+
+    /*
+     * Identify the latest relfrozenxid and relminmxid values that we could
+     * validly see during the scan.  These are conservative values, but it's
+     * not really worth trying to be more exact.
+     */
+    lastSaneFrozenXid = ReadNextTransactionId();
+    lastSaneMinMulti = ReadNextMultiXactId();
+
+    /*
+     * We must seqscan pg_class to find the minimum Xid, because there is no
+     * index that can help us here.
+     */
+    relation = table_open(RelationRelationId, AccessShareLock);
+
+    scan = systable_beginscan(relation, InvalidOid, false,
+                              NULL, 0, NULL);
+
+    while ((classTup = systable_getnext(scan)) != NULL)
+    {
+        Form_pg_class classForm = (Form_pg_class) GETSTRUCT(classTup);
+
+        /*
+         * Only consider relations able to hold unfrozen XIDs (anything else
+         * should have InvalidTransactionId in relfrozenxid anyway).
+         */
+        if (classForm->relkind != RELKIND_RELATION &&
+            classForm->relkind != RELKIND_MATVIEW &&
+            classForm->relkind != RELKIND_TOASTVALUE)
+        {
+            Assert(!TransactionIdIsValid(classForm->relfrozenxid));
+            Assert(!MultiXactIdIsValid(classForm->relminmxid));
+            continue;
+        }
+
+        /*
+         * Some table AMs might not need per-relation xid / multixid horizons.
+         * It therefore seems reasonable to allow relfrozenxid and relminmxid
+         * to not be set (i.e. set to their respective Invalid*Id)
+         * independently.  Thus validate and compute horizon for each only if
+         * set.
+         *
+         * If things are working properly, no relation should have a
+         * relfrozenxid or relminmxid that is "in the future".  However, such
+         * cases have been known to arise due to bugs in pg_upgrade.  If we
+         * see any entries that are "in the future", chicken out and don't do
+         * anything.  This ensures we won't truncate clog & multixact SLRUs
+         * before those relations have been scanned and cleaned up.
+         */
+
+        if (TransactionIdIsValid(classForm->relfrozenxid))
+        {
+            Assert(TransactionIdIsNormal(classForm->relfrozenxid));
+
+            /* check for values in the future */
+            if (TransactionIdPrecedes(lastSaneFrozenXid, classForm->relfrozenxid))
+            {
+                bogus = true;
+                break;
+            }
+
+            /* determine new horizon */
+            if (TransactionIdPrecedes(classForm->relfrozenxid, newFrozenXid))
+                newFrozenXid = classForm->relfrozenxid;
+        }
+
+        if (MultiXactIdIsValid(classForm->relminmxid))
+        {
+            /* check for values in the future */
+            if (MultiXactIdPrecedes(lastSaneMinMulti, classForm->relminmxid))
+            {
+                bogus = true;
+                break;
+            }
+
+            /* determine new horizon */
+            if (MultiXactIdPrecedes(classForm->relminmxid, newMinMulti))
+                newMinMulti = classForm->relminmxid;
+        }
+    }
+
+    /* we're done with pg_class */
+    systable_endscan(scan);
+    table_close(relation, AccessShareLock);
+
+    /* chicken out if bogus data found */
+    if (bogus)
+        return;
+
+    Assert(TransactionIdIsNormal(newFrozenXid));
+    Assert(MultiXactIdIsValid(newMinMulti));
+
+    /* Now fetch the pg_database tuple we need to update. */
+    relation = table_open(DatabaseRelationId, RowExclusiveLock);
+
+    /*
+     * Get the pg_database tuple to scribble on.  Note that this does not
+     * directly rely on the syscache to avoid issues with flattened toast
+     * values for the in-place update.
+     */
+    ScanKeyInit(&key[0],
+                Anum_pg_database_oid,
+                BTEqualStrategyNumber, F_OIDEQ,
+                ObjectIdGetDatum(MyDatabaseId));
+
+    scan = systable_beginscan(relation, DatabaseOidIndexId, true,
+                              NULL, 1, key);
+    tuple = systable_getnext(scan);
+    /* copy so the tuple remains valid after systable_endscan */
+    tuple = heap_copytuple(tuple);
+    systable_endscan(scan);
+
+    if (!HeapTupleIsValid(tuple))
+        elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
+
+    dbform = (Form_pg_database) GETSTRUCT(tuple);
+
+    /*
+     * As in vac_update_relstats(), we ordinarily don't want to let
+     * datfrozenxid go backward; but if it's "in the future" then it must be
+     * corrupt and it seems best to overwrite it.
+     */
+    if (dbform->datfrozenxid != newFrozenXid &&
+        (TransactionIdPrecedes(dbform->datfrozenxid, newFrozenXid) ||
+         TransactionIdPrecedes(lastSaneFrozenXid, dbform->datfrozenxid)))
+    {
+        dbform->datfrozenxid = newFrozenXid;
+        dirty = true;
+    }
+    else
+        newFrozenXid = dbform->datfrozenxid;
+
+    /* Ditto for datminmxid */
+    if (dbform->datminmxid != newMinMulti &&
+        (MultiXactIdPrecedes(dbform->datminmxid, newMinMulti) ||
+         MultiXactIdPrecedes(lastSaneMinMulti, dbform->datminmxid)))
+    {
+        dbform->datminmxid = newMinMulti;
+        dirty = true;
+    }
+    else
+        newMinMulti = dbform->datminmxid;
+
+    if (dirty)
+        heap_inplace_update(relation, tuple);
+
+    heap_freetuple(tuple);
+    table_close(relation, RowExclusiveLock);
+
+    /*
+     * If we were able to advance datfrozenxid or datminmxid, see if we can
+     * truncate pg_xact and/or pg_multixact.  Also do it if the shared
+     * XID-wrap-limit info is stale, since this action will update that too.
+     */
+    if (dirty || ForceTransactionIdLimitUpdate())
+        vac_truncate_clog(newFrozenXid, newMinMulti,
+                          lastSaneFrozenXid, lastSaneMinMulti);
+}
+
+
+/*
+ * vac_truncate_clog() -- attempt to truncate the commit log
+ *
+ * Scan pg_database to determine the system-wide oldest datfrozenxid,
+ * and use it to truncate the transaction commit log (pg_xact).
+ * Also update the XID wrap limit info maintained by varsup.c.
+ * Likewise for datminmxid. + * + * The passed frozenXID and minMulti are the updated values for my own + * pg_database entry. They're used to initialize the "min" calculations. + * The caller also passes the "last sane" XID and MXID, since it has + * those at hand already. + * + * This routine is only invoked when we've managed to change our + * DB's datfrozenxid/datminmxid values, or we found that the shared + * XID-wrap-limit info is stale. + */ +static void +vac_truncate_clog(TransactionId frozenXID, + MultiXactId minMulti, + TransactionId lastSaneFrozenXid, + MultiXactId lastSaneMinMulti) +{ + TransactionId nextXID = ReadNextTransactionId(); + Relation relation; + TableScanDesc scan; + HeapTuple tuple; + Oid oldestxid_datoid; + Oid minmulti_datoid; + bool bogus = false; + bool frozenAlreadyWrapped = false; + + /* Restrict task to one backend per cluster; see SimpleLruTruncate(). */ + LWLockAcquire(WrapLimitsVacuumLock, LW_EXCLUSIVE); + + /* init oldest datoids to sync with my frozenXID/minMulti values */ + oldestxid_datoid = MyDatabaseId; + minmulti_datoid = MyDatabaseId; + + /* + * Scan pg_database to compute the minimum datfrozenxid/datminmxid + * + * Since vac_update_datfrozenxid updates datfrozenxid/datminmxid in-place, + * the values could change while we look at them. Fetch each one just + * once to ensure sane behavior of the comparison logic. (Here, as in + * many other places, we assume that fetching or updating an XID in shared + * storage is atomic.) + * + * Note: we need not worry about a race condition with new entries being + * inserted by CREATE DATABASE. Any such entry will have a copy of some + * existing DB's datfrozenxid, and that source DB cannot be ours because + * of the interlock against copying a DB containing an active backend. + * Hence the new entry will not reduce the minimum. 
Also, if two VACUUMs + * concurrently modify the datfrozenxid's of different databases, the + * worst possible outcome is that pg_xact is not truncated as aggressively + * as it could be. + */ + relation = table_open(DatabaseRelationId, AccessShareLock); + + scan = table_beginscan_catalog(relation, 0, NULL); + + while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + { + volatile FormData_pg_database *dbform = (Form_pg_database) GETSTRUCT(tuple); + TransactionId datfrozenxid = dbform->datfrozenxid; + TransactionId datminmxid = dbform->datminmxid; + + Assert(TransactionIdIsNormal(datfrozenxid)); + Assert(MultiXactIdIsValid(datminmxid)); + + /* + * If database is in the process of getting dropped, or has been + * interrupted while doing so, no connections to it are possible + * anymore. Therefore we don't need to take it into account here. + * Which is good, because it can't be processed by autovacuum either. + */ + if (database_is_invalid_form((Form_pg_database) dbform)) + { + elog(DEBUG2, + "skipping invalid database \"%s\" while computing relfrozenxid", + NameStr(dbform->datname)); + continue; + } + + /* + * If things are working properly, no database should have a + * datfrozenxid or datminmxid that is "in the future". However, such + * cases have been known to arise due to bugs in pg_upgrade. If we + * see any entries that are "in the future", chicken out and don't do + * anything. This ensures we won't truncate clog before those + * databases have been scanned and cleaned up. (We will issue the + * "already wrapped" warning if appropriate, though.) 
+ */ + if (TransactionIdPrecedes(lastSaneFrozenXid, datfrozenxid) || + MultiXactIdPrecedes(lastSaneMinMulti, datminmxid)) + bogus = true; + + if (TransactionIdPrecedes(nextXID, datfrozenxid)) + frozenAlreadyWrapped = true; + else if (TransactionIdPrecedes(datfrozenxid, frozenXID)) + { + frozenXID = datfrozenxid; + oldestxid_datoid = dbform->oid; + } + + if (MultiXactIdPrecedes(datminmxid, minMulti)) + { + minMulti = datminmxid; + minmulti_datoid = dbform->oid; + } + } + + table_endscan(scan); + + table_close(relation, AccessShareLock); + + /* + * Do not truncate CLOG if we seem to have suffered wraparound already; + * the computed minimum XID might be bogus. This case should now be + * impossible due to the defenses in GetNewTransactionId, but we keep the + * test anyway. + */ + if (frozenAlreadyWrapped) + { + ereport(WARNING, + (errmsg("some databases have not been vacuumed in over 2 billion transactions"), + errdetail("You might have already suffered transaction-wraparound data loss."))); + LWLockRelease(WrapLimitsVacuumLock); + return; + } + + /* chicken out if data is bogus in any other way */ + if (bogus) + { + LWLockRelease(WrapLimitsVacuumLock); + return; + } + + /* + * Advance the oldest value for commit timestamps before truncating, so + * that if a user requests a timestamp for a transaction we're truncating + * away right after this point, they get NULL instead of an ugly "file not + * found" error from slru.c. This doesn't matter for xact/multixact + * because they are not subject to arbitrary lookups from users. + */ + AdvanceOldestCommitTsXid(frozenXID); + + /* + * Truncate CLOG, multixact and CommitTs to the oldest computed value. + */ + TruncateCLOG(frozenXID, oldestxid_datoid); + TruncateCommitTs(frozenXID); + TruncateMultiXact(minMulti, minmulti_datoid); + + /* + * Update the wrap limit for GetNewTransactionId and creation of new + * MultiXactIds. Note: these functions will also signal the postmaster + * for an(other) autovac cycle if needed. 
XXX should we avoid possibly + * signaling twice? + */ + SetTransactionIdLimit(frozenXID, oldestxid_datoid); + SetMultiXactIdLimit(minMulti, minmulti_datoid, false); + + LWLockRelease(WrapLimitsVacuumLock); +} + + +/* + * vacuum_rel() -- vacuum one heap relation + * + * relid identifies the relation to vacuum. If relation is supplied, + * use the name therein for reporting any failure to open/lock the rel; + * do not use it once we've successfully opened the rel, since it might + * be stale. + * + * Returns true if it's okay to proceed with a requested ANALYZE + * operation on this table. + * + * Doing one heap at a time incurs extra overhead, since we need to + * check that the heap exists again just before we vacuum it. The + * reason that we do this is so that vacuuming can be spread across + * many small transactions. Otherwise, two-phase locking would require + * us to lock the entire database during one pass of the vacuum cleaner. + * + * At entry and exit, we are not inside a transaction. + */ +static bool +vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params) +{ + LOCKMODE lmode; + Relation rel; + LockRelId lockrelid; + Oid toast_relid; + Oid save_userid; + int save_sec_context; + int save_nestlevel; + + Assert(params != NULL); + + /* Begin a transaction for vacuuming this relation */ + StartTransactionCommand(); + + if (!(params->options & VACOPT_FULL)) + { + /* + * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets + * other concurrent VACUUMs know that they can ignore this one while + * determining their OldestXmin. (The reason we don't set it during a + * full VACUUM is exactly that we may have to run user-defined + * functions for functional indexes, and we want to make sure that if + * they use the snapshot set above, any tuples it requires can't get + * removed from other tables. 
An index function that depends on the + * contents of other tables is arguably broken, but we won't break it + * here by violating transaction semantics.) + * + * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by + * autovacuum; it's used to avoid canceling a vacuum that was invoked + * in an emergency. + * + * Note: these flags remain set until CommitTransaction or + * AbortTransaction. We don't want to clear them until we reset + * MyProc->xid/xmin, otherwise GetOldestNonRemovableTransactionId() + * might appear to go backwards, which is probably Not Good. (We also + * set PROC_IN_VACUUM *before* taking our own snapshot, so that our + * xmin doesn't become visible ahead of setting the flag.) + */ + LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + MyProc->statusFlags |= PROC_IN_VACUUM; + if (params->is_wraparound) + MyProc->statusFlags |= PROC_VACUUM_FOR_WRAPAROUND; + ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags; + LWLockRelease(ProcArrayLock); + } + + /* + * Need to acquire a snapshot to prevent pg_subtrans from being truncated, + * cutoff xids in local memory wrapping around, and to have updated xmin + * horizons. + */ + PushActiveSnapshot(GetTransactionSnapshot()); + + /* + * Check for user-requested abort. Note we want this to be inside a + * transaction, so xact.c doesn't issue useless WARNING. + */ + CHECK_FOR_INTERRUPTS(); + + /* + * Determine the type of lock we want --- hard exclusive lock for a FULL + * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either + * way, we can be sure that no other backend is vacuuming the same table. + */ + lmode = (params->options & VACOPT_FULL) ? 
+ AccessExclusiveLock : ShareUpdateExclusiveLock; + + /* open the relation and get the appropriate lock on it */ + rel = vacuum_open_relation(relid, relation, params->options, + params->log_min_duration >= 0, lmode); + + /* leave if relation could not be opened or locked */ + if (!rel) + { + PopActiveSnapshot(); + CommitTransactionCommand(); + return false; + } + + /* + * Check if relation needs to be skipped based on ownership. This check + * happens also when building the relation list to vacuum for a manual + * operation, and needs to be done additionally here as VACUUM could + * happen across multiple transactions where relation ownership could have + * changed in-between. Make sure to only generate logs for VACUUM in this + * case. + */ + if (!vacuum_is_relation_owner(RelationGetRelid(rel), + rel->rd_rel, + params->options & VACOPT_VACUUM)) + { + relation_close(rel, lmode); + PopActiveSnapshot(); + CommitTransactionCommand(); + return false; + } + + /* + * Check that it's of a vacuumable relkind. + */ + if (rel->rd_rel->relkind != RELKIND_RELATION && + rel->rd_rel->relkind != RELKIND_MATVIEW && + rel->rd_rel->relkind != RELKIND_TOASTVALUE && + rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) + { + ereport(WARNING, + (errmsg("skipping \"%s\" --- cannot vacuum non-tables or special system tables", + RelationGetRelationName(rel)))); + relation_close(rel, lmode); + PopActiveSnapshot(); + CommitTransactionCommand(); + return false; + } + + /* + * Silently ignore tables that are temp tables of other backends --- + * trying to vacuum these will lead to great unhappiness, since their + * contents are probably not up-to-date on disk. (We don't throw a + * warning here; it would just lead to chatter during a database-wide + * VACUUM.) + */ + if (RELATION_IS_OTHER_TEMP(rel)) + { + relation_close(rel, lmode); + PopActiveSnapshot(); + CommitTransactionCommand(); + return false; + } + + /* + * Silently ignore partitioned tables as there is no work to be done. 
The + * useful work is on their child partitions, which have been queued up for + * us separately. + */ + if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + relation_close(rel, lmode); + PopActiveSnapshot(); + CommitTransactionCommand(); + /* It's OK to proceed with ANALYZE on this table */ + return true; + } + + /* + * Get a session-level lock too. This will protect our access to the + * relation across multiple transactions, so that we can vacuum the + * relation's TOAST table (if any) secure in the knowledge that no one is + * deleting the parent relation. + * + * NOTE: this cannot block, even if someone else is waiting for access, + * because the lock manager knows that both lock requests are from the + * same process. + */ + lockrelid = rel->rd_lockInfo.lockRelId; + LockRelationIdForSession(&lockrelid, lmode); + + /* + * Set index_cleanup option based on index_cleanup reloption if it wasn't + * specified in VACUUM command, or when running in an autovacuum worker + */ + if (params->index_cleanup == VACOPTVALUE_UNSPECIFIED) + { + StdRdOptIndexCleanup vacuum_index_cleanup; + + if (rel->rd_options == NULL) + vacuum_index_cleanup = STDRD_OPTION_VACUUM_INDEX_CLEANUP_AUTO; + else + vacuum_index_cleanup = + ((StdRdOptions *) rel->rd_options)->vacuum_index_cleanup; + + if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_AUTO) + params->index_cleanup = VACOPTVALUE_AUTO; + else if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_ON) + params->index_cleanup = VACOPTVALUE_ENABLED; + else + { + Assert(vacuum_index_cleanup == + STDRD_OPTION_VACUUM_INDEX_CLEANUP_OFF); + params->index_cleanup = VACOPTVALUE_DISABLED; + } + } + + /* + * Set truncate option based on truncate reloption if it wasn't specified + * in VACUUM command, or when running in an autovacuum worker + */ + if (params->truncate == VACOPTVALUE_UNSPECIFIED) + { + if (rel->rd_options == NULL || + ((StdRdOptions *) rel->rd_options)->vacuum_truncate) + params->truncate = 
VACOPTVALUE_ENABLED; + else + params->truncate = VACOPTVALUE_DISABLED; + } + + /* + * Remember the relation's TOAST relation for later, if the caller asked + * us to process it. In VACUUM FULL, though, the toast table is + * automatically rebuilt by cluster_rel so we shouldn't recurse to it. + */ + if ((params->options & VACOPT_PROCESS_TOAST) != 0 && + (params->options & VACOPT_FULL) == 0) + toast_relid = rel->rd_rel->reltoastrelid; + else + toast_relid = InvalidOid; + + /* + * Switch to the table owner's userid, so that any index functions are run + * as that user. Also lock down security-restricted operations and + * arrange to make GUC variable changes local to this command. (This is + * unnecessary, but harmless, for lazy VACUUM.) + */ + GetUserIdAndSecContext(&save_userid, &save_sec_context); + SetUserIdAndSecContext(rel->rd_rel->relowner, + save_sec_context | SECURITY_RESTRICTED_OPERATION); + save_nestlevel = NewGUCNestLevel(); + + /* + * Do the actual work --- either FULL or "lazy" vacuum + */ + if (params->options & VACOPT_FULL) + { + ClusterParams cluster_params = {0}; + + /* close relation before vacuuming, but hold lock until commit */ + relation_close(rel, NoLock); + rel = NULL; + + if ((params->options & VACOPT_VERBOSE) != 0) + cluster_params.options |= CLUOPT_VERBOSE; + + /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */ + cluster_rel(relid, InvalidOid, &cluster_params); + } + else + table_relation_vacuum(rel, params, vac_strategy); + + /* Roll back any GUC changes executed by index functions */ + AtEOXact_GUC(false, save_nestlevel); + + /* Restore userid and security context */ + SetUserIdAndSecContext(save_userid, save_sec_context); + + /* all done with this class, but hold lock until commit */ + if (rel) + relation_close(rel, NoLock); + + /* + * Complete the transaction and free all temporary memory used. 
+ */ + PopActiveSnapshot(); + CommitTransactionCommand(); + + /* + * If the relation has a secondary toast rel, vacuum that too while we + * still hold the session lock on the main table. Note however that + * "analyze" will not get done on the toast table. This is good, because + * the toaster always uses hardcoded index access and statistics are + * totally unimportant for toast relations. + */ + if (toast_relid != InvalidOid) + vacuum_rel(toast_relid, NULL, params); + + /* + * Now release the session-level lock on the main table. + */ + UnlockRelationIdForSession(&lockrelid, lmode); + + /* Report that we really did it. */ + return true; +} + + +/* + * Open all the vacuumable indexes of the given relation, obtaining the + * specified kind of lock on each. Return an array of Relation pointers for + * the indexes into *Irel, and the number of indexes into *nindexes. + * + * We consider an index vacuumable if it is marked insertable (indisready). + * If it isn't, probably a CREATE INDEX CONCURRENTLY command failed early in + * execution, and what we have is too corrupt to be processable. We will + * vacuum even if the index isn't indisvalid; this is important because in a + * unique index, uniqueness checks will be performed anyway and had better not + * hit dangling index pointers. 
+ */ +void +vac_open_indexes(Relation relation, LOCKMODE lockmode, + int *nindexes, Relation **Irel) +{ + List *indexoidlist; + ListCell *indexoidscan; + int i; + + Assert(lockmode != NoLock); + + indexoidlist = RelationGetIndexList(relation); + + /* allocate enough memory for all indexes */ + i = list_length(indexoidlist); + + if (i > 0) + *Irel = (Relation *) palloc(i * sizeof(Relation)); + else + *Irel = NULL; + + /* collect just the ready indexes */ + i = 0; + foreach(indexoidscan, indexoidlist) + { + Oid indexoid = lfirst_oid(indexoidscan); + Relation indrel; + + indrel = index_open(indexoid, lockmode); + if (indrel->rd_index->indisready) + (*Irel)[i++] = indrel; + else + index_close(indrel, lockmode); + } + + *nindexes = i; + + list_free(indexoidlist); +} + +/* + * Release the resources acquired by vac_open_indexes. Optionally release + * the locks (say NoLock to keep 'em). + */ +void +vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode) +{ + if (Irel == NULL) + return; + + while (nindexes--) + { + Relation ind = Irel[nindexes]; + + index_close(ind, lockmode); + } + pfree(Irel); +} + +/* + * vacuum_delay_point --- check for interrupts and cost-based delay. + * + * This should be called in each major loop of VACUUM processing, + * typically once per page processed. + */ +void +vacuum_delay_point(void) +{ + double msec = 0; + + /* Always check for interrupts */ + CHECK_FOR_INTERRUPTS(); + + if (!VacuumCostActive || InterruptPending) + return; + + /* + * For parallel vacuum, the delay is computed based on the shared cost + * balance. See compute_parallel_delay. 
+ */ + if (VacuumSharedCostBalance != NULL) + msec = compute_parallel_delay(); + else if (VacuumCostBalance >= VacuumCostLimit) + msec = VacuumCostDelay * VacuumCostBalance / VacuumCostLimit; + + /* Nap if appropriate */ + if (msec > 0) + { + if (msec > VacuumCostDelay * 4) + msec = VacuumCostDelay * 4; + + pgstat_report_wait_start(WAIT_EVENT_VACUUM_DELAY); + pg_usleep(msec * 1000); + pgstat_report_wait_end(); + + /* + * We don't want to ignore postmaster death during very long vacuums + * with vacuum_cost_delay configured. We can't use the usual + * WaitLatch() approach here because we want microsecond-based sleep + * durations above. + */ + if (IsUnderPostmaster && !PostmasterIsAlive()) + exit(1); + + VacuumCostBalance = 0; + + /* update balance values for workers */ + AutoVacuumUpdateDelay(); + + /* Might have gotten an interrupt while sleeping */ + CHECK_FOR_INTERRUPTS(); + } +} + +/* + * Computes the vacuum delay for parallel workers. + * + * The basic idea of a cost-based delay for parallel vacuum is to allow each + * worker to sleep in proportion to the share of work it's done. We achieve this + * by allowing all parallel vacuum workers including the leader process to + * have a shared view of cost related parameters (mainly VacuumCostBalance). + * We allow each worker to update it as and when it has incurred any cost and + * then based on that decide whether it needs to sleep. We compute the time + * to sleep for a worker based on the cost it has incurred + * (VacuumCostBalanceLocal) and then reduce the VacuumSharedCostBalance by + * that amount. This avoids putting to sleep those workers which have done less + * I/O than other workers and therefore ensure that workers + * which are doing more I/O got throttled more. 
+ * + * We allow a worker to sleep only if it has performed I/O above a certain + * threshold, which is calculated based on the number of active workers + * (VacuumActiveNWorkers), and the overall cost balance is more than + * VacuumCostLimit set by the system. Testing reveals that we achieve + * the required throttling if we force a worker that has done more than 50% + * of its share of work to sleep. + */ +static double +compute_parallel_delay(void) +{ + double msec = 0; + uint32 shared_balance; + int nworkers; + + /* Parallel vacuum must be active */ + Assert(VacuumSharedCostBalance); + + nworkers = pg_atomic_read_u32(VacuumActiveNWorkers); + + /* At least count itself */ + Assert(nworkers >= 1); + + /* Update the shared cost balance value atomically */ + shared_balance = pg_atomic_add_fetch_u32(VacuumSharedCostBalance, VacuumCostBalance); + + /* Compute the total local balance for the current worker */ + VacuumCostBalanceLocal += VacuumCostBalance; + + if ((shared_balance >= VacuumCostLimit) && + (VacuumCostBalanceLocal > 0.5 * ((double) VacuumCostLimit / nworkers))) + { + /* Compute sleep time based on the local cost balance */ + msec = VacuumCostDelay * VacuumCostBalanceLocal / VacuumCostLimit; + pg_atomic_sub_fetch_u32(VacuumSharedCostBalance, VacuumCostBalanceLocal); + VacuumCostBalanceLocal = 0; + } + + /* + * Reset the local balance as we accumulated it into the shared value. + */ + VacuumCostBalance = 0; + + return msec; +} + +/* + * A wrapper function of defGetBoolean(). + * + * This function returns VACOPTVALUE_ENABLED and VACOPTVALUE_DISABLED instead + * of true and false. + */ +static VacOptValue +get_vacoptval_from_boolean(DefElem *def) +{ + return defGetBoolean(def) ? VACOPTVALUE_ENABLED : VACOPTVALUE_DISABLED; +} + +/* + * vac_bulkdel_one_index() -- bulk-deletion for index relation. 
+ * + * Returns bulk delete stats derived from input stats + */ +IndexBulkDeleteResult * +vac_bulkdel_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat, + VacDeadItems *dead_items) +{ + /* Do bulk deletion */ + istat = index_bulk_delete(ivinfo, istat, vac_tid_reaped, + (void *) dead_items); + + ereport(ivinfo->message_level, + (errmsg("scanned index \"%s\" to remove %d row versions", + RelationGetRelationName(ivinfo->index), + dead_items->num_items))); + + return istat; +} + +/* + * vac_cleanup_one_index() -- do post-vacuum cleanup for index relation. + * + * Returns bulk delete stats derived from input stats + */ +IndexBulkDeleteResult * +vac_cleanup_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat) +{ + istat = index_vacuum_cleanup(ivinfo, istat); + + if (istat) + ereport(ivinfo->message_level, + (errmsg("index \"%s\" now contains %.0f row versions in %u pages", + RelationGetRelationName(ivinfo->index), + istat->num_index_tuples, + istat->num_pages), + errdetail("%.0f index row versions were removed.\n" + "%u index pages were newly deleted.\n" + "%u index pages are currently deleted, of which %u are currently reusable.", + istat->tuples_removed, + istat->pages_newly_deleted, + istat->pages_deleted, istat->pages_free))); + + return istat; +} + +/* + * Returns the total required space for VACUUM's dead_items array given a + * max_items value. + */ +Size +vac_max_items_to_alloc_size(int max_items) +{ + Assert(max_items <= MAXDEADITEMS(MaxAllocSize)); + + return offsetof(VacDeadItems, items) + sizeof(ItemPointerData) * max_items; +} + +/* + * vac_tid_reaped() -- is a particular tid deletable? + * + * This has the right signature to be an IndexBulkDeleteCallback. + * + * Assumes dead_items array is sorted (in ascending TID order). 
+ */ +static bool +vac_tid_reaped(ItemPointer itemptr, void *state) +{ + VacDeadItems *dead_items = (VacDeadItems *) state; + int64 litem, + ritem, + item; + ItemPointer res; + + litem = itemptr_encode(&dead_items->items[0]); + ritem = itemptr_encode(&dead_items->items[dead_items->num_items - 1]); + item = itemptr_encode(itemptr); + + /* + * Doing a simple bound check before bsearch() is useful to avoid the + * extra cost of bsearch(), especially if dead items on the heap are + * concentrated in a certain range. Since this function is called for + * every index tuple, it pays to be really fast. + */ + if (item < litem || item > ritem) + return false; + + res = (ItemPointer) bsearch((void *) itemptr, + (void *) dead_items->items, + dead_items->num_items, + sizeof(ItemPointerData), + vac_cmp_itemptr); + + return (res != NULL); +} + +/* + * Comparator routines for use with qsort() and bsearch(). + */ +static int +vac_cmp_itemptr(const void *left, const void *right) +{ + BlockNumber lblk, + rblk; + OffsetNumber loff, + roff; + + lblk = ItemPointerGetBlockNumber((ItemPointer) left); + rblk = ItemPointerGetBlockNumber((ItemPointer) right); + + if (lblk < rblk) + return -1; + if (lblk > rblk) + return 1; + + loff = ItemPointerGetOffsetNumber((ItemPointer) left); + roff = ItemPointerGetOffsetNumber((ItemPointer) right); + + if (loff < roff) + return -1; + if (loff > roff) + return 1; + + return 0; +} diff --git a/src/backend/commands/vacuumparallel.c b/src/backend/commands/vacuumparallel.c new file mode 100644 index 0000000..f26d796 --- /dev/null +++ b/src/backend/commands/vacuumparallel.c @@ -0,0 +1,1074 @@ +/*------------------------------------------------------------------------- + * + * vacuumparallel.c + * Support routines for parallel vacuum execution. + * + * This file contains routines that are intended to support setting up, using, + * and tearing down a ParallelVacuumState. 
+ * + * In a parallel vacuum, we perform both index bulk deletion and index cleanup + * with parallel worker processes. Individual indexes are processed by one + * vacuum process. ParallelVacuumState contains shared information as well as + * the memory space for storing dead items allocated in the DSM segment. We + * launch parallel worker processes at the start of parallel index + * bulk-deletion and index cleanup and once all indexes are processed, the + * parallel worker processes exit. Each time we process indexes in parallel, + * the parallel context is re-initialized so that the same DSM can be used for + * multiple passes of index bulk-deletion and index cleanup. + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/commands/vacuumparallel.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/amapi.h" +#include "access/table.h" +#include "access/xact.h" +#include "catalog/index.h" +#include "commands/vacuum.h" +#include "optimizer/paths.h" +#include "pgstat.h" +#include "storage/bufmgr.h" +#include "tcop/tcopprot.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" + +/* + * DSM keys for parallel vacuum. Unlike other parallel execution code, since + * we don't need to worry about DSM keys conflicting with plan_node_id we can + * use small integers. + */ +#define PARALLEL_VACUUM_KEY_SHARED 1 +#define PARALLEL_VACUUM_KEY_DEAD_ITEMS 2 +#define PARALLEL_VACUUM_KEY_QUERY_TEXT 3 +#define PARALLEL_VACUUM_KEY_BUFFER_USAGE 4 +#define PARALLEL_VACUUM_KEY_WAL_USAGE 5 +#define PARALLEL_VACUUM_KEY_INDEX_STATS 6 + +/* + * Shared information among parallel workers. So this is allocated in the DSM + * segment. + */ +typedef struct PVShared +{ + /* + * Target table relid and log level (for messages about parallel workers + * launched during VACUUM VERBOSE). 
These fields are not modified during + * the parallel vacuum. + */ + Oid relid; + int elevel; + + /* + * Fields for both index vacuum and cleanup. + * + * reltuples is the total number of input heap tuples. We set either old + * live tuples in the index vacuum case or the new live tuples in the + * index cleanup case. + * + * estimated_count is true if reltuples is an estimated value. (Note that + * reltuples could be -1 in this case, indicating we have no idea.) + */ + double reltuples; + bool estimated_count; + + /* + * In single process vacuum we could consume more memory during index + * vacuuming or cleanup apart from the memory for heap scanning. In + * parallel vacuum, since individual vacuum workers can consume memory + * equal to maintenance_work_mem, the new maintenance_work_mem for each + * worker is set such that the parallel operation doesn't consume more + * memory than single process vacuum. + */ + int maintenance_work_mem_worker; + + /* + * Shared vacuum cost balance. During parallel vacuum, + * VacuumSharedCostBalance points to this value and it accumulates the + * balance of each parallel vacuum worker. + */ + pg_atomic_uint32 cost_balance; + + /* + * Number of active parallel workers. This is used for computing the + * minimum threshold of the vacuum cost balance before a worker sleeps for + * cost-based delay. + */ + pg_atomic_uint32 active_nworkers; + + /* Counter for vacuuming and cleanup */ + pg_atomic_uint32 idx; +} PVShared; + +/* Status used during parallel index vacuum or cleanup */ +typedef enum PVIndVacStatus +{ + PARALLEL_INDVAC_STATUS_INITIAL = 0, + PARALLEL_INDVAC_STATUS_NEED_BULKDELETE, + PARALLEL_INDVAC_STATUS_NEED_CLEANUP, + PARALLEL_INDVAC_STATUS_COMPLETED +} PVIndVacStatus; + +/* + * Struct for index vacuum statistics of an index that is used for parallel vacuum. + * This includes the status of parallel index vacuum as well as index statistics. 
+ */ +typedef struct PVIndStats +{ + /* + * The following two fields are set by leader process before executing + * parallel index vacuum or parallel index cleanup. These fields are not + * fixed for the entire VACUUM operation. They are only fixed for an + * individual parallel index vacuum and cleanup. + * + * parallel_workers_can_process is true if both leader and worker can + * process the index, otherwise only leader can process it. + */ + PVIndVacStatus status; + bool parallel_workers_can_process; + + /* + * Individual worker or leader stores the result of index vacuum or + * cleanup. + */ + bool istat_updated; /* are the stats updated? */ + IndexBulkDeleteResult istat; +} PVIndStats; + +/* + * Struct for maintaining a parallel vacuum state. typedef appears in vacuum.h. + */ +struct ParallelVacuumState +{ + /* NULL for worker processes */ + ParallelContext *pcxt; + + /* Target indexes */ + Relation *indrels; + int nindexes; + + /* Shared information among parallel vacuum workers */ + PVShared *shared; + + /* + * Shared index statistics among parallel vacuum workers. The array + * element is allocated for every index, even those indexes where parallel + * index vacuuming is unsafe or not worthwhile (e.g., + * will_parallel_vacuum[] is false). During parallel vacuum, + * IndexBulkDeleteResult of each index is kept in DSM and is copied into + * local memory at the end of parallel vacuum. + */ + PVIndStats *indstats; + + /* Shared dead items space among parallel vacuum workers */ + VacDeadItems *dead_items; + + /* Points to buffer usage area in DSM */ + BufferUsage *buffer_usage; + + /* Points to WAL usage area in DSM */ + WalUsage *wal_usage; + + /* + * False if the index is totally unsuitable target for all parallel + * processing. For example, the index could be < + * min_parallel_index_scan_size cutoff. + */ + bool *will_parallel_vacuum; + + /* + * The number of indexes that support parallel index bulk-deletion and + * parallel index cleanup respectively. 
+ */ + int nindexes_parallel_bulkdel; + int nindexes_parallel_cleanup; + int nindexes_parallel_condcleanup; + + /* Buffer access strategy used by leader process */ + BufferAccessStrategy bstrategy; + + /* + * Error reporting state. The error callback is set only for workers + * processes during parallel index vacuum. + */ + char *relnamespace; + char *relname; + char *indname; + PVIndVacStatus status; +}; + +static int parallel_vacuum_compute_workers(Relation *indrels, int nindexes, int nrequested, + bool *will_parallel_vacuum); +static void parallel_vacuum_process_all_indexes(ParallelVacuumState *pvs, int num_index_scans, + bool vacuum); +static void parallel_vacuum_process_safe_indexes(ParallelVacuumState *pvs); +static void parallel_vacuum_process_unsafe_indexes(ParallelVacuumState *pvs); +static void parallel_vacuum_process_one_index(ParallelVacuumState *pvs, Relation indrel, + PVIndStats *indstats); +static bool parallel_vacuum_index_is_parallel_safe(Relation indrel, int num_index_scans, + bool vacuum); +static void parallel_vacuum_error_callback(void *arg); + +/* + * Try to enter parallel mode and create a parallel context. Then initialize + * shared memory state. + * + * On success, return parallel vacuum state. Otherwise return NULL. 
 */
ParallelVacuumState *
parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes,
					 int nrequested_workers, int max_items,
					 int elevel, BufferAccessStrategy bstrategy)
{
	ParallelVacuumState *pvs;
	ParallelContext *pcxt;
	PVShared   *shared;
	VacDeadItems *dead_items;
	PVIndStats *indstats;
	BufferUsage *buffer_usage;
	WalUsage   *wal_usage;
	bool	   *will_parallel_vacuum;
	Size		est_indstats_len;
	Size		est_shared_len;
	Size		est_dead_items_len;
	int			nindexes_mwm = 0;
	int			parallel_workers = 0;
	int			querylen;

	/*
	 * A parallel vacuum must be requested and there must be indexes on the
	 * relation
	 */
	Assert(nrequested_workers >= 0);
	Assert(nindexes > 0);

	/*
	 * Compute the number of parallel vacuum workers to launch
	 */
	will_parallel_vacuum = (bool *) palloc0(sizeof(bool) * nindexes);
	parallel_workers = parallel_vacuum_compute_workers(indrels, nindexes,
													   nrequested_workers,
													   will_parallel_vacuum);
	if (parallel_workers <= 0)
	{
		/* Can't perform vacuum in parallel -- return NULL */
		pfree(will_parallel_vacuum);
		return NULL;
	}

	pvs = (ParallelVacuumState *) palloc0(sizeof(ParallelVacuumState));
	pvs->indrels = indrels;
	pvs->nindexes = nindexes;
	pvs->will_parallel_vacuum = will_parallel_vacuum;
	pvs->bstrategy = bstrategy;

	EnterParallelMode();
	pcxt = CreateParallelContext("postgres", "parallel_vacuum_main",
								 parallel_workers);
	Assert(pcxt->nworkers > 0);
	pvs->pcxt = pcxt;

	/* Estimate size for index vacuum stats -- PARALLEL_VACUUM_KEY_INDEX_STATS */
	est_indstats_len = mul_size(sizeof(PVIndStats), nindexes);
	shm_toc_estimate_chunk(&pcxt->estimator, est_indstats_len);
	shm_toc_estimate_keys(&pcxt->estimator, 1);

	/* Estimate size for shared information -- PARALLEL_VACUUM_KEY_SHARED */
	est_shared_len = sizeof(PVShared);
	shm_toc_estimate_chunk(&pcxt->estimator, est_shared_len);
	shm_toc_estimate_keys(&pcxt->estimator, 1);

	/* Estimate size for dead_items -- PARALLEL_VACUUM_KEY_DEAD_ITEMS */
	est_dead_items_len = vac_max_items_to_alloc_size(max_items);
	shm_toc_estimate_chunk(&pcxt->estimator, est_dead_items_len);
	shm_toc_estimate_keys(&pcxt->estimator, 1);

	/*
	 * Estimate space for BufferUsage and WalUsage --
	 * PARALLEL_VACUUM_KEY_BUFFER_USAGE and PARALLEL_VACUUM_KEY_WAL_USAGE.
	 *
	 * If there are no extensions loaded that care, we could skip this.  We
	 * have no way of knowing whether anyone's looking at pgBufferUsage or
	 * pgWalUsage, so do it unconditionally.
	 */
	shm_toc_estimate_chunk(&pcxt->estimator,
						   mul_size(sizeof(BufferUsage), pcxt->nworkers));
	shm_toc_estimate_keys(&pcxt->estimator, 1);
	shm_toc_estimate_chunk(&pcxt->estimator,
						   mul_size(sizeof(WalUsage), pcxt->nworkers));
	shm_toc_estimate_keys(&pcxt->estimator, 1);

	/* Finally, estimate PARALLEL_VACUUM_KEY_QUERY_TEXT space */
	if (debug_query_string)
	{
		querylen = strlen(debug_query_string);
		shm_toc_estimate_chunk(&pcxt->estimator, querylen + 1);
		shm_toc_estimate_keys(&pcxt->estimator, 1);
	}
	else
		querylen = 0;			/* keep compiler quiet */

	InitializeParallelDSM(pcxt);

	/* Prepare index vacuum stats */
	indstats = (PVIndStats *) shm_toc_allocate(pcxt->toc, est_indstats_len);
	MemSet(indstats, 0, est_indstats_len);
	for (int i = 0; i < nindexes; i++)
	{
		Relation	indrel = indrels[i];
		uint8		vacoptions = indrel->rd_indam->amparallelvacuumoptions;

		/*
		 * Cleanup option should be either disabled, always performing in
		 * parallel or conditionally performing in parallel.
		 */
		Assert(((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) == 0) ||
			   ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) == 0));
		Assert(vacoptions <= VACUUM_OPTION_MAX_VALID_VALUE);

		if (!will_parallel_vacuum[i])
			continue;

		if (indrel->rd_indam->amusemaintenanceworkmem)
			nindexes_mwm++;

		/*
		 * Remember the number of indexes that support parallel operation for
		 * each phase.
		 */
		if ((vacoptions & VACUUM_OPTION_PARALLEL_BULKDEL) != 0)
			pvs->nindexes_parallel_bulkdel++;
		if ((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) != 0)
			pvs->nindexes_parallel_cleanup++;
		if ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) != 0)
			pvs->nindexes_parallel_condcleanup++;
	}
	shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_INDEX_STATS, indstats);
	pvs->indstats = indstats;

	/* Prepare shared information */
	shared = (PVShared *) shm_toc_allocate(pcxt->toc, est_shared_len);
	MemSet(shared, 0, est_shared_len);
	shared->relid = RelationGetRelid(rel);
	shared->elevel = elevel;
	/*
	 * Divide maintenance_work_mem among the workers that will actually use
	 * it, so the parallel operation doesn't exceed single-process vacuum's
	 * memory budget.
	 */
	shared->maintenance_work_mem_worker =
		(nindexes_mwm > 0) ?
		maintenance_work_mem / Min(parallel_workers, nindexes_mwm) :
		maintenance_work_mem;

	pg_atomic_init_u32(&(shared->cost_balance), 0);
	pg_atomic_init_u32(&(shared->active_nworkers), 0);
	pg_atomic_init_u32(&(shared->idx), 0);

	shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_SHARED, shared);
	pvs->shared = shared;

	/* Prepare the dead_items space */
	dead_items = (VacDeadItems *) shm_toc_allocate(pcxt->toc,
												   est_dead_items_len);
	dead_items->max_items = max_items;
	dead_items->num_items = 0;
	MemSet(dead_items->items, 0, sizeof(ItemPointerData) * max_items);
	shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_DEAD_ITEMS, dead_items);
	pvs->dead_items = dead_items;

	/*
	 * Allocate space for each worker's BufferUsage and WalUsage; no need to
	 * initialize
	 */
	buffer_usage = shm_toc_allocate(pcxt->toc,
									mul_size(sizeof(BufferUsage), pcxt->nworkers));
	shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_BUFFER_USAGE, buffer_usage);
	pvs->buffer_usage = buffer_usage;
	wal_usage = shm_toc_allocate(pcxt->toc,
								 mul_size(sizeof(WalUsage), pcxt->nworkers));
	shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_WAL_USAGE, wal_usage);
	pvs->wal_usage = wal_usage;

	/* Store query string for workers */
	if (debug_query_string)
	{
		char	   *sharedquery;

		sharedquery = (char *) shm_toc_allocate(pcxt->toc, querylen + 1);
		memcpy(sharedquery, debug_query_string, querylen + 1);
		sharedquery[querylen] = '\0';
		shm_toc_insert(pcxt->toc,
					   PARALLEL_VACUUM_KEY_QUERY_TEXT, sharedquery);
	}

	/* Success -- return parallel vacuum state */
	return pvs;
}

/*
 * Destroy the parallel context, and end parallel mode.
 *
 * Since writes are not allowed during parallel mode, copy the
 * updated index statistics from DSM into local memory and then later use that
 * to update the index statistics.  One might think that we can exit from
 * parallel mode, update the index statistics and then destroy parallel
 * context, but that won't be safe (see ExitParallelMode).
 */
void
parallel_vacuum_end(ParallelVacuumState *pvs, IndexBulkDeleteResult **istats)
{
	Assert(!IsParallelWorker());

	/* Copy the updated statistics */
	for (int i = 0; i < pvs->nindexes; i++)
	{
		PVIndStats *indstats = &(pvs->indstats[i]);

		if (indstats->istat_updated)
		{
			istats[i] = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
			memcpy(istats[i], &indstats->istat, sizeof(IndexBulkDeleteResult));
		}
		else
			istats[i] = NULL;
	}

	DestroyParallelContext(pvs->pcxt);
	ExitParallelMode();

	pfree(pvs->will_parallel_vacuum);
	pfree(pvs);
}

/* Returns the dead items space */
VacDeadItems *
parallel_vacuum_get_dead_items(ParallelVacuumState *pvs)
{
	return pvs->dead_items;
}

/*
 * Do parallel index bulk-deletion with parallel workers.
 */
void
parallel_vacuum_bulkdel_all_indexes(ParallelVacuumState *pvs, long num_table_tuples,
									int num_index_scans)
{
	Assert(!IsParallelWorker());

	/*
	 * We can only provide an approximate value of num_heap_tuples, at least
	 * for now.
	 */
	pvs->shared->reltuples = num_table_tuples;
	pvs->shared->estimated_count = true;

	parallel_vacuum_process_all_indexes(pvs, num_index_scans, true);
}

/*
 * Do parallel index cleanup with parallel workers.
 */
void
parallel_vacuum_cleanup_all_indexes(ParallelVacuumState *pvs, long num_table_tuples,
									int num_index_scans, bool estimated_count)
{
	Assert(!IsParallelWorker());

	/*
	 * We can provide a better estimate of total number of surviving tuples
	 * (we assume indexes are more interested in that than in the number of
	 * nominally live tuples).
	 */
	pvs->shared->reltuples = num_table_tuples;
	pvs->shared->estimated_count = estimated_count;

	parallel_vacuum_process_all_indexes(pvs, num_index_scans, false);
}

/*
 * Compute the number of parallel worker processes to request.  Both index
 * vacuum and index cleanup can be executed with parallel workers.
 * The index is eligible for parallel vacuum iff its size is greater than
 * min_parallel_index_scan_size as invoking workers for very small indexes
 * can hurt performance.
 *
 * nrequested is the number of parallel workers that user requested.  If
 * nrequested is 0, we compute the parallel degree based on nindexes, that is
 * the number of indexes that support parallel vacuum.  This function also
 * sets will_parallel_vacuum to remember indexes that participate in parallel
 * vacuum.
 */
static int
parallel_vacuum_compute_workers(Relation *indrels, int nindexes, int nrequested,
								bool *will_parallel_vacuum)
{
	int			nindexes_parallel = 0;
	int			nindexes_parallel_bulkdel = 0;
	int			nindexes_parallel_cleanup = 0;
	int			parallel_workers;

	/*
	 * We don't allow performing parallel operation in standalone backend or
	 * when parallelism is disabled.
	 */
	if (!IsUnderPostmaster || max_parallel_maintenance_workers == 0)
		return 0;

	/*
	 * Compute the number of indexes that can participate in parallel vacuum.
	 */
	for (int i = 0; i < nindexes; i++)
	{
		Relation	indrel = indrels[i];
		uint8		vacoptions = indrel->rd_indam->amparallelvacuumoptions;

		/* Skip index that is not a suitable target for parallel index vacuum */
		if (vacoptions == VACUUM_OPTION_NO_PARALLEL ||
			RelationGetNumberOfBlocks(indrel) < min_parallel_index_scan_size)
			continue;

		will_parallel_vacuum[i] = true;

		if ((vacoptions & VACUUM_OPTION_PARALLEL_BULKDEL) != 0)
			nindexes_parallel_bulkdel++;
		if (((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) != 0) ||
			((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) != 0))
			nindexes_parallel_cleanup++;
	}

	/* Worker demand is set by the more demanding of the two phases */
	nindexes_parallel = Max(nindexes_parallel_bulkdel,
							nindexes_parallel_cleanup);

	/* The leader process takes one index */
	nindexes_parallel--;

	/* No index supports parallel vacuum */
	if (nindexes_parallel <= 0)
		return 0;

	/* Compute the parallel degree */
	parallel_workers = (nrequested > 0) ?
		Min(nrequested, nindexes_parallel) : nindexes_parallel;

	/* Cap by max_parallel_maintenance_workers */
	parallel_workers = Min(parallel_workers, max_parallel_maintenance_workers);

	return parallel_workers;
}

/*
 * Perform index vacuum or index cleanup with parallel workers.  This function
 * must be used by the parallel vacuum leader process.
 */
static void
parallel_vacuum_process_all_indexes(ParallelVacuumState *pvs, int num_index_scans,
									bool vacuum)
{
	int			nworkers;
	PVIndVacStatus new_status;

	Assert(!IsParallelWorker());

	if (vacuum)
	{
		new_status = PARALLEL_INDVAC_STATUS_NEED_BULKDELETE;

		/* Determine the number of parallel workers to launch */
		nworkers = pvs->nindexes_parallel_bulkdel;
	}
	else
	{
		new_status = PARALLEL_INDVAC_STATUS_NEED_CLEANUP;

		/* Determine the number of parallel workers to launch */
		nworkers = pvs->nindexes_parallel_cleanup;

		/* Add conditionally parallel-aware indexes if in the first time call */
		if (num_index_scans == 0)
			nworkers += pvs->nindexes_parallel_condcleanup;
	}

	/* The leader process will participate */
	nworkers--;

	/*
	 * It is possible that parallel context is initialized with fewer workers
	 * than the number of indexes that need a separate worker in the current
	 * phase, so we need to consider it.  See
	 * parallel_vacuum_compute_workers().
	 */
	nworkers = Min(nworkers, pvs->pcxt->nworkers);

	/*
	 * Set index vacuum status and mark whether parallel vacuum worker can
	 * process it.
	 */
	for (int i = 0; i < pvs->nindexes; i++)
	{
		PVIndStats *indstats = &(pvs->indstats[i]);

		Assert(indstats->status == PARALLEL_INDVAC_STATUS_INITIAL);
		indstats->status = new_status;
		indstats->parallel_workers_can_process =
			(pvs->will_parallel_vacuum[i] &&
			 parallel_vacuum_index_is_parallel_safe(pvs->indrels[i],
													num_index_scans,
													vacuum));
	}

	/* Reset the parallel index processing counter */
	pg_atomic_write_u32(&(pvs->shared->idx), 0);

	/* Setup the shared cost-based vacuum delay and launch workers */
	if (nworkers > 0)
	{
		/* Reinitialize parallel context to relaunch parallel workers */
		if (num_index_scans > 0)
			ReinitializeParallelDSM(pvs->pcxt);

		/*
		 * Set up shared cost balance and the number of active workers for
		 * vacuum delay.  We need to do this before launching workers as
		 * otherwise, they might not see the updated values for these
		 * parameters.
		 */
		pg_atomic_write_u32(&(pvs->shared->cost_balance), VacuumCostBalance);
		pg_atomic_write_u32(&(pvs->shared->active_nworkers), 0);

		/*
		 * The number of workers can vary between bulkdelete and cleanup
		 * phase.
		 */
		ReinitializeParallelWorkers(pvs->pcxt, nworkers);

		LaunchParallelWorkers(pvs->pcxt);

		if (pvs->pcxt->nworkers_launched > 0)
		{
			/*
			 * Reset the local cost values for leader backend as we have
			 * already accumulated the remaining balance of heap.
			 */
			VacuumCostBalance = 0;
			VacuumCostBalanceLocal = 0;

			/* Enable shared cost balance for leader backend */
			VacuumSharedCostBalance = &(pvs->shared->cost_balance);
			VacuumActiveNWorkers = &(pvs->shared->active_nworkers);
		}

		if (vacuum)
			ereport(pvs->shared->elevel,
					(errmsg(ngettext("launched %d parallel vacuum worker for index vacuuming (planned: %d)",
									 "launched %d parallel vacuum workers for index vacuuming (planned: %d)",
									 pvs->pcxt->nworkers_launched),
							pvs->pcxt->nworkers_launched, nworkers)));
		else
			ereport(pvs->shared->elevel,
					(errmsg(ngettext("launched %d parallel vacuum worker for index cleanup (planned: %d)",
									 "launched %d parallel vacuum workers for index cleanup (planned: %d)",
									 pvs->pcxt->nworkers_launched),
							pvs->pcxt->nworkers_launched, nworkers)));
	}

	/* Vacuum the indexes that can be processed by only leader process */
	parallel_vacuum_process_unsafe_indexes(pvs);

	/*
	 * Join as a parallel worker.  The leader vacuums alone processes all
	 * parallel-safe indexes in the case where no workers are launched.
	 */
	parallel_vacuum_process_safe_indexes(pvs);

	/*
	 * Next, accumulate buffer and WAL usage.  (This must wait for the workers
	 * to finish, or we might get incomplete data.)
	 */
	if (nworkers > 0)
	{
		/* Wait for all vacuum workers to finish */
		WaitForParallelWorkersToFinish(pvs->pcxt);

		for (int i = 0; i < pvs->pcxt->nworkers_launched; i++)
			InstrAccumParallelQuery(&pvs->buffer_usage[i], &pvs->wal_usage[i]);
	}

	/*
	 * Reset all index status back to initial (while checking that we have
	 * vacuumed all indexes).
	 */
	for (int i = 0; i < pvs->nindexes; i++)
	{
		PVIndStats *indstats = &(pvs->indstats[i]);

		if (indstats->status != PARALLEL_INDVAC_STATUS_COMPLETED)
			elog(ERROR, "parallel index vacuum on index \"%s\" is not completed",
				 RelationGetRelationName(pvs->indrels[i]));

		indstats->status = PARALLEL_INDVAC_STATUS_INITIAL;
	}

	/*
	 * Carry the shared balance value to heap scan and disable shared costing
	 */
	if (VacuumSharedCostBalance)
	{
		VacuumCostBalance = pg_atomic_read_u32(VacuumSharedCostBalance);
		VacuumSharedCostBalance = NULL;
		VacuumActiveNWorkers = NULL;
	}
}

/*
 * Index vacuum/cleanup routine used by the leader process and parallel
 * vacuum worker processes to vacuum the indexes in parallel.
 */
static void
parallel_vacuum_process_safe_indexes(ParallelVacuumState *pvs)
{
	/*
	 * Increment the active worker count if we are able to launch any worker.
	 */
	if (VacuumActiveNWorkers)
		pg_atomic_add_fetch_u32(VacuumActiveNWorkers, 1);

	/* Loop until all indexes are vacuumed */
	for (;;)
	{
		int			idx;
		PVIndStats *indstats;

		/* Get an index number to process (shared counter hands out work) */
		idx = pg_atomic_fetch_add_u32(&(pvs->shared->idx), 1);

		/* Done for all indexes? */
		if (idx >= pvs->nindexes)
			break;

		indstats = &(pvs->indstats[idx]);

		/*
		 * Skip vacuuming index that is unsafe for workers or has an
		 * unsuitable target for parallel index vacuum (this is vacuumed in
		 * parallel_vacuum_process_unsafe_indexes() by the leader).
		 */
		if (!indstats->parallel_workers_can_process)
			continue;

		/* Do vacuum or cleanup of the index */
		parallel_vacuum_process_one_index(pvs, pvs->indrels[idx], indstats);
	}

	/*
	 * We have completed the index vacuum so decrement the active worker
	 * count.
	 */
	if (VacuumActiveNWorkers)
		pg_atomic_sub_fetch_u32(VacuumActiveNWorkers, 1);
}

/*
 * Perform parallel vacuuming of indexes in leader process.
 *
 * Handles index vacuuming (or index cleanup) for indexes that are not
 * parallel safe.  It's possible that this will vary for a given index, based
 * on details like whether we're performing index cleanup right now.
 *
 * Also performs vacuuming of smaller indexes that fell under the size cutoff
 * enforced by parallel_vacuum_compute_workers().
 */
static void
parallel_vacuum_process_unsafe_indexes(ParallelVacuumState *pvs)
{
	Assert(!IsParallelWorker());

	/*
	 * Increment the active worker count if we are able to launch any worker.
	 */
	if (VacuumActiveNWorkers)
		pg_atomic_add_fetch_u32(VacuumActiveNWorkers, 1);

	for (int i = 0; i < pvs->nindexes; i++)
	{
		PVIndStats *indstats = &(pvs->indstats[i]);

		/* Skip, indexes that are safe for workers */
		if (indstats->parallel_workers_can_process)
			continue;

		/* Do vacuum or cleanup of the index */
		parallel_vacuum_process_one_index(pvs, pvs->indrels[i], indstats);
	}

	/*
	 * We have completed the index vacuum so decrement the active worker
	 * count.
	 */
	if (VacuumActiveNWorkers)
		pg_atomic_sub_fetch_u32(VacuumActiveNWorkers, 1);
}

/*
 * Vacuum or cleanup index either by leader process or by one of the worker
 * process.  After vacuuming the index this function copies the index
 * statistics returned from ambulkdelete and amvacuumcleanup to the DSM
 * segment.
 */
static void
parallel_vacuum_process_one_index(ParallelVacuumState *pvs, Relation indrel,
								  PVIndStats *indstats)
{
	IndexBulkDeleteResult *istat = NULL;
	IndexBulkDeleteResult *istat_res;
	IndexVacuumInfo ivinfo;

	/*
	 * Update the pointer to the corresponding bulk-deletion result if someone
	 * has already updated it
	 */
	if (indstats->istat_updated)
		istat = &(indstats->istat);

	ivinfo.index = indrel;
	ivinfo.analyze_only = false;
	ivinfo.report_progress = false;
	ivinfo.message_level = DEBUG2;
	ivinfo.estimated_count = pvs->shared->estimated_count;
	ivinfo.num_heap_tuples = pvs->shared->reltuples;
	ivinfo.strategy = pvs->bstrategy;

	/* Update error traceback information */
	pvs->indname = pstrdup(RelationGetRelationName(indrel));
	pvs->status = indstats->status;

	switch (indstats->status)
	{
		case PARALLEL_INDVAC_STATUS_NEED_BULKDELETE:
			istat_res = vac_bulkdel_one_index(&ivinfo, istat, pvs->dead_items);
			break;
		case PARALLEL_INDVAC_STATUS_NEED_CLEANUP:
			istat_res = vac_cleanup_one_index(&ivinfo, istat);
			break;
		default:
			elog(ERROR, "unexpected parallel vacuum index status %d for index \"%s\"",
				 indstats->status,
				 RelationGetRelationName(indrel));
	}

	/*
	 * Copy the index bulk-deletion result returned from ambulkdelete and
	 * amvacuumcleanup to the DSM segment if it's the first cycle because they
	 * allocate locally and it's possible that an index will be vacuumed by a
	 * different vacuum process the next cycle.  Copying the result normally
	 * happens only the first time an index is vacuumed.  For any additional
	 * vacuum pass, we directly point to the result on the DSM segment and
	 * pass it to vacuum index APIs so that workers can update it directly.
	 *
	 * Since all vacuum workers write the bulk-deletion result at different
	 * slots we can write them without locking.
	 */
	if (!indstats->istat_updated && istat_res != NULL)
	{
		memcpy(&(indstats->istat), istat_res, sizeof(IndexBulkDeleteResult));
		indstats->istat_updated = true;

		/* Free the locally-allocated bulk-deletion result */
		pfree(istat_res);
	}

	/*
	 * Update the status to completed.  No need to lock here since each worker
	 * touches different indexes.
	 */
	indstats->status = PARALLEL_INDVAC_STATUS_COMPLETED;

	/* Reset error traceback information */
	pvs->status = PARALLEL_INDVAC_STATUS_COMPLETED;
	pfree(pvs->indname);
	pvs->indname = NULL;
}

/*
 * Returns false, if the given index can't participate in the next execution of
 * parallel index vacuum or parallel index cleanup.
 */
static bool
parallel_vacuum_index_is_parallel_safe(Relation indrel, int num_index_scans,
									   bool vacuum)
{
	uint8		vacoptions;

	vacoptions = indrel->rd_indam->amparallelvacuumoptions;

	/* In parallel vacuum case, check if it supports parallel bulk-deletion */
	if (vacuum)
		return ((vacoptions & VACUUM_OPTION_PARALLEL_BULKDEL) != 0);

	/* Not safe, if the index does not support parallel cleanup */
	if (((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) == 0) &&
		((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) == 0))
		return false;

	/*
	 * Not safe, if the index supports parallel cleanup conditionally, but we
	 * have already processed the index (for bulkdelete).  We do this to avoid
	 * the need to invoke workers when parallel index cleanup doesn't need to
	 * scan the index.  See the comments for option
	 * VACUUM_OPTION_PARALLEL_COND_CLEANUP to know when indexes support
	 * parallel cleanup conditionally.
	 */
	if (num_index_scans > 0 &&
		((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) != 0))
		return false;

	return true;
}

/*
 * Perform work within a launched parallel process.
 *
 * Since parallel vacuum workers perform only index vacuum or index cleanup,
 * we don't need to report progress information.
 */
void
parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
{
	ParallelVacuumState pvs;
	Relation	rel;
	Relation   *indrels;
	PVIndStats *indstats;
	PVShared   *shared;
	VacDeadItems *dead_items;
	BufferUsage *buffer_usage;
	WalUsage   *wal_usage;
	int			nindexes;
	char	   *sharedquery;
	ErrorContextCallback errcallback;

	/*
	 * A parallel vacuum worker must have only PROC_IN_VACUUM flag since we
	 * don't support parallel vacuum for autovacuum as of now.
	 */
	Assert(MyProc->statusFlags == PROC_IN_VACUUM);

	elog(DEBUG1, "starting parallel vacuum worker");

	shared = (PVShared *) shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_SHARED, false);

	/* Set debug_query_string for individual workers */
	sharedquery = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_QUERY_TEXT, true);
	debug_query_string = sharedquery;
	pgstat_report_activity(STATE_RUNNING, debug_query_string);

	/*
	 * Open table.  The lock mode is the same as the leader process.  It's
	 * okay because the lock mode does not conflict among the parallel
	 * workers.
	 */
	rel = table_open(shared->relid, ShareUpdateExclusiveLock);

	/*
	 * Open all indexes.  indrels are sorted in order by OID, which should be
	 * matched to the leader's one.
	 */
	vac_open_indexes(rel, RowExclusiveLock, &nindexes, &indrels);
	Assert(nindexes > 0);

	if (shared->maintenance_work_mem_worker > 0)
		maintenance_work_mem = shared->maintenance_work_mem_worker;

	/* Set index statistics */
	indstats = (PVIndStats *) shm_toc_lookup(toc,
											 PARALLEL_VACUUM_KEY_INDEX_STATS,
											 false);

	/* Set dead_items space */
	dead_items = (VacDeadItems *) shm_toc_lookup(toc,
												 PARALLEL_VACUUM_KEY_DEAD_ITEMS,
												 false);

	/* Set cost-based vacuum delay */
	VacuumCostActive = (VacuumCostDelay > 0);
	VacuumCostBalance = 0;
	VacuumPageHit = 0;
	VacuumPageMiss = 0;
	VacuumPageDirty = 0;
	VacuumCostBalanceLocal = 0;
	VacuumSharedCostBalance = &(shared->cost_balance);
	VacuumActiveNWorkers = &(shared->active_nworkers);

	/* Set parallel vacuum state */
	pvs.indrels = indrels;
	pvs.nindexes = nindexes;
	pvs.indstats = indstats;
	pvs.shared = shared;
	pvs.dead_items = dead_items;
	pvs.relnamespace = get_namespace_name(RelationGetNamespace(rel));
	pvs.relname = pstrdup(RelationGetRelationName(rel));

	/* These fields will be filled during index vacuum or cleanup */
	pvs.indname = NULL;
	pvs.status = PARALLEL_INDVAC_STATUS_INITIAL;

	/* Each parallel VACUUM worker gets its own access strategy */
	pvs.bstrategy = GetAccessStrategy(BAS_VACUUM);

	/* Setup error traceback support for ereport() */
	errcallback.callback = parallel_vacuum_error_callback;
	errcallback.arg = &pvs;
	errcallback.previous = error_context_stack;
	error_context_stack = &errcallback;

	/* Prepare to track buffer usage during parallel execution */
	InstrStartParallelQuery();

	/* Process indexes to perform vacuum/cleanup */
	parallel_vacuum_process_safe_indexes(&pvs);

	/* Report buffer/WAL usage during parallel execution */
	buffer_usage = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_BUFFER_USAGE, false);
	wal_usage = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_WAL_USAGE, false);
	InstrEndParallelQuery(&buffer_usage[ParallelWorkerNumber],
						  &wal_usage[ParallelWorkerNumber]);

	/* Pop the error context stack */
	error_context_stack = errcallback.previous;

	vac_close_indexes(nindexes, indrels, RowExclusiveLock);
	table_close(rel, ShareUpdateExclusiveLock);
	FreeAccessStrategy(pvs.bstrategy);
}

/*
 * Error context callback for errors occurring during parallel index vacuum.
 * The error context messages should match the messages set in the lazy vacuum
 * error context.  If you change this function, change vacuum_error_callback()
 * as well.
 */
static void
parallel_vacuum_error_callback(void *arg)
{
	ParallelVacuumState *errinfo = arg;

	switch (errinfo->status)
	{
		case PARALLEL_INDVAC_STATUS_NEED_BULKDELETE:
			errcontext("while vacuuming index \"%s\" of relation \"%s.%s\"",
					   errinfo->indname,
					   errinfo->relnamespace,
					   errinfo->relname);
			break;
		case PARALLEL_INDVAC_STATUS_NEED_CLEANUP:
			errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"",
					   errinfo->indname,
					   errinfo->relnamespace,
					   errinfo->relname);
			break;
		case PARALLEL_INDVAC_STATUS_INITIAL:
		case PARALLEL_INDVAC_STATUS_COMPLETED:
		default:
			return;
	}
}
diff --git a/src/backend/commands/variable.c b/src/backend/commands/variable.c
new file mode 100644
index 0000000..e5ddcda
--- /dev/null
+++ b/src/backend/commands/variable.c
@@ -0,0 +1,935 @@
/*-------------------------------------------------------------------------
 *
 * variable.c
 *		Routines for handling specialized SET variables.
 *
 *
 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/commands/variable.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

/* NOTE(review): header name lost in extraction -- presumably <ctype.h>; confirm against upstream */
#include

#include "access/htup_details.h"
#include "access/parallel.h"
#include "access/xact.h"
#include "access/xlog.h"
#include "catalog/pg_authid.h"
#include "commands/variable.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/snapmgr.h"
#include "utils/syscache.h"
#include "utils/timestamp.h"
#include "utils/varlena.h"

/*
 * DATESTYLE
 */

/*
 * check_datestyle: GUC check_hook for datestyle
 *
 * Parses a comma-separated list of style/order keywords, canonicalizes it
 * (e.g. "iso, ymd" -> "ISO, YMD"), and builds the two-int "extra" struct
 * consumed by assign_datestyle.  Returns false on syntax errors or
 * conflicting specifications.  The canonical string and extra struct are
 * malloc'd, as GUC requires.
 */
bool
check_datestyle(char **newval, void **extra, GucSource source)
{
	int			newDateStyle = DateStyle;
	int			newDateOrder = DateOrder;
	bool		have_style = false;
	bool		have_order = false;
	bool		ok = true;
	char	   *rawstring;
	int		   *myextra;
	char	   *result;
	List	   *elemlist;
	ListCell   *l;

	/* Need a modifiable copy of string */
	rawstring = pstrdup(*newval);

	/* Parse string into list of identifiers */
	if (!SplitIdentifierString(rawstring, ',', &elemlist))
	{
		/* syntax error in list */
		GUC_check_errdetail("List syntax is invalid.");
		pfree(rawstring);
		list_free(elemlist);
		return false;
	}

	foreach(l, elemlist)
	{
		char	   *tok = (char *) lfirst(l);

		/* Ugh. Somebody ought to write a table driven version -- mjl */

		if (pg_strcasecmp(tok, "ISO") == 0)
		{
			if (have_style && newDateStyle != USE_ISO_DATES)
				ok = false;		/* conflicting styles */
			newDateStyle = USE_ISO_DATES;
			have_style = true;
		}
		else if (pg_strcasecmp(tok, "SQL") == 0)
		{
			if (have_style && newDateStyle != USE_SQL_DATES)
				ok = false;		/* conflicting styles */
			newDateStyle = USE_SQL_DATES;
			have_style = true;
		}
		else if (pg_strncasecmp(tok, "POSTGRES", 8) == 0)
		{
			if (have_style && newDateStyle != USE_POSTGRES_DATES)
				ok = false;		/* conflicting styles */
			newDateStyle = USE_POSTGRES_DATES;
			have_style = true;
		}
		else if (pg_strcasecmp(tok, "GERMAN") == 0)
		{
			if (have_style && newDateStyle != USE_GERMAN_DATES)
				ok = false;		/* conflicting styles */
			newDateStyle = USE_GERMAN_DATES;
			have_style = true;
			/* GERMAN also sets DMY, unless explicitly overridden */
			if (!have_order)
				newDateOrder = DATEORDER_DMY;
		}
		else if (pg_strcasecmp(tok, "YMD") == 0)
		{
			if (have_order && newDateOrder != DATEORDER_YMD)
				ok = false;		/* conflicting orders */
			newDateOrder = DATEORDER_YMD;
			have_order = true;
		}
		else if (pg_strcasecmp(tok, "DMY") == 0 ||
				 pg_strncasecmp(tok, "EURO", 4) == 0)
		{
			if (have_order && newDateOrder != DATEORDER_DMY)
				ok = false;		/* conflicting orders */
			newDateOrder = DATEORDER_DMY;
			have_order = true;
		}
		else if (pg_strcasecmp(tok, "MDY") == 0 ||
				 pg_strcasecmp(tok, "US") == 0 ||
				 pg_strncasecmp(tok, "NONEURO", 7) == 0)
		{
			if (have_order && newDateOrder != DATEORDER_MDY)
				ok = false;		/* conflicting orders */
			newDateOrder = DATEORDER_MDY;
			have_order = true;
		}
		else if (pg_strcasecmp(tok, "DEFAULT") == 0)
		{
			/*
			 * Easiest way to get the current DEFAULT state is to fetch the
			 * DEFAULT string from guc.c and recursively parse it.
			 *
			 * We can't simply "return check_datestyle(...)" because we need
			 * to handle constructs like "DEFAULT, ISO".
			 */
			char	   *subval;
			void	   *subextra = NULL;

			subval = strdup(GetConfigOptionResetString("datestyle"));
			if (!subval)
			{
				ok = false;
				break;
			}
			if (!check_datestyle(&subval, &subextra, source))
			{
				free(subval);
				ok = false;
				break;
			}
			myextra = (int *) subextra;
			/* only adopt the sub-parse's settings not already given explicitly */
			if (!have_style)
				newDateStyle = myextra[0];
			if (!have_order)
				newDateOrder = myextra[1];
			free(subval);
			free(subextra);
		}
		else
		{
			GUC_check_errdetail("Unrecognized key word: \"%s\".", tok);
			pfree(rawstring);
			list_free(elemlist);
			return false;
		}
	}

	pfree(rawstring);
	list_free(elemlist);

	if (!ok)
	{
		GUC_check_errdetail("Conflicting \"datestyle\" specifications.");
		return false;
	}

	/*
	 * Prepare the canonical string to return.  GUC wants it malloc'd.
	 */
	result = (char *) malloc(32);
	if (!result)
		return false;

	switch (newDateStyle)
	{
		case USE_ISO_DATES:
			strcpy(result, "ISO");
			break;
		case USE_SQL_DATES:
			strcpy(result, "SQL");
			break;
		case USE_GERMAN_DATES:
			strcpy(result, "German");
			break;
		default:
			strcpy(result, "Postgres");
			break;
	}
	switch (newDateOrder)
	{
		case DATEORDER_YMD:
			strcat(result, ", YMD");
			break;
		case DATEORDER_DMY:
			strcat(result, ", DMY");
			break;
		default:
			strcat(result, ", MDY");
			break;
	}

	free(*newval);
	*newval = result;

	/*
	 * Set up the "extra" struct actually used by assign_datestyle.
	 */
	myextra = (int *) malloc(2 * sizeof(int));
	if (!myextra)
		return false;
	myextra[0] = newDateStyle;
	myextra[1] = newDateOrder;
	*extra = (void *) myextra;

	return true;
}

/*
 * assign_datestyle: GUC assign_hook for datestyle
 *
 * Installs the style/order pair computed by check_datestyle into the
 * process-global DateStyle/DateOrder variables.
 */
void
assign_datestyle(const char *newval, void *extra)
{
	int		   *myextra = (int *) extra;

	DateStyle = myextra[0];
	DateOrder = myextra[1];
}


/*
 * TIMEZONE
 */

/*
 * check_timezone: GUC check_hook for timezone
 */
bool
check_timezone(char **newval, void **extra, GucSource source)
{
	pg_tz	   *new_tz;
	long		gmtoffset;
	char	   *endptr;
	double		hours;

	if (pg_strncasecmp(*newval, "interval", 8) == 0)
	{
		/*
		 * Support INTERVAL 'foo'.  This is for SQL spec compliance, not
		 * because it has any actual real-world usefulness.
		 */
		const char *valueptr = *newval;
		char	   *val;
		Interval   *interval;

		valueptr += 8;
		while (isspace((unsigned char) *valueptr))
			valueptr++;
		if (*valueptr++ != '\'')
			return false;
		val = pstrdup(valueptr);
		/* Check and remove trailing quote */
		endptr = strchr(val, '\'');
		if (!endptr || endptr[1] != '\0')
		{
			pfree(val);
			return false;
		}
		*endptr = '\0';

		/*
		 * Try to parse it.  XXX an invalid interval format will result in
		 * ereport(ERROR), which is not desirable for GUC.  We did what we
		 * could to guard against this in flatten_set_variable_args, but a
		 * string coming in from postgresql.conf might contain anything.
+ */ + interval = DatumGetIntervalP(DirectFunctionCall3(interval_in, + CStringGetDatum(val), + ObjectIdGetDatum(InvalidOid), + Int32GetDatum(-1))); + + pfree(val); + if (interval->month != 0) + { + GUC_check_errdetail("Cannot specify months in time zone interval."); + pfree(interval); + return false; + } + if (interval->day != 0) + { + GUC_check_errdetail("Cannot specify days in time zone interval."); + pfree(interval); + return false; + } + + /* Here we change from SQL to Unix sign convention */ + gmtoffset = -(interval->time / USECS_PER_SEC); + new_tz = pg_tzset_offset(gmtoffset); + + pfree(interval); + } + else + { + /* + * Try it as a numeric number of hours (possibly fractional). + */ + hours = strtod(*newval, &endptr); + if (endptr != *newval && *endptr == '\0') + { + /* Here we change from SQL to Unix sign convention */ + gmtoffset = -hours * SECS_PER_HOUR; + new_tz = pg_tzset_offset(gmtoffset); + } + else + { + /* + * Otherwise assume it is a timezone name, and try to load it. + */ + new_tz = pg_tzset(*newval); + + if (!new_tz) + { + /* Doesn't seem to be any great value in errdetail here */ + return false; + } + + if (!pg_tz_acceptable(new_tz)) + { + GUC_check_errmsg("time zone \"%s\" appears to use leap seconds", + *newval); + GUC_check_errdetail("PostgreSQL does not support leap seconds."); + return false; + } + } + } + + /* Test for failure in pg_tzset_offset, which we assume is out-of-range */ + if (!new_tz) + { + GUC_check_errdetail("UTC timezone offset is out of range."); + return false; + } + + /* + * Pass back data for assign_timezone to use + */ + *extra = malloc(sizeof(pg_tz *)); + if (!*extra) + return false; + *((pg_tz **) *extra) = new_tz; + + return true; +} + +/* + * assign_timezone: GUC assign_hook for timezone + */ +void +assign_timezone(const char *newval, void *extra) +{ + session_timezone = *((pg_tz **) extra); +} + +/* + * show_timezone: GUC show_hook for timezone + */ +const char * +show_timezone(void) +{ + const char *tzn; + + /* 
Always show the zone's canonical name */ + tzn = pg_get_timezone_name(session_timezone); + + if (tzn != NULL) + return tzn; + + return "unknown"; +} + + +/* + * LOG_TIMEZONE + * + * For log_timezone, we don't support the interval-based methods of setting a + * zone, which are only there for SQL spec compliance not because they're + * actually useful. + */ + +/* + * check_log_timezone: GUC check_hook for log_timezone + */ +bool +check_log_timezone(char **newval, void **extra, GucSource source) +{ + pg_tz *new_tz; + + /* + * Assume it is a timezone name, and try to load it. + */ + new_tz = pg_tzset(*newval); + + if (!new_tz) + { + /* Doesn't seem to be any great value in errdetail here */ + return false; + } + + if (!pg_tz_acceptable(new_tz)) + { + GUC_check_errmsg("time zone \"%s\" appears to use leap seconds", + *newval); + GUC_check_errdetail("PostgreSQL does not support leap seconds."); + return false; + } + + /* + * Pass back data for assign_log_timezone to use + */ + *extra = malloc(sizeof(pg_tz *)); + if (!*extra) + return false; + *((pg_tz **) *extra) = new_tz; + + return true; +} + +/* + * assign_log_timezone: GUC assign_hook for log_timezone + */ +void +assign_log_timezone(const char *newval, void *extra) +{ + log_timezone = *((pg_tz **) extra); +} + +/* + * show_log_timezone: GUC show_hook for log_timezone + */ +const char * +show_log_timezone(void) +{ + const char *tzn; + + /* Always show the zone's canonical name */ + tzn = pg_get_timezone_name(log_timezone); + + if (tzn != NULL) + return tzn; + + return "unknown"; +} + + +/* + * SET TRANSACTION READ ONLY and SET TRANSACTION READ WRITE + * + * We allow idempotent changes (r/w -> r/w and r/o -> r/o) at any time, and + * we also always allow changes from read-write to read-only. However, + * read-only may be changed to read-write only when in a top-level transaction + * that has not yet taken an initial snapshot. Can't do it in a hot standby, + * either. 
+ * + * If we are not in a transaction at all, just allow the change; it means + * nothing since XactReadOnly will be reset by the next StartTransaction(). + * The IsTransactionState() test protects us against trying to check + * RecoveryInProgress() in contexts where shared memory is not accessible. + * (Similarly, if we're restoring state in a parallel worker, just allow + * the change.) + */ +bool +check_transaction_read_only(bool *newval, void **extra, GucSource source) +{ + if (*newval == false && XactReadOnly && IsTransactionState() && !InitializingParallelWorker) + { + /* Can't go to r/w mode inside a r/o transaction */ + if (IsSubTransaction()) + { + GUC_check_errcode(ERRCODE_ACTIVE_SQL_TRANSACTION); + GUC_check_errmsg("cannot set transaction read-write mode inside a read-only transaction"); + return false; + } + /* Top level transaction can't change to r/w after first snapshot. */ + if (FirstSnapshotSet) + { + GUC_check_errcode(ERRCODE_ACTIVE_SQL_TRANSACTION); + GUC_check_errmsg("transaction read-write mode must be set before any query"); + return false; + } + /* Can't go to r/w mode while recovery is still active */ + if (RecoveryInProgress()) + { + GUC_check_errcode(ERRCODE_FEATURE_NOT_SUPPORTED); + GUC_check_errmsg("cannot set transaction read-write mode during recovery"); + return false; + } + } + + return true; +} + +/* + * SET TRANSACTION ISOLATION LEVEL + * + * We allow idempotent changes at any time, but otherwise this can only be + * changed in a toplevel transaction that has not yet taken a snapshot. + * + * As in check_transaction_read_only, allow it if not inside a transaction. 
+ */ +bool +check_XactIsoLevel(int *newval, void **extra, GucSource source) +{ + int newXactIsoLevel = *newval; + + if (newXactIsoLevel != XactIsoLevel && IsTransactionState()) + { + if (FirstSnapshotSet) + { + GUC_check_errcode(ERRCODE_ACTIVE_SQL_TRANSACTION); + GUC_check_errmsg("SET TRANSACTION ISOLATION LEVEL must be called before any query"); + return false; + } + /* We ignore a subtransaction setting it to the existing value. */ + if (IsSubTransaction()) + { + GUC_check_errcode(ERRCODE_ACTIVE_SQL_TRANSACTION); + GUC_check_errmsg("SET TRANSACTION ISOLATION LEVEL must not be called in a subtransaction"); + return false; + } + /* Can't go to serializable mode while recovery is still active */ + if (newXactIsoLevel == XACT_SERIALIZABLE && RecoveryInProgress()) + { + GUC_check_errcode(ERRCODE_FEATURE_NOT_SUPPORTED); + GUC_check_errmsg("cannot use serializable mode in a hot standby"); + GUC_check_errhint("You can use REPEATABLE READ instead."); + return false; + } + } + + return true; +} + +/* + * SET TRANSACTION [NOT] DEFERRABLE + */ + +bool +check_transaction_deferrable(bool *newval, void **extra, GucSource source) +{ + if (IsSubTransaction()) + { + GUC_check_errcode(ERRCODE_ACTIVE_SQL_TRANSACTION); + GUC_check_errmsg("SET TRANSACTION [NOT] DEFERRABLE cannot be called within a subtransaction"); + return false; + } + if (FirstSnapshotSet) + { + GUC_check_errcode(ERRCODE_ACTIVE_SQL_TRANSACTION); + GUC_check_errmsg("SET TRANSACTION [NOT] DEFERRABLE must be called before any query"); + return false; + } + + return true; +} + +/* + * Random number seed + * + * We can't roll back the random sequence on error, and we don't want + * config file reloads to affect it, so we only want interactive SET SEED + * commands to set it. We use the "extra" storage to ensure that rollbacks + * don't try to do the operation again. 
+ */ + +bool +check_random_seed(double *newval, void **extra, GucSource source) +{ + *extra = malloc(sizeof(int)); + if (!*extra) + return false; + /* Arm the assign only if source of value is an interactive SET */ + *((int *) *extra) = (source >= PGC_S_INTERACTIVE); + + return true; +} + +void +assign_random_seed(double newval, void *extra) +{ + /* We'll do this at most once for any setting of the GUC variable */ + if (*((int *) extra)) + DirectFunctionCall1(setseed, Float8GetDatum(newval)); + *((int *) extra) = 0; +} + +const char * +show_random_seed(void) +{ + return "unavailable"; +} + + +/* + * SET CLIENT_ENCODING + */ + +bool +check_client_encoding(char **newval, void **extra, GucSource source) +{ + int encoding; + const char *canonical_name; + + /* Look up the encoding by name */ + encoding = pg_valid_client_encoding(*newval); + if (encoding < 0) + return false; + + /* Get the canonical name (no aliases, uniform case) */ + canonical_name = pg_encoding_to_char(encoding); + + /* + * If we are not within a transaction then PrepareClientEncoding will not + * be able to look up the necessary conversion procs. If we are still + * starting up, it will return "OK" anyway, and InitializeClientEncoding + * will fix things once initialization is far enough along. After + * startup, we'll fail. This would only happen if someone tries to change + * client_encoding in postgresql.conf and then SIGHUP existing sessions. + * It seems like a bad idea for client_encoding to change that way anyhow, + * so we don't go out of our way to support it. + * + * Note: in the postmaster, or any other process that never calls + * InitializeClientEncoding, PrepareClientEncoding will always succeed, + * and so will SetClientEncoding; but they won't do anything, which is OK. 
+ */ + if (PrepareClientEncoding(encoding) < 0) + { + if (IsTransactionState()) + { + /* Must be a genuine no-such-conversion problem */ + GUC_check_errcode(ERRCODE_FEATURE_NOT_SUPPORTED); + GUC_check_errdetail("Conversion between %s and %s is not supported.", + canonical_name, + GetDatabaseEncodingName()); + } + else + { + /* Provide a useful complaint */ + GUC_check_errdetail("Cannot change \"client_encoding\" now."); + } + return false; + } + + /* + * Replace the user-supplied string with the encoding's canonical name. + * This gets rid of aliases and case-folding variations. + * + * XXX Although canonicalizing seems like a good idea in the abstract, it + * breaks pre-9.1 JDBC drivers, which expect that if they send "UNICODE" + * as the client_encoding setting then it will read back the same way. As + * a workaround, don't replace the string if it's "UNICODE". Remove that + * hack when pre-9.1 JDBC drivers are no longer in use. + */ + if (strcmp(*newval, canonical_name) != 0 && + strcmp(*newval, "UNICODE") != 0) + { + free(*newval); + *newval = strdup(canonical_name); + if (!*newval) + return false; + } + + /* + * Save the encoding's ID in *extra, for use by assign_client_encoding. + */ + *extra = malloc(sizeof(int)); + if (!*extra) + return false; + *((int *) *extra) = encoding; + + return true; +} + +void +assign_client_encoding(const char *newval, void *extra) +{ + int encoding = *((int *) extra); + + /* + * Parallel workers send data to the leader, not the client. They always + * send data using the database encoding. + */ + if (IsParallelWorker()) + { + /* + * During parallel worker startup, we want to accept the leader's + * client_encoding setting so that anyone who looks at the value in + * the worker sees the same value that they would see in the leader. 
+ */ + if (InitializingParallelWorker) + return; + + /* + * A change other than during startup, for example due to a SET clause + * attached to a function definition, should be rejected, as there is + * nothing we can do inside the worker to make it take effect. + */ + ereport(ERROR, + (errcode(ERRCODE_INVALID_TRANSACTION_STATE), + errmsg("cannot change client_encoding during a parallel operation"))); + } + + /* We do not expect an error if PrepareClientEncoding succeeded */ + if (SetClientEncoding(encoding) < 0) + elog(LOG, "SetClientEncoding(%d) failed", encoding); +} + + +/* + * SET SESSION AUTHORIZATION + */ + +typedef struct +{ + /* This is the "extra" state for both SESSION AUTHORIZATION and ROLE */ + Oid roleid; + bool is_superuser; +} role_auth_extra; + +bool +check_session_authorization(char **newval, void **extra, GucSource source) +{ + HeapTuple roleTup; + Form_pg_authid roleform; + Oid roleid; + bool is_superuser; + role_auth_extra *myextra; + + /* Do nothing for the boot_val default of NULL */ + if (*newval == NULL) + return true; + + if (!IsTransactionState()) + { + /* + * Can't do catalog lookups, so fail. The result of this is that + * session_authorization cannot be set in postgresql.conf, which seems + * like a good thing anyway, so we don't work hard to avoid it. + */ + return false; + } + + /* Look up the username */ + roleTup = SearchSysCache1(AUTHNAME, PointerGetDatum(*newval)); + if (!HeapTupleIsValid(roleTup)) + { + /* + * When source == PGC_S_TEST, we don't throw a hard error for a + * nonexistent user name, only a NOTICE. See comments in guc.h. 
+ */ + if (source == PGC_S_TEST) + { + ereport(NOTICE, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("role \"%s\" does not exist", *newval))); + return true; + } + GUC_check_errmsg("role \"%s\" does not exist", *newval); + return false; + } + + roleform = (Form_pg_authid) GETSTRUCT(roleTup); + roleid = roleform->oid; + is_superuser = roleform->rolsuper; + + ReleaseSysCache(roleTup); + + /* Set up "extra" struct for assign_session_authorization to use */ + myextra = (role_auth_extra *) malloc(sizeof(role_auth_extra)); + if (!myextra) + return false; + myextra->roleid = roleid; + myextra->is_superuser = is_superuser; + *extra = (void *) myextra; + + return true; +} + +void +assign_session_authorization(const char *newval, void *extra) +{ + role_auth_extra *myextra = (role_auth_extra *) extra; + + /* Do nothing for the boot_val default of NULL */ + if (!myextra) + return; + + SetSessionAuthorization(myextra->roleid, myextra->is_superuser); +} + + +/* + * SET ROLE + * + * The SQL spec requires "SET ROLE NONE" to unset the role, so we hardwire + * a translation of "none" to InvalidOid. Otherwise this is much like + * SET SESSION AUTHORIZATION. + */ +extern char *role_string; /* in guc.c */ + +bool +check_role(char **newval, void **extra, GucSource source) +{ + HeapTuple roleTup; + Oid roleid; + bool is_superuser; + role_auth_extra *myextra; + Form_pg_authid roleform; + + if (strcmp(*newval, "none") == 0) + { + /* hardwired translation */ + roleid = InvalidOid; + is_superuser = false; + } + else + { + if (!IsTransactionState()) + { + /* + * Can't do catalog lookups, so fail. The result of this is that + * role cannot be set in postgresql.conf, which seems like a good + * thing anyway, so we don't work hard to avoid it. + */ + return false; + } + + /* + * When source == PGC_S_TEST, we don't throw a hard error for a + * nonexistent user name or insufficient privileges, only a NOTICE. + * See comments in guc.h. 
+ */ + + /* Look up the username */ + roleTup = SearchSysCache1(AUTHNAME, PointerGetDatum(*newval)); + if (!HeapTupleIsValid(roleTup)) + { + if (source == PGC_S_TEST) + { + ereport(NOTICE, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("role \"%s\" does not exist", *newval))); + return true; + } + GUC_check_errmsg("role \"%s\" does not exist", *newval); + return false; + } + + roleform = (Form_pg_authid) GETSTRUCT(roleTup); + roleid = roleform->oid; + is_superuser = roleform->rolsuper; + + ReleaseSysCache(roleTup); + + /* + * Verify that session user is allowed to become this role, but skip + * this in parallel mode, where we must blindly recreate the parallel + * leader's state. + */ + if (!InitializingParallelWorker && + !is_member_of_role(GetSessionUserId(), roleid)) + { + if (source == PGC_S_TEST) + { + ereport(NOTICE, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission will be denied to set role \"%s\"", + *newval))); + return true; + } + GUC_check_errcode(ERRCODE_INSUFFICIENT_PRIVILEGE); + GUC_check_errmsg("permission denied to set role \"%s\"", + *newval); + return false; + } + } + + /* Set up "extra" struct for assign_role to use */ + myextra = (role_auth_extra *) malloc(sizeof(role_auth_extra)); + if (!myextra) + return false; + myextra->roleid = roleid; + myextra->is_superuser = is_superuser; + *extra = (void *) myextra; + + return true; +} + +void +assign_role(const char *newval, void *extra) +{ + role_auth_extra *myextra = (role_auth_extra *) extra; + + SetCurrentRoleId(myextra->roleid, myextra->is_superuser); +} + +const char * +show_role(void) +{ + /* + * Check whether SET ROLE is active; if not return "none". This is a + * kluge to deal with the fact that SET SESSION AUTHORIZATION logically + * resets SET ROLE to NONE, but we cannot set the GUC role variable from + * assign_session_authorization (because we haven't got enough info to + * call set_config_option). 
+ */ + if (!OidIsValid(GetCurrentRoleId())) + return "none"; + + /* Otherwise we can just use the GUC string */ + return role_string ? role_string : "none"; +} diff --git a/src/backend/commands/view.c b/src/backend/commands/view.c new file mode 100644 index 0000000..b5a0fc0 --- /dev/null +++ b/src/backend/commands/view.c @@ -0,0 +1,604 @@ +/*------------------------------------------------------------------------- + * + * view.c + * use rewrite rules to construct views + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/view.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/relation.h" +#include "access/xact.h" +#include "catalog/namespace.h" +#include "commands/defrem.h" +#include "commands/tablecmds.h" +#include "commands/view.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "parser/analyze.h" +#include "parser/parse_relation.h" +#include "rewrite/rewriteDefine.h" +#include "rewrite/rewriteHandler.h" +#include "rewrite/rewriteManip.h" +#include "rewrite/rewriteSupport.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" +#include "utils/syscache.h" + +static void checkViewTupleDesc(TupleDesc newdesc, TupleDesc olddesc); + +/*--------------------------------------------------------------------- + * DefineVirtualRelation + * + * Create a view relation and use the rules system to store the query + * for the view. + * + * EventTriggerAlterTableStart must have been called already. 
+ *--------------------------------------------------------------------- + */ +static ObjectAddress +DefineVirtualRelation(RangeVar *relation, List *tlist, bool replace, + List *options, Query *viewParse) +{ + Oid viewOid; + LOCKMODE lockmode; + CreateStmt *createStmt = makeNode(CreateStmt); + List *attrList; + ListCell *t; + + /* + * create a list of ColumnDef nodes based on the names and types of the + * (non-junk) targetlist items from the view's SELECT list. + */ + attrList = NIL; + foreach(t, tlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(t); + + if (!tle->resjunk) + { + ColumnDef *def = makeColumnDef(tle->resname, + exprType((Node *) tle->expr), + exprTypmod((Node *) tle->expr), + exprCollation((Node *) tle->expr)); + + /* + * It's possible that the column is of a collatable type but the + * collation could not be resolved, so double-check. + */ + if (type_is_collatable(exprType((Node *) tle->expr))) + { + if (!OidIsValid(def->collOid)) + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for view column \"%s\"", + def->colname), + errhint("Use the COLLATE clause to set the collation explicitly."))); + } + else + Assert(!OidIsValid(def->collOid)); + + attrList = lappend(attrList, def); + } + } + + /* + * Look up, check permissions on, and lock the creation namespace; also + * check for a preexisting view with the same name. This will also set + * relation->relpersistence to RELPERSISTENCE_TEMP if the selected + * namespace is temporary. + */ + lockmode = replace ? AccessExclusiveLock : NoLock; + (void) RangeVarGetAndCheckCreationNamespace(relation, lockmode, &viewOid); + + if (OidIsValid(viewOid) && replace) + { + Relation rel; + TupleDesc descriptor; + List *atcmds = NIL; + AlterTableCmd *atcmd; + ObjectAddress address; + + /* Relation is already locked, but we must build a relcache entry. */ + rel = relation_open(viewOid, NoLock); + + /* Make sure it *is* a view. 
*/ + if (rel->rd_rel->relkind != RELKIND_VIEW) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is not a view", + RelationGetRelationName(rel)))); + + /* Also check it's not in use already */ + CheckTableNotInUse(rel, "CREATE OR REPLACE VIEW"); + + /* + * Due to the namespace visibility rules for temporary objects, we + * should only end up replacing a temporary view with another + * temporary view, and similarly for permanent views. + */ + Assert(relation->relpersistence == rel->rd_rel->relpersistence); + + /* + * Create a tuple descriptor to compare against the existing view, and + * verify that the old column list is an initial prefix of the new + * column list. + */ + descriptor = BuildDescForRelation(attrList); + checkViewTupleDesc(descriptor, rel->rd_att); + + /* + * If new attributes have been added, we must add pg_attribute entries + * for them. It is convenient (although overkill) to use the ALTER + * TABLE ADD COLUMN infrastructure for this. + * + * Note that we must do this before updating the query for the view, + * since the rules system requires that the correct view columns be in + * place when defining the new rules. + * + * Also note that ALTER TABLE doesn't run parse transformation on + * AT_AddColumnToView commands. The ColumnDef we supply must be ready + * to execute as-is. + */ + if (list_length(attrList) > rel->rd_att->natts) + { + ListCell *c; + int skip = rel->rd_att->natts; + + foreach(c, attrList) + { + if (skip > 0) + { + skip--; + continue; + } + atcmd = makeNode(AlterTableCmd); + atcmd->subtype = AT_AddColumnToView; + atcmd->def = (Node *) lfirst(c); + atcmds = lappend(atcmds, atcmd); + } + + /* EventTriggerAlterTableStart called by ProcessUtilitySlow */ + AlterTableInternal(viewOid, atcmds, true); + + /* Make the new view columns visible */ + CommandCounterIncrement(); + } + + /* + * Update the query for the view. 
+ * + * Note that we must do this before updating the view options, because + * the new options may not be compatible with the old view query (for + * example if we attempt to add the WITH CHECK OPTION, we require that + * the new view be automatically updatable, but the old view may not + * have been). + */ + StoreViewQuery(viewOid, viewParse, replace); + + /* Make the new view query visible */ + CommandCounterIncrement(); + + /* + * Update the view's options. + * + * The new options list replaces the existing options list, even if + * it's empty. + */ + atcmd = makeNode(AlterTableCmd); + atcmd->subtype = AT_ReplaceRelOptions; + atcmd->def = (Node *) options; + atcmds = list_make1(atcmd); + + /* EventTriggerAlterTableStart called by ProcessUtilitySlow */ + AlterTableInternal(viewOid, atcmds, true); + + /* + * There is very little to do here to update the view's dependencies. + * Most view-level dependency relationships, such as those on the + * owner, schema, and associated composite type, aren't changing. + * Because we don't allow changing type or collation of an existing + * view column, those dependencies of the existing columns don't + * change either, while the AT_AddColumnToView machinery took care of + * adding such dependencies for new view columns. The dependencies of + * the view's query could have changed arbitrarily, but that was dealt + * with inside StoreViewQuery. What remains is only to check that + * view replacement is allowed when we're creating an extension. + */ + ObjectAddressSet(address, RelationRelationId, viewOid); + + recordDependencyOnCurrentExtension(&address, true); + + /* + * Seems okay, so return the OID of the pre-existing view. + */ + relation_close(rel, NoLock); /* keep the lock! */ + + return address; + } + else + { + ObjectAddress address; + + /* + * Set the parameters for keys/inheritance etc. All of these are + * uninteresting for views... 
+ */ + createStmt->relation = relation; + createStmt->tableElts = attrList; + createStmt->inhRelations = NIL; + createStmt->constraints = NIL; + createStmt->options = options; + createStmt->oncommit = ONCOMMIT_NOOP; + createStmt->tablespacename = NULL; + createStmt->if_not_exists = false; + + /* + * Create the relation (this will error out if there's an existing + * view, so we don't need more code to complain if "replace" is + * false). + */ + address = DefineRelation(createStmt, RELKIND_VIEW, InvalidOid, NULL, + NULL); + Assert(address.objectId != InvalidOid); + + /* Make the new view relation visible */ + CommandCounterIncrement(); + + /* Store the query for the view */ + StoreViewQuery(address.objectId, viewParse, replace); + + return address; + } +} + +/* + * Verify that tupledesc associated with proposed new view definition + * matches tupledesc of old view. This is basically a cut-down version + * of equalTupleDescs(), with code added to generate specific complaints. + * Also, we allow the new tupledesc to have more columns than the old. + */ +static void +checkViewTupleDesc(TupleDesc newdesc, TupleDesc olddesc) +{ + int i; + + if (newdesc->natts < olddesc->natts) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("cannot drop columns from view"))); + + for (i = 0; i < olddesc->natts; i++) + { + Form_pg_attribute newattr = TupleDescAttr(newdesc, i); + Form_pg_attribute oldattr = TupleDescAttr(olddesc, i); + + /* XXX msg not right, but we don't support DROP COL on view anyway */ + if (newattr->attisdropped != oldattr->attisdropped) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("cannot drop columns from view"))); + + if (strcmp(NameStr(newattr->attname), NameStr(oldattr->attname)) != 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("cannot change name of view column \"%s\" to \"%s\"", + NameStr(oldattr->attname), + NameStr(newattr->attname)), + errhint("Use ALTER VIEW ... 
RENAME COLUMN ... to change name of view column instead."))); + + /* + * We cannot allow type, typmod, or collation to change, since these + * properties may be embedded in Vars of other views/rules referencing + * this one. Other column attributes can be ignored. + */ + if (newattr->atttypid != oldattr->atttypid || + newattr->atttypmod != oldattr->atttypmod) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("cannot change data type of view column \"%s\" from %s to %s", + NameStr(oldattr->attname), + format_type_with_typemod(oldattr->atttypid, + oldattr->atttypmod), + format_type_with_typemod(newattr->atttypid, + newattr->atttypmod)))); + + /* + * At this point, attcollations should be both valid or both invalid, + * so applying get_collation_name unconditionally should be fine. + */ + if (newattr->attcollation != oldattr->attcollation) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("cannot change collation of view column \"%s\" from \"%s\" to \"%s\"", + NameStr(oldattr->attname), + get_collation_name(oldattr->attcollation), + get_collation_name(newattr->attcollation)))); + } + + /* + * We ignore the constraint fields. The new view desc can't have any + * constraints, and the only ones that could be on the old view are + * defaults, which we are happy to leave in place. + */ +} + +static void +DefineViewRules(Oid viewOid, Query *viewParse, bool replace) +{ + /* + * Set up the ON SELECT rule. Since the query has already been through + * parse analysis, we use DefineQueryRewrite() directly. + */ + DefineQueryRewrite(pstrdup(ViewSelectRuleName), + viewOid, + NULL, + CMD_SELECT, + true, + replace, + list_make1(viewParse)); + + /* + * Someday: automatic ON INSERT, etc + */ +} + +/*--------------------------------------------------------------- + * UpdateRangeTableOfViewParse + * + * Update the range table of the given parsetree. 
+ * This update consists of adding two new entries IN THE BEGINNING
+ * of the range table (otherwise the rule system will die a slow,
+ * horrible and painful death, and we do not want that now, do we?)
+ * one for the OLD relation and one for the NEW one (both of
+ * them refer in fact to the "view" relation).
+ *
+ * Of course we must also increase the 'varnos' of all the Var nodes
+ * by 2...
+ *
+ * These extra RT entries are not actually used in the query,
+ * except for run-time locking and permission checking.
+ *---------------------------------------------------------------
+ */
+static Query *
+UpdateRangeTableOfViewParse(Oid viewOid, Query *viewParse)
+{
+	Relation	viewRel;
+	List	   *new_rt;
+	ParseNamespaceItem *nsitem;
+	RangeTblEntry *rt_entry1,
+			   *rt_entry2;
+	ParseState *pstate;
+
+	/*
+	 * Make a copy of the given parsetree. It's not so much that we don't
+	 * want to scribble on our input, it's that the parser has a bad habit of
+	 * outputting multiple links to the same subtree for constructs like
+	 * BETWEEN, and we mustn't have OffsetVarNodes increment the varno of a
+	 * Var node twice. copyObject will expand any multiply-referenced subtree
+	 * into multiple copies.
+	 */
+	viewParse = copyObject(viewParse);
+
+	/* Create a dummy ParseState for addRangeTableEntryForRelation */
+	pstate = make_parsestate(NULL);
+
+	/* need to open the rel for addRangeTableEntryForRelation */
+	viewRel = relation_open(viewOid, AccessShareLock);
+
+	/*
+	 * Create the 2 new range table entries and form the new range table...
+	 * OLD first, then NEW....
+	 */
+	nsitem = addRangeTableEntryForRelation(pstate, viewRel,
+										   AccessShareLock,
+										   makeAlias("old", NIL),
+										   false, false);
+	rt_entry1 = nsitem->p_rte;
+	nsitem = addRangeTableEntryForRelation(pstate, viewRel,
+										   AccessShareLock,
+										   makeAlias("new", NIL),
+										   false, false);
+	rt_entry2 = nsitem->p_rte;
+
+	/* Must override addRangeTableEntry's default access-check flags */
+	rt_entry1->requiredPerms = 0;
+	rt_entry2->requiredPerms = 0;
+
+	/*
+	 * Prepend OLD then NEW, so OLD ends up at RT index 1 and NEW at index 2,
+	 * ahead of all pre-existing entries (hence the offset-by-2 below).
+	 */
+	new_rt = lcons(rt_entry1, lcons(rt_entry2, viewParse->rtable));
+
+	viewParse->rtable = new_rt;
+
+	/*
+	 * Now offset all var nodes by 2, and jointree RT indexes too.
+	 */
+	OffsetVarNodes((Node *) viewParse, 2, 0);
+
+	relation_close(viewRel, AccessShareLock);
+
+	return viewParse;
+}
+
+/*
+ * DefineView
+ *		Execute a CREATE VIEW command.
+ */
+ObjectAddress
+DefineView(ViewStmt *stmt, const char *queryString,
+		   int stmt_location, int stmt_len)
+{
+	RawStmt    *rawstmt;
+	Query	   *viewParse;
+	RangeVar   *view;
+	ListCell   *cell;
+	bool		check_option;
+	ObjectAddress address;
+
+	/*
+	 * Run parse analysis to convert the raw parse tree to a Query. Note this
+	 * also acquires sufficient locks on the source table(s).
+	 */
+	rawstmt = makeNode(RawStmt);
+	rawstmt->stmt = stmt->query;
+	rawstmt->stmt_location = stmt_location;
+	rawstmt->stmt_len = stmt_len;
+
+	viewParse = parse_analyze_fixedparams(rawstmt, queryString, NULL, 0, NULL);
+
+	/*
+	 * The grammar should ensure that the result is a single SELECT Query.
+	 * However, it doesn't forbid SELECT INTO, so we have to check for that.
+	 */
+	if (!IsA(viewParse, Query))
+		elog(ERROR, "unexpected parse analysis result");
+	if (viewParse->utilityStmt != NULL &&
+		IsA(viewParse->utilityStmt, CreateTableAsStmt))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("views must not contain SELECT INTO")));
+	if (viewParse->commandType != CMD_SELECT)
+		elog(ERROR, "unexpected parse analysis result");
+
+	/*
+	 * Check for unsupported cases.
These tests are redundant with ones in
+	 * DefineQueryRewrite(), but that function will complain about a bogus ON
+	 * SELECT rule, and we'd rather the message complain about a view.
+	 */
+	if (viewParse->hasModifyingCTE)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("views must not contain data-modifying statements in WITH")));
+
+	/*
+	 * If the user specified the WITH CHECK OPTION, add it to the list of
+	 * reloptions.
+	 */
+	if (stmt->withCheckOption == LOCAL_CHECK_OPTION)
+		stmt->options = lappend(stmt->options,
+								makeDefElem("check_option",
+											(Node *) makeString("local"), -1));
+	else if (stmt->withCheckOption == CASCADED_CHECK_OPTION)
+		stmt->options = lappend(stmt->options,
+								makeDefElem("check_option",
+											(Node *) makeString("cascaded"), -1));
+
+	/*
+	 * Check that the view is auto-updatable if WITH CHECK OPTION was
+	 * specified.
+	 */
+	check_option = false;
+
+	foreach(cell, stmt->options)
+	{
+		DefElem    *defel = (DefElem *) lfirst(cell);
+
+		if (strcmp(defel->defname, "check_option") == 0)
+			check_option = true;
+	}
+
+	/*
+	 * If the check option is specified, look to see if the view is actually
+	 * auto-updatable or not.
+	 */
+	if (check_option)
+	{
+		const char *view_updatable_error =
+		view_query_is_auto_updatable(viewParse, true);
+
+		if (view_updatable_error)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("WITH CHECK OPTION is supported only on automatically updatable views"),
+					 errhint("%s", _(view_updatable_error))));
+	}
+
+	/*
+	 * If a list of column names was given, run through and insert these into
+	 * the actual query tree. - thomas 2000-03-08
+	 */
+	if (stmt->aliases != NIL)
+	{
+		ListCell   *alist_item = list_head(stmt->aliases);
+		ListCell   *targetList;
+
+		foreach(targetList, viewParse->targetList)
+		{
+			TargetEntry *te = lfirst_node(TargetEntry, targetList);
+
+			/* junk columns don't get aliases */
+			if (te->resjunk)
+				continue;
+			te->resname = pstrdup(strVal(lfirst(alist_item)));
+			alist_item = lnext(stmt->aliases, alist_item);
+			if (alist_item == NULL)
+				break;			/* done assigning aliases */
+		}
+
+		/*
+		 * Fewer aliases than columns is fine (remaining columns keep their
+		 * query-derived names), but more aliases than columns is an error.
+		 */
+		if (alist_item != NULL)
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("CREATE VIEW specifies more column "
+							"names than columns")));
+	}
+
+	/* Unlogged views are not sensible. */
+	if (stmt->view->relpersistence == RELPERSISTENCE_UNLOGGED)
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("views cannot be unlogged because they do not have storage")));
+
+	/*
+	 * If the user didn't explicitly ask for a temporary view, check whether
+	 * we need one implicitly. We allow TEMP to be inserted automatically as
+	 * long as the CREATE command is consistent with that --- no explicit
+	 * schema name.
+	 */
+	view = copyObject(stmt->view);	/* don't corrupt original command */
+	if (view->relpersistence == RELPERSISTENCE_PERMANENT
+		&& isQueryUsingTempRelation(viewParse))
+	{
+		view->relpersistence = RELPERSISTENCE_TEMP;
+		ereport(NOTICE,
+				(errmsg("view \"%s\" will be a temporary view",
+						view->relname)));
+	}
+
+	/*
+	 * Create the view relation
+	 *
+	 * NOTE: if it already exists and replace is false, the xact will be
+	 * aborted.
+	 */
+	address = DefineVirtualRelation(view, viewParse->targetList,
+									stmt->replace, stmt->options, viewParse);
+
+	return address;
+}
+
+/*
+ * Use the rules system to store the query for the view.
+ */
+void
+StoreViewQuery(Oid viewOid, Query *viewParse, bool replace)
+{
+	/*
+	 * The range table of 'viewParse' does not contain entries for the "OLD"
+	 * and "NEW" relations. So... add them!
+	 */
+	viewParse = UpdateRangeTableOfViewParse(viewOid, viewParse);
+
+	/*
+	 * Now create the rules associated with the view.
+	 */
+	DefineViewRules(viewOid, viewParse, replace);
+}
-- 
cgit v1.2.3