summary refs log tree commit diff stats
path: root/src/backend/commands
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-04 12:17:33 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-04 12:17:33 +0000
commit: 5e45211a64149b3c659b90ff2de6fa982a5a93ed (patch)
tree: 739caf8c461053357daa9f162bef34516c7bf452 /src/backend/commands
parent: Initial commit. (diff)
downloadpostgresql-15-5e45211a64149b3c659b90ff2de6fa982a5a93ed.tar.xz
postgresql-15-5e45211a64149b3c659b90ff2de6fa982a5a93ed.zip
Adding upstream version 15.5.upstream/15.5
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/backend/commands')
-rw-r--r--src/backend/commands/Makefile66
-rw-r--r--src/backend/commands/aggregatecmds.c496
-rw-r--r--src/backend/commands/alter.c1061
-rw-r--r--src/backend/commands/amcmds.c269
-rw-r--r--src/backend/commands/analyze.c3076
-rw-r--r--src/backend/commands/async.c2446
-rw-r--r--src/backend/commands/cluster.c1736
-rw-r--r--src/backend/commands/collationcmds.c820
-rw-r--r--src/backend/commands/comment.c459
-rw-r--r--src/backend/commands/constraint.c205
-rw-r--r--src/backend/commands/conversioncmds.c139
-rw-r--r--src/backend/commands/copy.c798
-rw-r--r--src/backend/commands/copyfrom.c1624
-rw-r--r--src/backend/commands/copyfromparse.c1921
-rw-r--r--src/backend/commands/copyto.c1310
-rw-r--r--src/backend/commands/createas.c637
-rw-r--r--src/backend/commands/dbcommands.c3285
-rw-r--r--src/backend/commands/define.c391
-rw-r--r--src/backend/commands/discard.c78
-rw-r--r--src/backend/commands/dropcmds.c493
-rw-r--r--src/backend/commands/event_trigger.c2182
-rw-r--r--src/backend/commands/explain.c5022
-rw-r--r--src/backend/commands/extension.c3417
-rw-r--r--src/backend/commands/foreigncmds.c1617
-rw-r--r--src/backend/commands/functioncmds.c2374
-rw-r--r--src/backend/commands/indexcmds.c4355
-rw-r--r--src/backend/commands/lockcmds.c306
-rw-r--r--src/backend/commands/matview.c936
-rw-r--r--src/backend/commands/opclasscmds.c1745
-rw-r--r--src/backend/commands/operatorcmds.c552
-rw-r--r--src/backend/commands/policy.c1285
-rw-r--r--src/backend/commands/portalcmds.c496
-rw-r--r--src/backend/commands/prepare.c729
-rw-r--r--src/backend/commands/proclang.c239
-rw-r--r--src/backend/commands/publicationcmds.c2006
-rw-r--r--src/backend/commands/schemacmds.c441
-rw-r--r--src/backend/commands/seclabel.c581
-rw-r--r--src/backend/commands/sequence.c1917
-rw-r--r--src/backend/commands/statscmds.c898
-rw-r--r--src/backend/commands/subscriptioncmds.c1966
-rw-r--r--src/backend/commands/tablecmds.c19402
-rw-r--r--src/backend/commands/tablespace.c1595
-rw-r--r--src/backend/commands/trigger.c6664
-rw-r--r--src/backend/commands/tsearchcmds.c1759
-rw-r--r--src/backend/commands/typecmds.c4495
-rw-r--r--src/backend/commands/user.c1645
-rw-r--r--src/backend/commands/vacuum.c2465
-rw-r--r--src/backend/commands/vacuumparallel.c1074
-rw-r--r--src/backend/commands/variable.c935
-rw-r--r--src/backend/commands/view.c604
50 files changed, 95012 insertions, 0 deletions
diff --git a/src/backend/commands/Makefile b/src/backend/commands/Makefile
new file mode 100644
index 0000000..48f7348
--- /dev/null
+++ b/src/backend/commands/Makefile
@@ -0,0 +1,66 @@
+#-------------------------------------------------------------------------
+#
+# Makefile--
+# Makefile for backend/commands
+#
+# IDENTIFICATION
+# src/backend/commands/Makefile
+#
+#-------------------------------------------------------------------------
+
+subdir = src/backend/commands
+top_builddir = ../../..
+include $(top_builddir)/src/Makefile.global
+
+# Object files for this directory, kept in alphabetical order;
+# preserve that ordering when adding new files.
+OBJS = \
+ aggregatecmds.o \
+ alter.o \
+ amcmds.o \
+ analyze.o \
+ async.o \
+ cluster.o \
+ collationcmds.o \
+ comment.o \
+ constraint.o \
+ conversioncmds.o \
+ copy.o \
+ copyfrom.o \
+ copyfromparse.o \
+ copyto.o \
+ createas.o \
+ dbcommands.o \
+ define.o \
+ discard.o \
+ dropcmds.o \
+ event_trigger.o \
+ explain.o \
+ extension.o \
+ foreigncmds.o \
+ functioncmds.o \
+ indexcmds.o \
+ lockcmds.o \
+ matview.o \
+ opclasscmds.o \
+ operatorcmds.o \
+ policy.o \
+ portalcmds.o \
+ prepare.o \
+ proclang.o \
+ publicationcmds.o \
+ schemacmds.o \
+ seclabel.o \
+ sequence.o \
+ statscmds.o \
+ subscriptioncmds.o \
+ tablecmds.o \
+ tablespace.o \
+ trigger.o \
+ tsearchcmds.o \
+ typecmds.o \
+ user.o \
+ vacuum.o \
+ vacuumparallel.o \
+ variable.o \
+ view.o
+
+# common.mk supplies the standard build rules for backend subdirectories
+include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/commands/aggregatecmds.c b/src/backend/commands/aggregatecmds.c
new file mode 100644
index 0000000..010eca7
--- /dev/null
+++ b/src/backend/commands/aggregatecmds.c
@@ -0,0 +1,496 @@
+/*-------------------------------------------------------------------------
+ *
+ * aggregatecmds.c
+ *
+ * Routines for aggregate-manipulation commands
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/commands/aggregatecmds.c
+ *
+ * DESCRIPTION
+ * The "DefineFoo" routines take the parse tree and pick out the
+ * appropriate arguments/flags, passing the results to the
+ * corresponding "FooDefine" routines (in src/catalog) that do
+ * the actual catalog-munging. These routines also verify permission
+ * of the user to execute the command.
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/htup_details.h"
+#include "catalog/dependency.h"
+#include "catalog/pg_aggregate.h"
+#include "catalog/pg_proc.h"
+#include "catalog/pg_type.h"
+#include "commands/alter.h"
+#include "commands/defrem.h"
+#include "miscadmin.h"
+#include "parser/parse_func.h"
+#include "parser/parse_type.h"
+#include "utils/acl.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/syscache.h"
+
+
+static char extractModify(DefElem *defel);
+
+
+/*
+ * DefineAggregate
+ *
+ * "oldstyle" signals the old (pre-8.2) style where the aggregate input type
+ * is specified by a BASETYPE element in the parameters. Otherwise,
+ * "args" is a pair, whose first element is a list of FunctionParameter structs
+ * defining the agg's arguments (both direct and aggregated), and whose second
+ * element is an Integer node with the number of direct args, or -1 if this
+ * isn't an ordered-set aggregate.
+ * "parameters" is a list of DefElem representing the agg's definition clauses.
+ *
+ * Returns the ObjectAddress of the aggregate created (or replaced, when
+ * "replace" is true).  Errors are reported via ereport(ERROR).
+ */
+ObjectAddress
+DefineAggregate(ParseState *pstate,
+ List *name,
+ List *args,
+ bool oldstyle,
+ List *parameters,
+ bool replace)
+{
+ char *aggName;
+ Oid aggNamespace;
+ AclResult aclresult;
+ char aggKind = AGGKIND_NORMAL;
+ List *transfuncName = NIL;
+ List *finalfuncName = NIL;
+ List *combinefuncName = NIL;
+ List *serialfuncName = NIL;
+ List *deserialfuncName = NIL;
+ List *mtransfuncName = NIL;
+ List *minvtransfuncName = NIL;
+ List *mfinalfuncName = NIL;
+ bool finalfuncExtraArgs = false;
+ bool mfinalfuncExtraArgs = false;
+ /* zero in the two *Modify variables means "clause not specified" */
+ char finalfuncModify = 0;
+ char mfinalfuncModify = 0;
+ List *sortoperatorName = NIL;
+ TypeName *baseType = NULL;
+ TypeName *transType = NULL;
+ TypeName *mtransType = NULL;
+ int32 transSpace = 0;
+ int32 mtransSpace = 0;
+ char *initval = NULL;
+ char *minitval = NULL;
+ char *parallel = NULL;
+ int numArgs;
+ int numDirectArgs = 0;
+ oidvector *parameterTypes;
+ ArrayType *allParameterTypes;
+ ArrayType *parameterModes;
+ ArrayType *parameterNames;
+ List *parameterDefaults;
+ Oid variadicArgType;
+ Oid transTypeId;
+ Oid mtransTypeId = InvalidOid;
+ char transTypeType;
+ char mtransTypeType = 0;
+ char proparallel = PROPARALLEL_UNSAFE;
+ ListCell *pl;
+
+ /* Convert list of names to a name and namespace */
+ aggNamespace = QualifiedNameGetCreationNamespace(name, &aggName);
+
+ /* Check we have creation rights in target namespace */
+ aclresult = pg_namespace_aclcheck(aggNamespace, GetUserId(), ACL_CREATE);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error(aclresult, OBJECT_SCHEMA,
+ get_namespace_name(aggNamespace));
+
+ /* Deconstruct the output of the aggr_args grammar production */
+ if (!oldstyle)
+ {
+ Assert(list_length(args) == 2);
+ numDirectArgs = intVal(lsecond(args));
+ /* a nonnegative direct-arg count marks an ordered-set aggregate */
+ if (numDirectArgs >= 0)
+ aggKind = AGGKIND_ORDERED_SET;
+ else
+ numDirectArgs = 0;
+ args = linitial_node(List, args);
+ }
+
+ /* Examine aggregate's definition clauses */
+ foreach(pl, parameters)
+ {
+ DefElem *defel = lfirst_node(DefElem, pl);
+
+ /*
+ * sfunc1, stype1, and initcond1 are accepted as obsolete spellings
+ * for sfunc, stype, initcond.
+ */
+ if (strcmp(defel->defname, "sfunc") == 0)
+ transfuncName = defGetQualifiedName(defel);
+ else if (strcmp(defel->defname, "sfunc1") == 0)
+ transfuncName = defGetQualifiedName(defel);
+ else if (strcmp(defel->defname, "finalfunc") == 0)
+ finalfuncName = defGetQualifiedName(defel);
+ else if (strcmp(defel->defname, "combinefunc") == 0)
+ combinefuncName = defGetQualifiedName(defel);
+ else if (strcmp(defel->defname, "serialfunc") == 0)
+ serialfuncName = defGetQualifiedName(defel);
+ else if (strcmp(defel->defname, "deserialfunc") == 0)
+ deserialfuncName = defGetQualifiedName(defel);
+ else if (strcmp(defel->defname, "msfunc") == 0)
+ mtransfuncName = defGetQualifiedName(defel);
+ else if (strcmp(defel->defname, "minvfunc") == 0)
+ minvtransfuncName = defGetQualifiedName(defel);
+ else if (strcmp(defel->defname, "mfinalfunc") == 0)
+ mfinalfuncName = defGetQualifiedName(defel);
+ else if (strcmp(defel->defname, "finalfunc_extra") == 0)
+ finalfuncExtraArgs = defGetBoolean(defel);
+ else if (strcmp(defel->defname, "mfinalfunc_extra") == 0)
+ mfinalfuncExtraArgs = defGetBoolean(defel);
+ else if (strcmp(defel->defname, "finalfunc_modify") == 0)
+ finalfuncModify = extractModify(defel);
+ else if (strcmp(defel->defname, "mfinalfunc_modify") == 0)
+ mfinalfuncModify = extractModify(defel);
+ else if (strcmp(defel->defname, "sortop") == 0)
+ sortoperatorName = defGetQualifiedName(defel);
+ else if (strcmp(defel->defname, "basetype") == 0)
+ baseType = defGetTypeName(defel);
+ else if (strcmp(defel->defname, "hypothetical") == 0)
+ {
+ if (defGetBoolean(defel))
+ {
+ /* HYPOTHETICAL is only sensible on an ordered-set aggregate */
+ if (aggKind == AGGKIND_NORMAL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+ errmsg("only ordered-set aggregates can be hypothetical")));
+ aggKind = AGGKIND_HYPOTHETICAL;
+ }
+ }
+ else if (strcmp(defel->defname, "stype") == 0)
+ transType = defGetTypeName(defel);
+ else if (strcmp(defel->defname, "stype1") == 0)
+ transType = defGetTypeName(defel);
+ else if (strcmp(defel->defname, "sspace") == 0)
+ transSpace = defGetInt32(defel);
+ else if (strcmp(defel->defname, "mstype") == 0)
+ mtransType = defGetTypeName(defel);
+ else if (strcmp(defel->defname, "msspace") == 0)
+ mtransSpace = defGetInt32(defel);
+ else if (strcmp(defel->defname, "initcond") == 0)
+ initval = defGetString(defel);
+ else if (strcmp(defel->defname, "initcond1") == 0)
+ initval = defGetString(defel);
+ else if (strcmp(defel->defname, "minitcond") == 0)
+ minitval = defGetString(defel);
+ else if (strcmp(defel->defname, "parallel") == 0)
+ parallel = defGetString(defel);
+ else
+ /* unrecognized clauses are only warned about, not rejected */
+ ereport(WARNING,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("aggregate attribute \"%s\" not recognized",
+ defel->defname)));
+ }
+
+ /*
+ * make sure we have our required definitions
+ */
+ if (transType == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+ errmsg("aggregate stype must be specified")));
+ if (transfuncName == NIL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+ errmsg("aggregate sfunc must be specified")));
+
+ /*
+ * if mtransType is given, mtransfuncName and minvtransfuncName must be as
+ * well; if not, then none of the moving-aggregate options should have
+ * been given.
+ */
+ if (mtransType != NULL)
+ {
+ if (mtransfuncName == NIL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+ errmsg("aggregate msfunc must be specified when mstype is specified")));
+ if (minvtransfuncName == NIL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+ errmsg("aggregate minvfunc must be specified when mstype is specified")));
+ }
+ else
+ {
+ if (mtransfuncName != NIL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+ errmsg("aggregate msfunc must not be specified without mstype")));
+ if (minvtransfuncName != NIL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+ errmsg("aggregate minvfunc must not be specified without mstype")));
+ if (mfinalfuncName != NIL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+ errmsg("aggregate mfinalfunc must not be specified without mstype")));
+ if (mtransSpace != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+ errmsg("aggregate msspace must not be specified without mstype")));
+ if (minitval != NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+ errmsg("aggregate minitcond must not be specified without mstype")));
+ }
+
+ /*
+ * Default values for modify flags can only be determined once we know the
+ * aggKind.
+ */
+ if (finalfuncModify == 0)
+ finalfuncModify = (aggKind == AGGKIND_NORMAL) ? AGGMODIFY_READ_ONLY : AGGMODIFY_READ_WRITE;
+ if (mfinalfuncModify == 0)
+ mfinalfuncModify = (aggKind == AGGKIND_NORMAL) ? AGGMODIFY_READ_ONLY : AGGMODIFY_READ_WRITE;
+
+ /*
+ * look up the aggregate's input datatype(s).
+ */
+ if (oldstyle)
+ {
+ /*
+ * Old style: use basetype parameter. This supports aggregates of
+ * zero or one input, with input type ANY meaning zero inputs.
+ *
+ * Historically we allowed the command to look like basetype = 'ANY'
+ * so we must do a case-insensitive comparison for the name ANY. Ugh.
+ */
+ Oid aggArgTypes[1];
+
+ if (baseType == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+ errmsg("aggregate input type must be specified")));
+
+ if (pg_strcasecmp(TypeNameToString(baseType), "ANY") == 0)
+ {
+ numArgs = 0;
+ aggArgTypes[0] = InvalidOid;
+ }
+ else
+ {
+ numArgs = 1;
+ aggArgTypes[0] = typenameTypeId(NULL, baseType);
+ }
+ parameterTypes = buildoidvector(aggArgTypes, numArgs);
+ allParameterTypes = NULL;
+ parameterModes = NULL;
+ parameterNames = NULL;
+ parameterDefaults = NIL;
+ variadicArgType = InvalidOid;
+ }
+ else
+ {
+ /*
+ * New style: args is a list of FunctionParameters (possibly zero of
+ * 'em). We share functioncmds.c's code for processing them.
+ */
+ Oid requiredResultType;
+
+ if (baseType != NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+ errmsg("basetype is redundant with aggregate input type specification")));
+
+ numArgs = list_length(args);
+ interpret_function_parameter_list(pstate,
+ args,
+ InvalidOid,
+ OBJECT_AGGREGATE,
+ &parameterTypes,
+ NULL,
+ &allParameterTypes,
+ &parameterModes,
+ &parameterNames,
+ NULL,
+ &parameterDefaults,
+ &variadicArgType,
+ &requiredResultType);
+ /* Parameter defaults are not currently allowed by the grammar */
+ Assert(parameterDefaults == NIL);
+ /* There shouldn't have been any OUT parameters, either */
+ Assert(requiredResultType == InvalidOid);
+ }
+
+ /*
+ * look up the aggregate's transtype.
+ *
+ * transtype can't be a pseudo-type, since we need to be able to store
+ * values of the transtype. However, we can allow polymorphic transtype
+ * in some cases (AggregateCreate will check). Also, we allow "internal"
+ * for functions that want to pass pointers to private data structures;
+ * but allow that only to superusers, since you could crash the system (or
+ * worse) by connecting up incompatible internal-using functions in an
+ * aggregate.
+ */
+ transTypeId = typenameTypeId(NULL, transType);
+ transTypeType = get_typtype(transTypeId);
+ if (transTypeType == TYPTYPE_PSEUDO &&
+ !IsPolymorphicType(transTypeId))
+ {
+ if (transTypeId == INTERNALOID && superuser())
+ /* okay */ ;
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+ errmsg("aggregate transition data type cannot be %s",
+ format_type_be(transTypeId))));
+ }
+
+ if (serialfuncName && deserialfuncName)
+ {
+ /*
+ * Serialization is only needed/allowed for transtype INTERNAL.
+ */
+ if (transTypeId != INTERNALOID)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+ errmsg("serialization functions may be specified only when the aggregate transition data type is %s",
+ format_type_be(INTERNALOID))));
+ }
+ else if (serialfuncName || deserialfuncName)
+ {
+ /*
+ * Cannot specify one function without the other.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+ errmsg("must specify both or neither of serialization and deserialization functions")));
+ }
+
+ /*
+ * If a moving-aggregate transtype is specified, look that up. Same
+ * restrictions as for transtype.
+ */
+ if (mtransType)
+ {
+ mtransTypeId = typenameTypeId(NULL, mtransType);
+ mtransTypeType = get_typtype(mtransTypeId);
+ if (mtransTypeType == TYPTYPE_PSEUDO &&
+ !IsPolymorphicType(mtransTypeId))
+ {
+ if (mtransTypeId == INTERNALOID && superuser())
+ /* okay */ ;
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+ errmsg("aggregate transition data type cannot be %s",
+ format_type_be(mtransTypeId))));
+ }
+ }
+
+ /*
+ * If we have an initval, and it's not for a pseudotype (particularly a
+ * polymorphic type), make sure it's acceptable to the type's input
+ * function. We will store the initval as text, because the input
+ * function isn't necessarily immutable (consider "now" for timestamp),
+ * and we want to use the runtime not creation-time interpretation of the
+ * value. However, if it's an incorrect value it seems much more
+ * user-friendly to complain at CREATE AGGREGATE time.
+ */
+ if (initval && transTypeType != TYPTYPE_PSEUDO)
+ {
+ Oid typinput,
+ typioparam;
+
+ getTypeInputInfo(transTypeId, &typinput, &typioparam);
+ (void) OidInputFunctionCall(typinput, initval, typioparam, -1);
+ }
+
+ /*
+ * Likewise for moving-aggregate initval.
+ */
+ if (minitval && mtransTypeType != TYPTYPE_PSEUDO)
+ {
+ Oid typinput,
+ typioparam;
+
+ getTypeInputInfo(mtransTypeId, &typinput, &typioparam);
+ (void) OidInputFunctionCall(typinput, minitval, typioparam, -1);
+ }
+
+ /* map the PARALLEL clause string onto a proparallel char code */
+ if (parallel)
+ {
+ if (strcmp(parallel, "safe") == 0)
+ proparallel = PROPARALLEL_SAFE;
+ else if (strcmp(parallel, "restricted") == 0)
+ proparallel = PROPARALLEL_RESTRICTED;
+ else if (strcmp(parallel, "unsafe") == 0)
+ proparallel = PROPARALLEL_UNSAFE;
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("parameter \"parallel\" must be SAFE, RESTRICTED, or UNSAFE")));
+ }
+
+ /*
+ * Most of the argument-checking is done inside of AggregateCreate
+ */
+ return AggregateCreate(aggName, /* aggregate name */
+ aggNamespace, /* namespace */
+ replace,
+ aggKind,
+ numArgs,
+ numDirectArgs,
+ parameterTypes,
+ PointerGetDatum(allParameterTypes),
+ PointerGetDatum(parameterModes),
+ PointerGetDatum(parameterNames),
+ parameterDefaults,
+ variadicArgType,
+ transfuncName, /* step function name */
+ finalfuncName, /* final function name */
+ combinefuncName, /* combine function name */
+ serialfuncName, /* serial function name */
+ deserialfuncName, /* deserial function name */
+ mtransfuncName, /* fwd trans function name */
+ minvtransfuncName, /* inv trans function name */
+ mfinalfuncName, /* final function name */
+ finalfuncExtraArgs,
+ mfinalfuncExtraArgs,
+ finalfuncModify,
+ mfinalfuncModify,
+ sortoperatorName, /* sort operator name */
+ transTypeId, /* transition data type */
+ transSpace, /* transition space */
+ mtransTypeId, /* transition data type */
+ mtransSpace, /* transition space */
+ initval, /* initial condition */
+ minitval, /* initial condition */
+ proparallel); /* parallel safe? */
+}
+
+/*
+ * Convert the string form of [m]finalfunc_modify to the catalog representation
+ *
+ * Throws ERROR (does not return) if the value is not one of the three
+ * recognized spellings.
+ */
+static char
+extractModify(DefElem *defel)
+{
+ char *val = defGetString(defel);
+
+ if (strcmp(val, "read_only") == 0)
+ return AGGMODIFY_READ_ONLY;
+ if (strcmp(val, "shareable") == 0)
+ return AGGMODIFY_SHAREABLE;
+ if (strcmp(val, "read_write") == 0)
+ return AGGMODIFY_READ_WRITE;
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("parameter \"%s\" must be READ_ONLY, SHAREABLE, or READ_WRITE",
+ defel->defname)));
+ return 0; /* keep compiler quiet */
+}
diff --git a/src/backend/commands/alter.c b/src/backend/commands/alter.c
new file mode 100644
index 0000000..5456b82
--- /dev/null
+++ b/src/backend/commands/alter.c
@@ -0,0 +1,1061 @@
+/*-------------------------------------------------------------------------
+ *
+ * alter.c
+ * Drivers for generic alter commands
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/commands/alter.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/htup_details.h"
+#include "access/relation.h"
+#include "access/sysattr.h"
+#include "access/table.h"
+#include "catalog/dependency.h"
+#include "catalog/indexing.h"
+#include "catalog/namespace.h"
+#include "catalog/objectaccess.h"
+#include "catalog/pg_collation.h"
+#include "catalog/pg_conversion.h"
+#include "catalog/pg_event_trigger.h"
+#include "catalog/pg_foreign_data_wrapper.h"
+#include "catalog/pg_foreign_server.h"
+#include "catalog/pg_language.h"
+#include "catalog/pg_largeobject.h"
+#include "catalog/pg_largeobject_metadata.h"
+#include "catalog/pg_namespace.h"
+#include "catalog/pg_opclass.h"
+#include "catalog/pg_opfamily.h"
+#include "catalog/pg_proc.h"
+#include "catalog/pg_statistic_ext.h"
+#include "catalog/pg_subscription.h"
+#include "catalog/pg_ts_config.h"
+#include "catalog/pg_ts_dict.h"
+#include "catalog/pg_ts_parser.h"
+#include "catalog/pg_ts_template.h"
+#include "commands/alter.h"
+#include "commands/collationcmds.h"
+#include "commands/conversioncmds.h"
+#include "commands/dbcommands.h"
+#include "commands/defrem.h"
+#include "commands/event_trigger.h"
+#include "commands/extension.h"
+#include "commands/policy.h"
+#include "commands/proclang.h"
+#include "commands/publicationcmds.h"
+#include "commands/schemacmds.h"
+#include "commands/subscriptioncmds.h"
+#include "commands/tablecmds.h"
+#include "commands/tablespace.h"
+#include "commands/trigger.h"
+#include "commands/typecmds.h"
+#include "commands/user.h"
+#include "miscadmin.h"
+#include "parser/parse_func.h"
+#include "rewrite/rewriteDefine.h"
+#include "tcop/utility.h"
+#include "utils/builtins.h"
+#include "utils/fmgroids.h"
+#include "utils/lsyscache.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
+
+static Oid AlterObjectNamespace_internal(Relation rel, Oid objid, Oid nspOid);
+
+/*
+ * Raise an error to the effect that an object of the given name is already
+ * present in the given namespace.
+ *
+ * This variant is for object classes that are not schema-qualified, so the
+ * name must be unique across the whole database.  Does not return.
+ */
+static void
+report_name_conflict(Oid classId, const char *name)
+{
+ char *msgfmt;
+
+ switch (classId)
+ {
+ case EventTriggerRelationId:
+ msgfmt = gettext_noop("event trigger \"%s\" already exists");
+ break;
+ case ForeignDataWrapperRelationId:
+ msgfmt = gettext_noop("foreign-data wrapper \"%s\" already exists");
+ break;
+ case ForeignServerRelationId:
+ msgfmt = gettext_noop("server \"%s\" already exists");
+ break;
+ case LanguageRelationId:
+ msgfmt = gettext_noop("language \"%s\" already exists");
+ break;
+ case PublicationRelationId:
+ msgfmt = gettext_noop("publication \"%s\" already exists");
+ break;
+ case SubscriptionRelationId:
+ msgfmt = gettext_noop("subscription \"%s\" already exists");
+ break;
+ default:
+ elog(ERROR, "unsupported object class %u", classId);
+ break;
+ }
+
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg(msgfmt, name)));
+}
+
+/*
+ * Raise an error to the effect that an object of the given name already
+ * exists in the given namespace.  Schema-qualified counterpart of
+ * report_name_conflict().  Does not return.
+ */
+static void
+report_namespace_conflict(Oid classId, const char *name, Oid nspOid)
+{
+ char *msgfmt;
+
+ Assert(OidIsValid(nspOid));
+
+ switch (classId)
+ {
+ case ConversionRelationId:
+ Assert(OidIsValid(nspOid));
+ msgfmt = gettext_noop("conversion \"%s\" already exists in schema \"%s\"");
+ break;
+ case StatisticExtRelationId:
+ Assert(OidIsValid(nspOid));
+ msgfmt = gettext_noop("statistics object \"%s\" already exists in schema \"%s\"");
+ break;
+ case TSParserRelationId:
+ Assert(OidIsValid(nspOid));
+ msgfmt = gettext_noop("text search parser \"%s\" already exists in schema \"%s\"");
+ break;
+ case TSDictionaryRelationId:
+ Assert(OidIsValid(nspOid));
+ msgfmt = gettext_noop("text search dictionary \"%s\" already exists in schema \"%s\"");
+ break;
+ case TSTemplateRelationId:
+ Assert(OidIsValid(nspOid));
+ msgfmt = gettext_noop("text search template \"%s\" already exists in schema \"%s\"");
+ break;
+ case TSConfigRelationId:
+ Assert(OidIsValid(nspOid));
+ msgfmt = gettext_noop("text search configuration \"%s\" already exists in schema \"%s\"");
+ break;
+ default:
+ elog(ERROR, "unsupported object class %u", classId);
+ break;
+ }
+
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg(msgfmt, name, get_namespace_name(nspOid))));
+}
+
+/*
+ * AlterObjectRename_internal
+ *
+ * Generic function to rename the given object, for simple cases (won't
+ * work for tables, nor other cases where we need to do more than change
+ * the name column of a single catalog entry).
+ *
+ * rel: catalog relation containing object (RowExclusiveLock'd by caller)
+ * objectId: OID of object to be renamed
+ * new_name: CString representation of new name
+ */
+static void
+AlterObjectRename_internal(Relation rel, Oid objectId, const char *new_name)
+{
+ Oid classId = RelationGetRelid(rel);
+ int oidCacheId = get_object_catcache_oid(classId);
+ int nameCacheId = get_object_catcache_name(classId);
+ AttrNumber Anum_name = get_object_attnum_name(classId);
+ AttrNumber Anum_namespace = get_object_attnum_namespace(classId);
+ AttrNumber Anum_owner = get_object_attnum_owner(classId);
+ HeapTuple oldtup;
+ HeapTuple newtup;
+ Datum datum;
+ bool isnull;
+ Oid namespaceId;
+ Oid ownerId;
+ char *old_name;
+ AclResult aclresult;
+ Datum *values;
+ bool *nulls;
+ bool *replaces;
+ NameData nameattrdata;
+
+ oldtup = SearchSysCache1(oidCacheId, ObjectIdGetDatum(objectId));
+ if (!HeapTupleIsValid(oldtup))
+ elog(ERROR, "cache lookup failed for object %u of catalog \"%s\"",
+ objectId, RelationGetRelationName(rel));
+
+ /* fetch the current name; it's used below for permission-error messages */
+ datum = heap_getattr(oldtup, Anum_name,
+ RelationGetDescr(rel), &isnull);
+ Assert(!isnull);
+ old_name = NameStr(*(DatumGetName(datum)));
+
+ /* Get OID of namespace */
+ if (Anum_namespace > 0)
+ {
+ datum = heap_getattr(oldtup, Anum_namespace,
+ RelationGetDescr(rel), &isnull);
+ Assert(!isnull);
+ namespaceId = DatumGetObjectId(datum);
+ }
+ else
+ namespaceId = InvalidOid;
+
+ /* Permission checks ... superusers can always do it */
+ if (!superuser())
+ {
+ /* Fail if object does not have an explicit owner */
+ if (Anum_owner <= 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("must be superuser to rename %s",
+ getObjectDescriptionOids(classId, objectId))));
+
+ /* Otherwise, must be owner of the existing object */
+ datum = heap_getattr(oldtup, Anum_owner,
+ RelationGetDescr(rel), &isnull);
+ Assert(!isnull);
+ ownerId = DatumGetObjectId(datum);
+
+ if (!has_privs_of_role(GetUserId(), DatumGetObjectId(ownerId)))
+ aclcheck_error(ACLCHECK_NOT_OWNER, get_object_type(classId, objectId),
+ old_name);
+
+ /* User must have CREATE privilege on the namespace */
+ if (OidIsValid(namespaceId))
+ {
+ aclresult = pg_namespace_aclcheck(namespaceId, GetUserId(),
+ ACL_CREATE);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error(aclresult, OBJECT_SCHEMA,
+ get_namespace_name(namespaceId));
+ }
+ }
+
+ /*
+ * Check for duplicate name (more friendly than unique-index failure).
+ * Since this is just a friendliness check, we can just skip it in cases
+ * where there isn't suitable support.
+ */
+ if (classId == ProcedureRelationId)
+ {
+ Form_pg_proc proc = (Form_pg_proc) GETSTRUCT(oldtup);
+
+ IsThereFunctionInNamespace(new_name, proc->pronargs,
+ &proc->proargtypes, proc->pronamespace);
+ }
+ else if (classId == CollationRelationId)
+ {
+ Form_pg_collation coll = (Form_pg_collation) GETSTRUCT(oldtup);
+
+ IsThereCollationInNamespace(new_name, coll->collnamespace);
+ }
+ else if (classId == OperatorClassRelationId)
+ {
+ Form_pg_opclass opc = (Form_pg_opclass) GETSTRUCT(oldtup);
+
+ IsThereOpClassInNamespace(new_name, opc->opcmethod,
+ opc->opcnamespace);
+ }
+ else if (classId == OperatorFamilyRelationId)
+ {
+ Form_pg_opfamily opf = (Form_pg_opfamily) GETSTRUCT(oldtup);
+
+ IsThereOpFamilyInNamespace(new_name, opf->opfmethod,
+ opf->opfnamespace);
+ }
+ else if (classId == SubscriptionRelationId)
+ {
+ if (SearchSysCacheExists2(SUBSCRIPTIONNAME, MyDatabaseId,
+ CStringGetDatum(new_name)))
+ report_name_conflict(classId, new_name);
+
+ /* Also enforce regression testing naming rules, if enabled */
+#ifdef ENFORCE_REGRESSION_TEST_NAME_RESTRICTIONS
+ if (strncmp(new_name, "regress_", 8) != 0)
+ elog(WARNING, "subscriptions created by regression test cases should have names starting with \"regress_\"");
+#endif
+ }
+ else if (nameCacheId >= 0)
+ {
+ /* generic path: probe the name syscache, with or without namespace */
+ if (OidIsValid(namespaceId))
+ {
+ if (SearchSysCacheExists2(nameCacheId,
+ CStringGetDatum(new_name),
+ ObjectIdGetDatum(namespaceId)))
+ report_namespace_conflict(classId, new_name, namespaceId);
+ }
+ else
+ {
+ if (SearchSysCacheExists1(nameCacheId,
+ CStringGetDatum(new_name)))
+ report_name_conflict(classId, new_name);
+ }
+ }
+
+ /* Build modified tuple */
+ values = palloc0(RelationGetNumberOfAttributes(rel) * sizeof(Datum));
+ nulls = palloc0(RelationGetNumberOfAttributes(rel) * sizeof(bool));
+ replaces = palloc0(RelationGetNumberOfAttributes(rel) * sizeof(bool));
+ namestrcpy(&nameattrdata, new_name);
+ values[Anum_name - 1] = NameGetDatum(&nameattrdata);
+ replaces[Anum_name - 1] = true;
+ newtup = heap_modify_tuple(oldtup, RelationGetDescr(rel),
+ values, nulls, replaces);
+
+ /* Perform actual update */
+ CatalogTupleUpdate(rel, &oldtup->t_self, newtup);
+
+ /* let any object_access hooks know about the alteration */
+ InvokeObjectPostAlterHook(classId, objectId, 0);
+
+ /* Release memory */
+ pfree(values);
+ pfree(nulls);
+ pfree(replaces);
+ heap_freetuple(newtup);
+
+ ReleaseSysCache(oldtup);
+}
+
+/*
+ * Executes an ALTER OBJECT / RENAME TO statement. Based on the object
+ * type, the function appropriate to that type is executed.
+ *
+ * Return value is the address of the renamed object.
+ */
+ObjectAddress
+ExecRenameStmt(RenameStmt *stmt)
+{
+ switch (stmt->renameType)
+ {
+ case OBJECT_TABCONSTRAINT:
+ case OBJECT_DOMCONSTRAINT:
+ return RenameConstraint(stmt);
+
+ case OBJECT_DATABASE:
+ return RenameDatabase(stmt->subname, stmt->newname);
+
+ case OBJECT_ROLE:
+ return RenameRole(stmt->subname, stmt->newname);
+
+ case OBJECT_SCHEMA:
+ return RenameSchema(stmt->subname, stmt->newname);
+
+ case OBJECT_TABLESPACE:
+ return RenameTableSpace(stmt->subname, stmt->newname);
+
+ case OBJECT_TABLE:
+ case OBJECT_SEQUENCE:
+ case OBJECT_VIEW:
+ case OBJECT_MATVIEW:
+ case OBJECT_INDEX:
+ case OBJECT_FOREIGN_TABLE:
+ return RenameRelation(stmt);
+
+ case OBJECT_COLUMN:
+ case OBJECT_ATTRIBUTE:
+ return renameatt(stmt);
+
+ case OBJECT_RULE:
+ return RenameRewriteRule(stmt->relation, stmt->subname,
+ stmt->newname);
+
+ case OBJECT_TRIGGER:
+ return renametrig(stmt);
+
+ case OBJECT_POLICY:
+ return rename_policy(stmt);
+
+ case OBJECT_DOMAIN:
+ case OBJECT_TYPE:
+ return RenameType(stmt);
+
+ case OBJECT_AGGREGATE:
+ case OBJECT_COLLATION:
+ case OBJECT_CONVERSION:
+ case OBJECT_EVENT_TRIGGER:
+ case OBJECT_FDW:
+ case OBJECT_FOREIGN_SERVER:
+ case OBJECT_FUNCTION:
+ case OBJECT_OPCLASS:
+ case OBJECT_OPFAMILY:
+ case OBJECT_LANGUAGE:
+ case OBJECT_PROCEDURE:
+ case OBJECT_ROUTINE:
+ case OBJECT_STATISTIC_EXT:
+ case OBJECT_TSCONFIGURATION:
+ case OBJECT_TSDICTIONARY:
+ case OBJECT_TSPARSER:
+ case OBJECT_TSTEMPLATE:
+ case OBJECT_PUBLICATION:
+ case OBJECT_SUBSCRIPTION:
+ {
+ ObjectAddress address;
+ Relation catalog;
+ Relation relation;
+
+ /*
+ * Generic case: lock the object, then open its catalog and
+ * update the name column of its single catalog row.
+ */
+ address = get_object_address(stmt->renameType,
+ stmt->object,
+ &relation,
+ AccessExclusiveLock, false);
+ Assert(relation == NULL);
+
+ catalog = table_open(address.classId, RowExclusiveLock);
+ AlterObjectRename_internal(catalog,
+ address.objectId,
+ stmt->newname);
+ table_close(catalog, RowExclusiveLock);
+
+ return address;
+ }
+
+ default:
+ elog(ERROR, "unrecognized rename stmt type: %d",
+ (int) stmt->renameType);
+ return InvalidObjectAddress; /* keep compiler happy */
+ }
+}
+
+/*
+ * Executes an ALTER OBJECT / [NO] DEPENDS ON EXTENSION statement.
+ *
+ * Return value is the address of the altered object.  refAddress is an output
+ * argument which, if not null, receives the address of the object that the
+ * altered object now depends on.
+ */
+ObjectAddress
+ExecAlterObjectDependsStmt(AlterObjectDependsStmt *stmt, ObjectAddress *refAddress)
+{
+	ObjectAddress address;
+	ObjectAddress refAddr;
+	Relation	rel;
+
+	/* Look up and lock the target object before any other work */
+	address =
+		get_object_address_rv(stmt->objectType, stmt->relation, (List *) stmt->object,
+							  &rel, AccessExclusiveLock, false);
+
+	/*
+	 * Verify that the user is entitled to run the command.
+	 *
+	 * We don't check any privileges on the extension, because that's not
+	 * needed.  The object owner is stipulating, by running this command, that
+	 * the extension owner can drop the object whenever they feel like it,
+	 * which is not considered a problem.
+	 */
+	check_object_ownership(GetUserId(),
+						   stmt->objectType, address, stmt->object, rel);
+
+	/*
+	 * If a relation was involved, it would have been opened and locked. We
+	 * don't need the relation here, but we'll retain the lock until commit.
+	 */
+	if (rel)
+		table_close(rel, NoLock);
+
+	/* Resolve the extension; extensions are never relation-based */
+	refAddr = get_object_address(OBJECT_EXTENSION, (Node *) stmt->extname,
+								 &rel, AccessExclusiveLock, false);
+	Assert(rel == NULL);
+	if (refAddress)
+		*refAddress = refAddr;
+
+	if (stmt->remove)
+	{
+		/* NO DEPENDS ON: remove any matching AUTO_EXTENSION dependency */
+		deleteDependencyRecordsForSpecific(address.classId, address.objectId,
+										   DEPENDENCY_AUTO_EXTENSION,
+										   refAddr.classId, refAddr.objectId);
+	}
+	else
+	{
+		List	   *currexts;
+
+		/* Avoid duplicates */
+		currexts = getAutoExtensionsOfObject(address.classId,
+											 address.objectId);
+		if (!list_member_oid(currexts, refAddr.objectId))
+			recordDependencyOn(&address, &refAddr, DEPENDENCY_AUTO_EXTENSION);
+	}
+
+	return address;
+}
+
+/*
+ * Executes an ALTER OBJECT / SET SCHEMA statement.  Based on the object
+ * type, the function appropriate to that type is executed.
+ *
+ * Return value is that of the altered object.
+ *
+ * oldSchemaAddr is an output argument which, if not NULL, is set to the object
+ * address of the original schema.
+ *
+ * Each switch branch is responsible for filling oldNspOid whenever
+ * oldSchemaAddr was requested; the generic path always sets it.
+ */
+ObjectAddress
+ExecAlterObjectSchemaStmt(AlterObjectSchemaStmt *stmt,
+						  ObjectAddress *oldSchemaAddr)
+{
+	ObjectAddress address;
+	Oid			oldNspOid;
+
+	switch (stmt->objectType)
+	{
+		case OBJECT_EXTENSION:
+			address = AlterExtensionNamespace(strVal(stmt->object), stmt->newschema,
+											  oldSchemaAddr ? &oldNspOid : NULL);
+			break;
+
+		case OBJECT_FOREIGN_TABLE:
+		case OBJECT_SEQUENCE:
+		case OBJECT_TABLE:
+		case OBJECT_VIEW:
+		case OBJECT_MATVIEW:
+			address = AlterTableNamespace(stmt,
+										  oldSchemaAddr ? &oldNspOid : NULL);
+			break;
+
+		case OBJECT_DOMAIN:
+		case OBJECT_TYPE:
+			address = AlterTypeNamespace(castNode(List, stmt->object), stmt->newschema,
+										 stmt->objectType,
+										 oldSchemaAddr ? &oldNspOid : NULL);
+			break;
+
+			/* generic code path */
+		case OBJECT_AGGREGATE:
+		case OBJECT_COLLATION:
+		case OBJECT_CONVERSION:
+		case OBJECT_FUNCTION:
+		case OBJECT_OPERATOR:
+		case OBJECT_OPCLASS:
+		case OBJECT_OPFAMILY:
+		case OBJECT_PROCEDURE:
+		case OBJECT_ROUTINE:
+		case OBJECT_STATISTIC_EXT:
+		case OBJECT_TSCONFIGURATION:
+		case OBJECT_TSDICTIONARY:
+		case OBJECT_TSPARSER:
+		case OBJECT_TSTEMPLATE:
+			{
+				Relation	catalog;
+				Relation	relation;
+				Oid			classId;
+				Oid			nspOid;
+
+				/* Lock the object, then open its catalog for update */
+				address = get_object_address(stmt->objectType,
+											 stmt->object,
+											 &relation,
+											 AccessExclusiveLock,
+											 false);
+				Assert(relation == NULL);
+				classId = address.classId;
+				catalog = table_open(classId, RowExclusiveLock);
+				nspOid = LookupCreationNamespace(stmt->newschema);
+
+				oldNspOid = AlterObjectNamespace_internal(catalog, address.objectId,
+														  nspOid);
+				table_close(catalog, RowExclusiveLock);
+			}
+			break;
+
+		default:
+			elog(ERROR, "unrecognized AlterObjectSchemaStmt type: %d",
+				 (int) stmt->objectType);
+			return InvalidObjectAddress;	/* keep compiler happy */
+	}
+
+	if (oldSchemaAddr)
+		ObjectAddressSet(*oldSchemaAddr, NamespaceRelationId, oldNspOid);
+
+	return address;
+}
+
+/*
+ * Change an object's namespace given its classOid and object Oid.
+ *
+ * Objects that don't have a namespace should be ignored.
+ *
+ * This function is currently used only by ALTER EXTENSION SET SCHEMA,
+ * so it only needs to cover object types that can be members of an
+ * extension, and it doesn't have to deal with certain special cases
+ * such as not wanting to process array types --- those should never
+ * be direct members of an extension anyway.  Nonetheless, we insist
+ * on listing all OCLASS types in the switch.
+ *
+ * Returns the OID of the object's previous namespace, or InvalidOid if
+ * object doesn't have a schema.
+ */
+Oid
+AlterObjectNamespace_oid(Oid classId, Oid objid, Oid nspOid,
+						 ObjectAddresses *objsMoved)
+{
+	Oid			oldNspOid = InvalidOid;
+	ObjectAddress dep;
+
+	dep.classId = classId;
+	dep.objectId = objid;
+	dep.objectSubId = 0;
+
+	switch (getObjectClass(&dep))
+	{
+		case OCLASS_CLASS:
+			{
+				Relation	rel;
+
+				/* Relations need the full table-namespace machinery */
+				rel = relation_open(objid, AccessExclusiveLock);
+				oldNspOid = RelationGetNamespace(rel);
+
+				AlterTableNamespaceInternal(rel, oldNspOid, nspOid, objsMoved);
+
+				/* keep the lock until commit */
+				relation_close(rel, NoLock);
+				break;
+			}
+
+		case OCLASS_TYPE:
+			oldNspOid = AlterTypeNamespace_oid(objid, nspOid, objsMoved);
+			break;
+
+			/* Object types handled by the generic single-row code path */
+		case OCLASS_PROC:
+		case OCLASS_COLLATION:
+		case OCLASS_CONVERSION:
+		case OCLASS_OPERATOR:
+		case OCLASS_OPCLASS:
+		case OCLASS_OPFAMILY:
+		case OCLASS_STATISTIC_EXT:
+		case OCLASS_TSPARSER:
+		case OCLASS_TSDICT:
+		case OCLASS_TSTEMPLATE:
+		case OCLASS_TSCONFIG:
+			{
+				Relation	catalog;
+
+				catalog = table_open(classId, RowExclusiveLock);
+
+				oldNspOid = AlterObjectNamespace_internal(catalog, objid,
+														  nspOid);
+
+				table_close(catalog, RowExclusiveLock);
+			}
+			break;
+
+		case OCLASS_CAST:
+		case OCLASS_CONSTRAINT:
+		case OCLASS_DEFAULT:
+		case OCLASS_LANGUAGE:
+		case OCLASS_LARGEOBJECT:
+		case OCLASS_AM:
+		case OCLASS_AMOP:
+		case OCLASS_AMPROC:
+		case OCLASS_REWRITE:
+		case OCLASS_TRIGGER:
+		case OCLASS_SCHEMA:
+		case OCLASS_ROLE:
+		case OCLASS_DATABASE:
+		case OCLASS_TBLSPACE:
+		case OCLASS_FDW:
+		case OCLASS_FOREIGN_SERVER:
+		case OCLASS_USER_MAPPING:
+		case OCLASS_DEFACL:
+		case OCLASS_EXTENSION:
+		case OCLASS_EVENT_TRIGGER:
+		case OCLASS_PARAMETER_ACL:
+		case OCLASS_POLICY:
+		case OCLASS_PUBLICATION:
+		case OCLASS_PUBLICATION_NAMESPACE:
+		case OCLASS_PUBLICATION_REL:
+		case OCLASS_SUBSCRIPTION:
+		case OCLASS_TRANSFORM:
+			/* ignore object types that don't have schema-qualified names */
+			break;
+
+			/*
+			 * There's intentionally no default: case here; we want the
+			 * compiler to warn if a new OCLASS hasn't been handled above.
+			 */
+	}
+
+	return oldNspOid;
+}
+
+/*
+ * Generic function to change the namespace of a given object, for simple
+ * cases (won't work for tables, nor other cases where we need to do more
+ * than change the namespace column of a single catalog entry).
+ *
+ * rel: catalog relation containing object (RowExclusiveLock'd by caller)
+ * objid: OID of object to change the namespace of
+ * nspOid: OID of new namespace
+ *
+ * Returns the OID of the object's previous namespace.
+ */
+static Oid
+AlterObjectNamespace_internal(Relation rel, Oid objid, Oid nspOid)
+{
+	Oid			classId = RelationGetRelid(rel);
+	int			oidCacheId = get_object_catcache_oid(classId);
+	int			nameCacheId = get_object_catcache_name(classId);
+	AttrNumber	Anum_name = get_object_attnum_name(classId);
+	AttrNumber	Anum_namespace = get_object_attnum_namespace(classId);
+	AttrNumber	Anum_owner = get_object_attnum_owner(classId);
+	Oid			oldNspOid;
+	Datum		name,
+				namespace;
+	bool		isnull;
+	HeapTuple	tup,
+				newtup;
+	Datum	   *values;
+	bool	   *nulls;
+	bool	   *replaces;
+
+	/* Fetch a modifiable copy of the object's catalog tuple */
+	tup = SearchSysCacheCopy1(oidCacheId, ObjectIdGetDatum(objid));
+	if (!HeapTupleIsValid(tup)) /* should not happen */
+		elog(ERROR, "cache lookup failed for object %u of catalog \"%s\"",
+			 objid, RelationGetRelationName(rel));
+
+	name = heap_getattr(tup, Anum_name, RelationGetDescr(rel), &isnull);
+	Assert(!isnull);
+	namespace = heap_getattr(tup, Anum_namespace, RelationGetDescr(rel),
+							 &isnull);
+	Assert(!isnull);
+	oldNspOid = DatumGetObjectId(namespace);
+
+	/*
+	 * If the object is already in the correct namespace, we don't need to do
+	 * anything except fire the object access hook.
+	 */
+	if (oldNspOid == nspOid)
+	{
+		InvokeObjectPostAlterHook(classId, objid, 0);
+		return oldNspOid;
+	}
+
+	/* Check basic namespace related issues */
+	CheckSetNamespace(oldNspOid, nspOid);
+
+	/* Permission checks ... superusers can always do it */
+	if (!superuser())
+	{
+		Datum		owner;
+		Oid			ownerId;
+		AclResult	aclresult;
+
+		/* Fail if object does not have an explicit owner */
+		if (Anum_owner <= 0)
+			ereport(ERROR,
+					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+					 errmsg("must be superuser to set schema of %s",
+							getObjectDescriptionOids(classId, objid))));
+
+		/* Otherwise, must be owner of the existing object */
+		owner = heap_getattr(tup, Anum_owner, RelationGetDescr(rel), &isnull);
+		Assert(!isnull);
+		ownerId = DatumGetObjectId(owner);
+
+		if (!has_privs_of_role(GetUserId(), ownerId))
+			aclcheck_error(ACLCHECK_NOT_OWNER, get_object_type(classId, objid),
+						   NameStr(*(DatumGetName(name))));
+
+		/* User must have CREATE privilege on new namespace */
+		aclresult = pg_namespace_aclcheck(nspOid, GetUserId(), ACL_CREATE);
+		if (aclresult != ACLCHECK_OK)
+			aclcheck_error(aclresult, OBJECT_SCHEMA,
+						   get_namespace_name(nspOid));
+	}
+
+	/*
+	 * Check for duplicate name (more friendly than unique-index failure).
+	 * Since this is just a friendliness check, we can just skip it in cases
+	 * where there isn't suitable support.
+	 *
+	 * Functions, collations, opclasses and opfamilies have specialized
+	 * conflict checks (their uniqueness involves more than just the name);
+	 * other objects fall through to a simple name+namespace syscache probe.
+	 */
+	if (classId == ProcedureRelationId)
+	{
+		Form_pg_proc proc = (Form_pg_proc) GETSTRUCT(tup);
+
+		IsThereFunctionInNamespace(NameStr(proc->proname), proc->pronargs,
+								   &proc->proargtypes, nspOid);
+	}
+	else if (classId == CollationRelationId)
+	{
+		Form_pg_collation coll = (Form_pg_collation) GETSTRUCT(tup);
+
+		IsThereCollationInNamespace(NameStr(coll->collname), nspOid);
+	}
+	else if (classId == OperatorClassRelationId)
+	{
+		Form_pg_opclass opc = (Form_pg_opclass) GETSTRUCT(tup);
+
+		IsThereOpClassInNamespace(NameStr(opc->opcname),
+								  opc->opcmethod, nspOid);
+	}
+	else if (classId == OperatorFamilyRelationId)
+	{
+		Form_pg_opfamily opf = (Form_pg_opfamily) GETSTRUCT(tup);
+
+		IsThereOpFamilyInNamespace(NameStr(opf->opfname),
+								   opf->opfmethod, nspOid);
+	}
+	else if (nameCacheId >= 0 &&
+			 SearchSysCacheExists2(nameCacheId, name,
+								   ObjectIdGetDatum(nspOid)))
+		report_namespace_conflict(classId,
+								  NameStr(*(DatumGetName(name))),
+								  nspOid);
+
+	/* Build modified tuple: only the namespace column changes */
+	values = palloc0(RelationGetNumberOfAttributes(rel) * sizeof(Datum));
+	nulls = palloc0(RelationGetNumberOfAttributes(rel) * sizeof(bool));
+	replaces = palloc0(RelationGetNumberOfAttributes(rel) * sizeof(bool));
+	values[Anum_namespace - 1] = ObjectIdGetDatum(nspOid);
+	replaces[Anum_namespace - 1] = true;
+	newtup = heap_modify_tuple(tup, RelationGetDescr(rel),
+							   values, nulls, replaces);
+
+	/* Perform actual update */
+	CatalogTupleUpdate(rel, &tup->t_self, newtup);
+
+	/* Release memory */
+	pfree(values);
+	pfree(nulls);
+	pfree(replaces);
+
+	/* update dependencies to point to the new schema */
+	changeDependencyFor(classId, objid,
+						NamespaceRelationId, oldNspOid, nspOid);
+
+	InvokeObjectPostAlterHook(classId, objid, 0);
+
+	return oldNspOid;
+}
+
+/*
+ * Executes an ALTER OBJECT / OWNER TO statement.  Based on the object
+ * type, the function appropriate to that type is executed.
+ *
+ * Return value is the address of the altered object.
+ */
+ObjectAddress
+ExecAlterOwnerStmt(AlterOwnerStmt *stmt)
+{
+	Oid			newowner = get_rolespec_oid(stmt->newowner, false);
+
+	switch (stmt->objectType)
+	{
+		case OBJECT_DATABASE:
+			return AlterDatabaseOwner(strVal(stmt->object), newowner);
+
+		case OBJECT_SCHEMA:
+			return AlterSchemaOwner(strVal(stmt->object), newowner);
+
+		case OBJECT_TYPE:
+		case OBJECT_DOMAIN:		/* same as TYPE */
+			return AlterTypeOwner(castNode(List, stmt->object), newowner, stmt->objectType);
+			break;
+
+		case OBJECT_FDW:
+			return AlterForeignDataWrapperOwner(strVal(stmt->object),
+												newowner);
+
+		case OBJECT_FOREIGN_SERVER:
+			return AlterForeignServerOwner(strVal(stmt->object),
+										   newowner);
+
+		case OBJECT_EVENT_TRIGGER:
+			return AlterEventTriggerOwner(strVal(stmt->object),
+										  newowner);
+
+		case OBJECT_PUBLICATION:
+			return AlterPublicationOwner(strVal(stmt->object),
+										 newowner);
+
+		case OBJECT_SUBSCRIPTION:
+			return AlterSubscriptionOwner(strVal(stmt->object),
+										  newowner);
+
+			/* Generic cases */
+		case OBJECT_AGGREGATE:
+		case OBJECT_COLLATION:
+		case OBJECT_CONVERSION:
+		case OBJECT_FUNCTION:
+		case OBJECT_LANGUAGE:
+		case OBJECT_LARGEOBJECT:
+		case OBJECT_OPERATOR:
+		case OBJECT_OPCLASS:
+		case OBJECT_OPFAMILY:
+		case OBJECT_PROCEDURE:
+		case OBJECT_ROUTINE:
+		case OBJECT_STATISTIC_EXT:
+		case OBJECT_TABLESPACE:
+		case OBJECT_TSDICTIONARY:
+		case OBJECT_TSCONFIGURATION:
+			{
+				Relation	catalog;
+				Relation	relation;
+				Oid			classId;
+				ObjectAddress address;
+
+				/* Lock the object, then open its catalog for update */
+				address = get_object_address(stmt->objectType,
+											 stmt->object,
+											 &relation,
+											 AccessExclusiveLock,
+											 false);
+				Assert(relation == NULL);
+				classId = address.classId;
+
+				/*
+				 * XXX - get_object_address returns Oid of pg_largeobject
+				 * catalog for OBJECT_LARGEOBJECT because of historical
+				 * reasons.  Fix it up here.
+				 */
+				if (classId == LargeObjectRelationId)
+					classId = LargeObjectMetadataRelationId;
+
+				catalog = table_open(classId, RowExclusiveLock);
+
+				AlterObjectOwner_internal(catalog, address.objectId, newowner);
+				table_close(catalog, RowExclusiveLock);
+
+				return address;
+			}
+			break;
+
+		default:
+			elog(ERROR, "unrecognized AlterOwnerStmt type: %d",
+				 (int) stmt->objectType);
+			return InvalidObjectAddress;	/* keep compiler happy */
+	}
+}
+
+/*
+ * Generic function to change the ownership of a given object, for simple
+ * cases (won't work for tables, nor other cases where we need to do more than
+ * change the ownership column of a single catalog entry).
+ *
+ * rel: catalog relation containing object (RowExclusiveLock'd by caller)
+ * objectId: OID of object to change the ownership of
+ * new_ownerId: OID of new object owner
+ *
+ * If the object already has the requested owner, nothing is updated but the
+ * object access hook still fires.
+ */
+void
+AlterObjectOwner_internal(Relation rel, Oid objectId, Oid new_ownerId)
+{
+	Oid			classId = RelationGetRelid(rel);
+	AttrNumber	Anum_oid = get_object_attnum_oid(classId);
+	AttrNumber	Anum_owner = get_object_attnum_owner(classId);
+	AttrNumber	Anum_namespace = get_object_attnum_namespace(classId);
+	AttrNumber	Anum_acl = get_object_attnum_acl(classId);
+	AttrNumber	Anum_name = get_object_attnum_name(classId);
+	HeapTuple	oldtup;
+	Datum		datum;
+	bool		isnull;
+	Oid			old_ownerId;
+	Oid			namespaceId = InvalidOid;
+
+	oldtup = get_catalog_object_by_oid(rel, Anum_oid, objectId);
+	if (oldtup == NULL)
+		elog(ERROR, "cache lookup failed for object %u of catalog \"%s\"",
+			 objectId, RelationGetRelationName(rel));
+
+	datum = heap_getattr(oldtup, Anum_owner,
+						 RelationGetDescr(rel), &isnull);
+	Assert(!isnull);
+	old_ownerId = DatumGetObjectId(datum);
+
+	/* Fetch the namespace, if this object type has one */
+	if (Anum_namespace != InvalidAttrNumber)
+	{
+		datum = heap_getattr(oldtup, Anum_namespace,
+							 RelationGetDescr(rel), &isnull);
+		Assert(!isnull);
+		namespaceId = DatumGetObjectId(datum);
+	}
+
+	if (old_ownerId != new_ownerId)
+	{
+		AttrNumber	nattrs;
+		HeapTuple	newtup;
+		Datum	   *values;
+		bool	   *nulls;
+		bool	   *replaces;
+
+		/* Superusers can bypass permission checks */
+		if (!superuser())
+		{
+			/* must be owner */
+			if (!has_privs_of_role(GetUserId(), old_ownerId))
+			{
+				char	   *objname;
+				char		namebuf[NAMEDATALEN];
+
+				/*
+				 * Use the object's name in the error message if it has one;
+				 * otherwise fall back to printing its OID.
+				 */
+				if (Anum_name != InvalidAttrNumber)
+				{
+					datum = heap_getattr(oldtup, Anum_name,
+										 RelationGetDescr(rel), &isnull);
+					Assert(!isnull);
+					objname = NameStr(*DatumGetName(datum));
+				}
+				else
+				{
+					snprintf(namebuf, sizeof(namebuf), "%u", objectId);
+					objname = namebuf;
+				}
+				aclcheck_error(ACLCHECK_NOT_OWNER, get_object_type(classId, objectId),
+							   objname);
+			}
+			/* Must be able to become new owner */
+			check_is_member_of_role(GetUserId(), new_ownerId);
+
+			/* New owner must have CREATE privilege on namespace */
+			if (OidIsValid(namespaceId))
+			{
+				AclResult	aclresult;
+
+				aclresult = pg_namespace_aclcheck(namespaceId, new_ownerId,
+												  ACL_CREATE);
+				if (aclresult != ACLCHECK_OK)
+					aclcheck_error(aclresult, OBJECT_SCHEMA,
+								   get_namespace_name(namespaceId));
+			}
+		}
+
+		/* Build a modified tuple */
+		nattrs = RelationGetNumberOfAttributes(rel);
+		values = palloc0(nattrs * sizeof(Datum));
+		nulls = palloc0(nattrs * sizeof(bool));
+		replaces = palloc0(nattrs * sizeof(bool));
+		values[Anum_owner - 1] = ObjectIdGetDatum(new_ownerId);
+		replaces[Anum_owner - 1] = true;
+
+		/*
+		 * Determine the modified ACL for the new owner.  This is only
+		 * necessary when the ACL is non-null.
+		 */
+		if (Anum_acl != InvalidAttrNumber)
+		{
+			datum = heap_getattr(oldtup,
+								 Anum_acl, RelationGetDescr(rel), &isnull);
+			if (!isnull)
+			{
+				Acl		   *newAcl;
+
+				newAcl = aclnewowner(DatumGetAclP(datum),
+									 old_ownerId, new_ownerId);
+				values[Anum_acl - 1] = PointerGetDatum(newAcl);
+				replaces[Anum_acl - 1] = true;
+			}
+		}
+
+		newtup = heap_modify_tuple(oldtup, RelationGetDescr(rel),
+								   values, nulls, replaces);
+
+		/* Perform actual update */
+		CatalogTupleUpdate(rel, &newtup->t_self, newtup);
+
+		/*
+		 * Update owner dependency reference.  Large objects' dependencies are
+		 * recorded against pg_largeobject, not the metadata catalog we have
+		 * open, so map the classId back before updating pg_shdepend.
+		 */
+		if (classId == LargeObjectMetadataRelationId)
+			classId = LargeObjectRelationId;
+		changeDependencyOnOwner(classId, objectId, new_ownerId);
+
+		/* Release memory */
+		pfree(values);
+		pfree(nulls);
+		pfree(replaces);
+	}
+
+	InvokeObjectPostAlterHook(classId, objectId, 0);
+}
diff --git a/src/backend/commands/amcmds.c b/src/backend/commands/amcmds.c
new file mode 100644
index 0000000..914cfa4
--- /dev/null
+++ b/src/backend/commands/amcmds.c
@@ -0,0 +1,269 @@
+/*-------------------------------------------------------------------------
+ *
+ * amcmds.c
+ * Routines for SQL commands that manipulate access methods.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/commands/amcmds.c
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/htup_details.h"
+#include "access/table.h"
+#include "catalog/catalog.h"
+#include "catalog/dependency.h"
+#include "catalog/indexing.h"
+#include "catalog/objectaccess.h"
+#include "catalog/pg_am.h"
+#include "catalog/pg_proc.h"
+#include "catalog/pg_type.h"
+#include "commands/defrem.h"
+#include "miscadmin.h"
+#include "parser/parse_func.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
+
+
+static Oid lookup_am_handler_func(List *handler_name, char amtype);
+static const char *get_am_type_string(char amtype);
+
+
+/*
+ * CreateAccessMethod
+ *		Registers a new access method.
+ *
+ * Requires superuser privilege.  Returns the address of the new pg_am entry.
+ */
+ObjectAddress
+CreateAccessMethod(CreateAmStmt *stmt)
+{
+	Relation	rel;
+	ObjectAddress myself;
+	ObjectAddress referenced;
+	Oid			amoid;
+	Oid			amhandler;
+	bool		nulls[Natts_pg_am];
+	Datum		values[Natts_pg_am];
+	HeapTuple	tup;
+
+	rel = table_open(AccessMethodRelationId, RowExclusiveLock);
+
+	/* Must be superuser */
+	if (!superuser())
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 errmsg("permission denied to create access method \"%s\"",
+						stmt->amname),
+				 errhint("Must be superuser to create an access method.")));
+
+	/* Check if name is used */
+	amoid = GetSysCacheOid1(AMNAME, Anum_pg_am_oid,
+							CStringGetDatum(stmt->amname));
+	if (OidIsValid(amoid))
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_DUPLICATE_OBJECT),
+				 errmsg("access method \"%s\" already exists",
+						stmt->amname)));
+	}
+
+	/*
+	 * Get the handler function oid, verifying the AM type while at it.
+	 */
+	amhandler = lookup_am_handler_func(stmt->handler_name, stmt->amtype);
+
+	/*
+	 * Insert tuple into pg_am.
+	 */
+	memset(values, 0, sizeof(values));
+	memset(nulls, false, sizeof(nulls));
+
+	amoid = GetNewOidWithIndex(rel, AmOidIndexId, Anum_pg_am_oid);
+	values[Anum_pg_am_oid - 1] = ObjectIdGetDatum(amoid);
+	values[Anum_pg_am_amname - 1] =
+		DirectFunctionCall1(namein, CStringGetDatum(stmt->amname));
+	values[Anum_pg_am_amhandler - 1] = ObjectIdGetDatum(amhandler);
+	values[Anum_pg_am_amtype - 1] = CharGetDatum(stmt->amtype);
+
+	tup = heap_form_tuple(RelationGetDescr(rel), values, nulls);
+
+	CatalogTupleInsert(rel, tup);
+	heap_freetuple(tup);
+
+	myself.classId = AccessMethodRelationId;
+	myself.objectId = amoid;
+	myself.objectSubId = 0;
+
+	/* Record dependency on handler function */
+	referenced.classId = ProcedureRelationId;
+	referenced.objectId = amhandler;
+	referenced.objectSubId = 0;
+
+	recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
+
+	/* If created as part of CREATE EXTENSION, record that too */
+	recordDependencyOnCurrentExtension(&myself, false);
+
+	InvokeObjectPostCreateHook(AccessMethodRelationId, amoid, 0);
+
+	table_close(rel, RowExclusiveLock);
+
+	return myself;
+}
+
+/*
+ * get_am_type_oid
+ *		Worker for various get_am_*_oid variants
+ *
+ * If missing_ok is false, throw an error if access method not found.  If
+ * true, just return InvalidOid.
+ *
+ * If amtype is not '\0', an error is raised if the AM found is not of the
+ * given type.
+ */
+static Oid
+get_am_type_oid(const char *amname, char amtype, bool missing_ok)
+{
+	Oid			result = InvalidOid;
+	HeapTuple	tuple;
+
+	tuple = SearchSysCache1(AMNAME, CStringGetDatum(amname));
+	if (HeapTupleIsValid(tuple))
+	{
+		Form_pg_am	form = (Form_pg_am) GETSTRUCT(tuple);
+
+		/* The AM exists; complain if it is of the wrong type */
+		if (amtype != '\0' &&
+			form->amtype != amtype)
+			ereport(ERROR,
+					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+					 errmsg("access method \"%s\" is not of type %s",
+							NameStr(form->amname),
+							get_am_type_string(amtype))));
+
+		result = form->oid;
+		ReleaseSysCache(tuple);
+	}
+
+	if (!OidIsValid(result) && !missing_ok)
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("access method \"%s\" does not exist", amname)));
+	return result;
+}
+
+/*
+ * get_index_am_oid - given an access method name, look up its OID
+ *		and verify it corresponds to an index AM.
+ *
+ * Returns InvalidOid if not found and missing_ok is true.
+ */
+Oid
+get_index_am_oid(const char *amname, bool missing_ok)
+{
+	return get_am_type_oid(amname, AMTYPE_INDEX, missing_ok);
+}
+
+/*
+ * get_table_am_oid - given an access method name, look up its OID
+ *		and verify it corresponds to a table AM.
+ *
+ * Returns InvalidOid if not found and missing_ok is true.
+ */
+Oid
+get_table_am_oid(const char *amname, bool missing_ok)
+{
+	return get_am_type_oid(amname, AMTYPE_TABLE, missing_ok);
+}
+
+/*
+ * get_am_oid - given an access method name, look up its OID.
+ *		The type is not checked.
+ *
+ * Returns InvalidOid if not found and missing_ok is true.
+ */
+Oid
+get_am_oid(const char *amname, bool missing_ok)
+{
+	return get_am_type_oid(amname, '\0', missing_ok);
+}
+
+/*
+ * get_am_name - given an access method OID, look up its name.
+ *
+ * Returns a palloc'd copy of the name, or NULL if no such access method.
+ */
+char *
+get_am_name(Oid amOid)
+{
+	HeapTuple	tuple;
+	char	   *amname;
+
+	tuple = SearchSysCache1(AMOID, ObjectIdGetDatum(amOid));
+	if (!HeapTupleIsValid(tuple))
+		return NULL;
+
+	amname = pstrdup(NameStr(((Form_pg_am) GETSTRUCT(tuple))->amname));
+	ReleaseSysCache(tuple);
+	return amname;
+}
+
+/*
+ * Convert single-character access method type into string for error reporting.
+ */
+static const char *
+get_am_type_string(char amtype)
+{
+	if (amtype == AMTYPE_INDEX)
+		return "INDEX";
+	if (amtype == AMTYPE_TABLE)
+		return "TABLE";
+
+	/* shouldn't happen */
+	elog(ERROR, "invalid access method type '%c'", amtype);
+	return NULL;				/* keep compiler quiet */
+}
+
+/*
+ * Convert a handler function name to an Oid.  If the return type of the
+ * function doesn't match the given AM type, an error is raised.
+ *
+ * This function either returns a valid function Oid or throws an error.
+ */
+static Oid
+lookup_am_handler_func(List *handler_name, char amtype)
+{
+	Oid			handlerOid;
+	Oid			funcargtypes[1] = {INTERNALOID};
+	Oid			expectedType = InvalidOid;
+
+	if (handler_name == NIL)
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_FUNCTION),
+				 errmsg("handler function is not specified")));
+
+	/* handlers have one argument of type internal */
+	handlerOid = LookupFuncName(handler_name, 1, funcargtypes, false);
+
+	/* check that handler has the correct return type */
+	switch (amtype)
+	{
+		case AMTYPE_INDEX:
+			expectedType = INDEX_AM_HANDLEROID;
+			break;
+		case AMTYPE_TABLE:
+			expectedType = TABLE_AM_HANDLEROID;
+			break;
+		default:
+			elog(ERROR, "unrecognized access method type \"%c\"", amtype);
+	}
+
+	if (get_func_rettype(handlerOid) != expectedType)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("function %s must return type %s",
+						get_func_name(handlerOid),
+						format_type_extended(expectedType, -1, 0))));
+
+	return handlerOid;
+}
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
new file mode 100644
index 0000000..01efdd5
--- /dev/null
+++ b/src/backend/commands/analyze.c
@@ -0,0 +1,3076 @@
+/*-------------------------------------------------------------------------
+ *
+ * analyze.c
+ * the Postgres statistics generator
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/commands/analyze.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <math.h>
+
+#include "access/detoast.h"
+#include "access/genam.h"
+#include "access/multixact.h"
+#include "access/relation.h"
+#include "access/sysattr.h"
+#include "access/table.h"
+#include "access/tableam.h"
+#include "access/transam.h"
+#include "access/tupconvert.h"
+#include "access/visibilitymap.h"
+#include "access/xact.h"
+#include "catalog/catalog.h"
+#include "catalog/index.h"
+#include "catalog/indexing.h"
+#include "catalog/pg_collation.h"
+#include "catalog/pg_inherits.h"
+#include "catalog/pg_namespace.h"
+#include "catalog/pg_statistic_ext.h"
+#include "commands/dbcommands.h"
+#include "commands/progress.h"
+#include "commands/tablecmds.h"
+#include "commands/vacuum.h"
+#include "common/pg_prng.h"
+#include "executor/executor.h"
+#include "foreign/fdwapi.h"
+#include "miscadmin.h"
+#include "nodes/nodeFuncs.h"
+#include "parser/parse_oper.h"
+#include "parser/parse_relation.h"
+#include "pgstat.h"
+#include "postmaster/autovacuum.h"
+#include "statistics/extended_stats_internal.h"
+#include "statistics/statistics.h"
+#include "storage/bufmgr.h"
+#include "storage/lmgr.h"
+#include "storage/proc.h"
+#include "storage/procarray.h"
+#include "utils/acl.h"
+#include "utils/attoptcache.h"
+#include "utils/builtins.h"
+#include "utils/datum.h"
+#include "utils/fmgroids.h"
+#include "utils/guc.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/pg_rusage.h"
+#include "utils/sampling.h"
+#include "utils/sortsupport.h"
+#include "utils/spccache.h"
+#include "utils/syscache.h"
+#include "utils/timestamp.h"
+
+
+/* Per-index data for ANALYZE */
+typedef struct AnlIndexData
+{
+	IndexInfo  *indexInfo;		/* BuildIndexInfo result */
+	double		tupleFract;		/* fraction of rows for partial index */
+	VacAttrStats **vacattrstats;	/* index attrs to analyze */
+	int			attr_cnt;		/* number of entries in vacattrstats */
+} AnlIndexData;
+
+
+/* Default statistics target (GUC parameter) */
+int			default_statistics_target = 100;
+
+/* A few variables that don't seem worth passing around as parameters */
+static MemoryContext anl_context = NULL;	/* per-ANALYZE working context */
+static BufferAccessStrategy vac_strategy;	/* buffer strategy from caller */
+
+
+/* private routines, in order of use within one ANALYZE run */
+static void do_analyze_rel(Relation onerel,
+						   VacuumParams *params, List *va_cols,
+						   AcquireSampleRowsFunc acquirefunc, BlockNumber relpages,
+						   bool inh, bool in_outer_xact, int elevel);
+static void compute_index_stats(Relation onerel, double totalrows,
+								AnlIndexData *indexdata, int nindexes,
+								HeapTuple *rows, int numrows,
+								MemoryContext col_context);
+static VacAttrStats *examine_attribute(Relation onerel, int attnum,
+									   Node *index_expr);
+static int	acquire_sample_rows(Relation onerel, int elevel,
+								HeapTuple *rows, int targrows,
+								double *totalrows, double *totaldeadrows);
+static int	compare_rows(const void *a, const void *b, void *arg);
+static int	acquire_inherited_sample_rows(Relation onerel, int elevel,
+										  HeapTuple *rows, int targrows,
+										  double *totalrows, double *totaldeadrows);
+static void update_attstats(Oid relid, bool inh,
+							int natts, VacAttrStats **vacattrstats);
+static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
+static Datum ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
+
+
+/*
+ * analyze_rel() -- analyze one relation
+ *
+ * relid identifies the relation to analyze.  If relation is supplied, use
+ * the name therein for reporting any failure to open/lock the rel; do not
+ * use it once we've successfully opened the rel, since it might be stale.
+ *
+ * params supplies the ANALYZE options and logging threshold; va_cols is an
+ * optional list of column names restricting which columns get analyzed;
+ * bstrategy is the buffer access strategy to use while reading the table
+ * (stashed in the static vac_strategy for use by the index-cleanup calls).
+ */
+void
+analyze_rel(Oid relid, RangeVar *relation,
+			VacuumParams *params, List *va_cols, bool in_outer_xact,
+			BufferAccessStrategy bstrategy)
+{
+	Relation	onerel;
+	int			elevel;
+	AcquireSampleRowsFunc acquirefunc = NULL;
+	BlockNumber relpages = 0;
+
+	/* Select logging level */
+	if (params->options & VACOPT_VERBOSE)
+		elevel = INFO;
+	else
+		elevel = DEBUG2;
+
+	/* Set up static variables */
+	vac_strategy = bstrategy;
+
+	/*
+	 * Check for user-requested abort.
+	 */
+	CHECK_FOR_INTERRUPTS();
+
+	/*
+	 * Open the relation, getting ShareUpdateExclusiveLock to ensure that two
+	 * ANALYZEs don't run on it concurrently.  (This also locks out a
+	 * concurrent VACUUM, which doesn't matter much at the moment but might
+	 * matter if we ever try to accumulate stats on dead tuples.)  If the rel
+	 * has been dropped since we last saw it, we don't need to process it.
+	 *
+	 * Make sure to generate only logs for ANALYZE in this case.
+	 */
+	onerel = vacuum_open_relation(relid, relation, params->options & ~(VACOPT_VACUUM),
+								  params->log_min_duration >= 0,
+								  ShareUpdateExclusiveLock);
+
+	/* leave if relation could not be opened or locked */
+	if (!onerel)
+		return;
+
+	/*
+	 * Check if relation needs to be skipped based on ownership.  This check
+	 * happens also when building the relation list to analyze for a manual
+	 * operation, and needs to be done additionally here as ANALYZE could
+	 * happen across multiple transactions where relation ownership could have
+	 * changed in-between.  Make sure to generate only logs for ANALYZE in
+	 * this case.
+	 */
+	if (!vacuum_is_relation_owner(RelationGetRelid(onerel),
+								  onerel->rd_rel,
+								  params->options & VACOPT_ANALYZE))
+	{
+		relation_close(onerel, ShareUpdateExclusiveLock);
+		return;
+	}
+
+	/*
+	 * Silently ignore tables that are temp tables of other backends ---
+	 * trying to analyze these is rather pointless, since their contents are
+	 * probably not up-to-date on disk.  (We don't throw a warning here; it
+	 * would just lead to chatter during a database-wide ANALYZE.)
+	 */
+	if (RELATION_IS_OTHER_TEMP(onerel))
+	{
+		relation_close(onerel, ShareUpdateExclusiveLock);
+		return;
+	}
+
+	/*
+	 * We can ANALYZE any table except pg_statistic. See update_attstats
+	 */
+	if (RelationGetRelid(onerel) == StatisticRelationId)
+	{
+		relation_close(onerel, ShareUpdateExclusiveLock);
+		return;
+	}
+
+	/*
+	 * Check that it's of an analyzable relkind, and set up appropriately.
+	 */
+	if (onerel->rd_rel->relkind == RELKIND_RELATION ||
+		onerel->rd_rel->relkind == RELKIND_MATVIEW)
+	{
+		/* Regular table, so we'll use the regular row acquisition function */
+		acquirefunc = acquire_sample_rows;
+		/* Also get regular table's size */
+		relpages = RelationGetNumberOfBlocks(onerel);
+	}
+	else if (onerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+	{
+		/*
+		 * For a foreign table, call the FDW's hook function to see whether it
+		 * supports analysis.
+		 */
+		FdwRoutine *fdwroutine;
+		bool		ok = false;
+
+		fdwroutine = GetFdwRoutineForRelation(onerel, false);
+
+		if (fdwroutine->AnalyzeForeignTable != NULL)
+			ok = fdwroutine->AnalyzeForeignTable(onerel,
+												 &acquirefunc,
+												 &relpages);
+
+		if (!ok)
+		{
+			ereport(WARNING,
+					(errmsg("skipping \"%s\" --- cannot analyze this foreign table",
+							RelationGetRelationName(onerel))));
+			relation_close(onerel, ShareUpdateExclusiveLock);
+			return;
+		}
+	}
+	else if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+	{
+		/*
+		 * For partitioned tables, we want to do the recursive ANALYZE below.
+		 */
+	}
+	else
+	{
+		/* No need for a WARNING if we already complained during VACUUM */
+		if (!(params->options & VACOPT_VACUUM))
+			ereport(WARNING,
+					(errmsg("skipping \"%s\" --- cannot analyze non-tables or special system tables",
+							RelationGetRelationName(onerel))));
+		relation_close(onerel, ShareUpdateExclusiveLock);
+		return;
+	}
+
+	/*
+	 * OK, let's do it.  First, initialize progress reporting.
+	 */
+	pgstat_progress_start_command(PROGRESS_COMMAND_ANALYZE,
+								  RelationGetRelid(onerel));
+
+	/*
+	 * Do the normal non-recursive ANALYZE.  We can skip this for partitioned
+	 * tables, which don't contain any rows.
+	 */
+	if (onerel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+		do_analyze_rel(onerel, params, va_cols, acquirefunc,
+					   relpages, false, in_outer_xact, elevel);
+
+	/*
+	 * If there are child tables, do recursive ANALYZE.
+	 */
+	if (onerel->rd_rel->relhassubclass)
+		do_analyze_rel(onerel, params, va_cols, acquirefunc, relpages,
+					   true, in_outer_xact, elevel);
+
+	/*
+	 * Close source relation now, but keep lock so that no one deletes it
+	 * before we commit.  (If someone did, they'd fail to clean up the entries
+	 * we made in pg_statistic.  Also, releasing the lock before commit would
+	 * expose us to concurrent-update failures in update_attstats.)
+	 */
+	relation_close(onerel, NoLock);
+
+	pgstat_progress_end_command();
+}
+
+/*
+ * do_analyze_rel() -- analyze one relation, recursively or not
+ *
+ * Note that "acquirefunc" is only relevant for the non-inherited case.
+ * For the inherited case, acquire_inherited_sample_rows() determines the
+ * appropriate acquirefunc for each child table.
+ *
+ * "params", "va_cols" and "in_outer_xact" are passed through from
+ * analyze_rel(); "relpages" is the table's size in blocks (fed to
+ * vac_update_relstats), "inh" selects building inherited statistics, and
+ * "elevel" is the message level for our progress reports.
+ */
+static void
+do_analyze_rel(Relation onerel, VacuumParams *params,
+			   List *va_cols, AcquireSampleRowsFunc acquirefunc,
+			   BlockNumber relpages, bool inh, bool in_outer_xact,
+			   int elevel)
+{
+	int			attr_cnt,
+				tcnt,
+				i,
+				ind;
+	Relation   *Irel;
+	int			nindexes;
+	bool		hasindex;
+	VacAttrStats **vacattrstats;
+	AnlIndexData *indexdata;
+	int			targrows,
+				numrows,
+				minrows;
+	double		totalrows,
+				totaldeadrows;
+	HeapTuple  *rows;
+	PGRUsage	ru0;
+	TimestampTz starttime = 0;
+	MemoryContext caller_context;
+	Oid			save_userid;
+	int			save_sec_context;
+	int			save_nestlevel;
+	int64		AnalyzePageHit = VacuumPageHit;
+	int64		AnalyzePageMiss = VacuumPageMiss;
+	int64		AnalyzePageDirty = VacuumPageDirty;
+	PgStat_Counter startreadtime = 0;
+	PgStat_Counter startwritetime = 0;
+
+	if (inh)
+		ereport(elevel,
+				(errmsg("analyzing \"%s.%s\" inheritance tree",
+						get_namespace_name(RelationGetNamespace(onerel)),
+						RelationGetRelationName(onerel))));
+	else
+		ereport(elevel,
+				(errmsg("analyzing \"%s.%s\"",
+						get_namespace_name(RelationGetNamespace(onerel)),
+						RelationGetRelationName(onerel))));
+
+	/*
+	 * Set up a working context so that we can easily free whatever junk gets
+	 * created.
+	 */
+	anl_context = AllocSetContextCreate(CurrentMemoryContext,
+										"Analyze",
+										ALLOCSET_DEFAULT_SIZES);
+	caller_context = MemoryContextSwitchTo(anl_context);
+
+	/*
+	 * Switch to the table owner's userid, so that any index functions are run
+	 * as that user.  Also lock down security-restricted operations and
+	 * arrange to make GUC variable changes local to this command.
+	 */
+	GetUserIdAndSecContext(&save_userid, &save_sec_context);
+	SetUserIdAndSecContext(onerel->rd_rel->relowner,
+						   save_sec_context | SECURITY_RESTRICTED_OPERATION);
+	save_nestlevel = NewGUCNestLevel();
+
+	/* measure elapsed time iff autovacuum logging requires it */
+	if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
+	{
+		if (track_io_timing)
+		{
+			startreadtime = pgStatBlockReadTime;
+			startwritetime = pgStatBlockWriteTime;
+		}
+
+		pg_rusage_init(&ru0);
+		/* NOTE(review): this inner test is redundant with the guard above */
+		if (params->log_min_duration >= 0)
+			starttime = GetCurrentTimestamp();
+	}
+
+	/*
+	 * Determine which columns to analyze
+	 *
+	 * Note that system attributes are never analyzed, so we just reject them
+	 * at the lookup stage.  We also reject duplicate column mentions.  (We
+	 * could alternatively ignore duplicates, but analyzing a column twice
+	 * won't work; we'd end up making a conflicting update in pg_statistic.)
+	 */
+	if (va_cols != NIL)
+	{
+		Bitmapset  *unique_cols = NULL;
+		ListCell   *le;
+
+		vacattrstats = (VacAttrStats **) palloc(list_length(va_cols) *
+												sizeof(VacAttrStats *));
+		tcnt = 0;
+		foreach(le, va_cols)
+		{
+			char	   *col = strVal(lfirst(le));
+
+			i = attnameAttNum(onerel, col, false);
+			if (i == InvalidAttrNumber)
+				ereport(ERROR,
+						(errcode(ERRCODE_UNDEFINED_COLUMN),
+						 errmsg("column \"%s\" of relation \"%s\" does not exist",
+								col, RelationGetRelationName(onerel))));
+			if (bms_is_member(i, unique_cols))
+				ereport(ERROR,
+						(errcode(ERRCODE_DUPLICATE_COLUMN),
+						 errmsg("column \"%s\" of relation \"%s\" appears more than once",
+								col, RelationGetRelationName(onerel))));
+			unique_cols = bms_add_member(unique_cols, i);
+
+			vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
+			if (vacattrstats[tcnt] != NULL)
+				tcnt++;
+		}
+		attr_cnt = tcnt;
+	}
+	else
+	{
+		attr_cnt = onerel->rd_att->natts;
+		vacattrstats = (VacAttrStats **)
+			palloc(attr_cnt * sizeof(VacAttrStats *));
+		tcnt = 0;
+		for (i = 1; i <= attr_cnt; i++)
+		{
+			vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
+			if (vacattrstats[tcnt] != NULL)
+				tcnt++;
+		}
+		attr_cnt = tcnt;
+	}
+
+	/*
+	 * Open all indexes of the relation, and see if there are any analyzable
+	 * columns in the indexes.  We do not analyze index columns if there was
+	 * an explicit column list in the ANALYZE command, however.
+	 *
+	 * If we are doing a recursive scan, we don't want to touch the parent's
+	 * indexes at all.  If we're processing a partitioned table, we need to
+	 * know if there are any indexes, but we don't want to process them.
+	 */
+	if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+	{
+		List	   *idxs = RelationGetIndexList(onerel);
+
+		Irel = NULL;
+		nindexes = 0;
+		hasindex = idxs != NIL;
+		list_free(idxs);
+	}
+	else if (!inh)
+	{
+		vac_open_indexes(onerel, AccessShareLock, &nindexes, &Irel);
+		hasindex = nindexes > 0;
+	}
+	else
+	{
+		Irel = NULL;
+		nindexes = 0;
+		hasindex = false;
+	}
+	indexdata = NULL;
+	if (nindexes > 0)
+	{
+		indexdata = (AnlIndexData *) palloc0(nindexes * sizeof(AnlIndexData));
+		for (ind = 0; ind < nindexes; ind++)
+		{
+			AnlIndexData *thisdata = &indexdata[ind];
+			IndexInfo  *indexInfo;
+
+			thisdata->indexInfo = indexInfo = BuildIndexInfo(Irel[ind]);
+			thisdata->tupleFract = 1.0; /* fix later if partial */
+			if (indexInfo->ii_Expressions != NIL && va_cols == NIL)
+			{
+				ListCell   *indexpr_item = list_head(indexInfo->ii_Expressions);
+
+				thisdata->vacattrstats = (VacAttrStats **)
+					palloc(indexInfo->ii_NumIndexAttrs * sizeof(VacAttrStats *));
+				tcnt = 0;
+				for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
+				{
+					int			keycol = indexInfo->ii_IndexAttrNumbers[i];
+
+					if (keycol == 0)
+					{
+						/* Found an index expression */
+						Node	   *indexkey;
+
+						if (indexpr_item == NULL)	/* shouldn't happen */
+							elog(ERROR, "too few entries in indexprs list");
+						indexkey = (Node *) lfirst(indexpr_item);
+						indexpr_item = lnext(indexInfo->ii_Expressions,
+											 indexpr_item);
+						thisdata->vacattrstats[tcnt] =
+							examine_attribute(Irel[ind], i + 1, indexkey);
+						if (thisdata->vacattrstats[tcnt] != NULL)
+							tcnt++;
+					}
+				}
+				thisdata->attr_cnt = tcnt;
+			}
+		}
+	}
+
+	/*
+	 * Determine how many rows we need to sample, using the worst case from
+	 * all analyzable columns.  We use a lower bound of 100 rows to avoid
+	 * possible overflow in Vitter's algorithm.  (Note: that will also be the
+	 * target in the corner case where there are no analyzable columns.)
+	 */
+	targrows = 100;
+	for (i = 0; i < attr_cnt; i++)
+	{
+		if (targrows < vacattrstats[i]->minrows)
+			targrows = vacattrstats[i]->minrows;
+	}
+	for (ind = 0; ind < nindexes; ind++)
+	{
+		AnlIndexData *thisdata = &indexdata[ind];
+
+		for (i = 0; i < thisdata->attr_cnt; i++)
+		{
+			if (targrows < thisdata->vacattrstats[i]->minrows)
+				targrows = thisdata->vacattrstats[i]->minrows;
+		}
+	}
+
+	/*
+	 * Look at extended statistics objects too, as those may define custom
+	 * statistics target. So we may need to sample more rows and then build
+	 * the statistics with enough detail.
+	 */
+	minrows = ComputeExtStatisticsRows(onerel, attr_cnt, vacattrstats);
+
+	if (targrows < minrows)
+		targrows = minrows;
+
+	/*
+	 * Acquire the sample rows
+	 */
+	rows = (HeapTuple *) palloc(targrows * sizeof(HeapTuple));
+	pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE,
+								 inh ? PROGRESS_ANALYZE_PHASE_ACQUIRE_SAMPLE_ROWS_INH :
+								 PROGRESS_ANALYZE_PHASE_ACQUIRE_SAMPLE_ROWS);
+	if (inh)
+		numrows = acquire_inherited_sample_rows(onerel, elevel,
+												rows, targrows,
+												&totalrows, &totaldeadrows);
+	else
+		numrows = (*acquirefunc) (onerel, elevel,
+								  rows, targrows,
+								  &totalrows, &totaldeadrows);
+
+	/*
+	 * Compute the statistics.  Temporary results during the calculations for
+	 * each column are stored in a child context.  The calc routines are
+	 * responsible to make sure that whatever they store into the VacAttrStats
+	 * structure is allocated in anl_context.
+	 */
+	if (numrows > 0)
+	{
+		MemoryContext col_context,
+					old_context;
+
+		pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE,
+									 PROGRESS_ANALYZE_PHASE_COMPUTE_STATS);
+
+		col_context = AllocSetContextCreate(anl_context,
+											"Analyze Column",
+											ALLOCSET_DEFAULT_SIZES);
+		old_context = MemoryContextSwitchTo(col_context);
+
+		for (i = 0; i < attr_cnt; i++)
+		{
+			VacAttrStats *stats = vacattrstats[i];
+			AttributeOpts *aopt;
+
+			stats->rows = rows;
+			stats->tupDesc = onerel->rd_att;
+			stats->compute_stats(stats,
+								 std_fetch_func,
+								 numrows,
+								 totalrows);
+
+			/*
+			 * If the appropriate flavor of the n_distinct option is
+			 * specified, override with the corresponding value.
+			 */
+			aopt = get_attribute_options(onerel->rd_id, stats->attr->attnum);
+			if (aopt != NULL)
+			{
+				float8		n_distinct;
+
+				n_distinct = inh ? aopt->n_distinct_inherited : aopt->n_distinct;
+				if (n_distinct != 0.0)
+					stats->stadistinct = n_distinct;
+			}
+
+			/* reclaim this column's temporary allocations before the next */
+			MemoryContextResetAndDeleteChildren(col_context);
+		}
+
+		if (nindexes > 0)
+			compute_index_stats(onerel, totalrows,
+								indexdata, nindexes,
+								rows, numrows,
+								col_context);
+
+		MemoryContextSwitchTo(old_context);
+		MemoryContextDelete(col_context);
+
+		/*
+		 * Emit the completed stats rows into pg_statistic, replacing any
+		 * previous statistics for the target columns.  (If there are stats in
+		 * pg_statistic for columns we didn't process, we leave them alone.)
+		 */
+		update_attstats(RelationGetRelid(onerel), inh,
+						attr_cnt, vacattrstats);
+
+		for (ind = 0; ind < nindexes; ind++)
+		{
+			AnlIndexData *thisdata = &indexdata[ind];
+
+			update_attstats(RelationGetRelid(Irel[ind]), false,
+							thisdata->attr_cnt, thisdata->vacattrstats);
+		}
+
+		/* Build extended statistics (if there are any). */
+		BuildRelationExtStatistics(onerel, inh, totalrows, numrows, rows,
+								   attr_cnt, vacattrstats);
+	}
+
+	pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE,
+								 PROGRESS_ANALYZE_PHASE_FINALIZE_ANALYZE);
+
+	/*
+	 * Update pages/tuples stats in pg_class ... but not if we're doing
+	 * inherited stats.
+	 *
+	 * We assume that VACUUM hasn't set pg_class.reltuples already, even
+	 * during a VACUUM ANALYZE.  Although VACUUM often updates pg_class,
+	 * exceptions exist.  A "VACUUM (ANALYZE, INDEX_CLEANUP OFF)" command will
+	 * never update pg_class entries for index relations.  It's also possible
+	 * that an individual index's pg_class entry won't be updated during
+	 * VACUUM if the index AM returns NULL from its amvacuumcleanup() routine.
+	 */
+	if (!inh)
+	{
+		BlockNumber relallvisible;
+
+		visibilitymap_count(onerel, &relallvisible, NULL);
+
+		/* Update pg_class for table relation */
+		vac_update_relstats(onerel,
+							relpages,
+							totalrows,
+							relallvisible,
+							hasindex,
+							InvalidTransactionId,
+							InvalidMultiXactId,
+							NULL, NULL,
+							in_outer_xact);
+
+		/* Same for indexes */
+		for (ind = 0; ind < nindexes; ind++)
+		{
+			AnlIndexData *thisdata = &indexdata[ind];
+			double		totalindexrows;
+
+			totalindexrows = ceil(thisdata->tupleFract * totalrows);
+			vac_update_relstats(Irel[ind],
+								RelationGetNumberOfBlocks(Irel[ind]),
+								totalindexrows,
+								0,
+								false,
+								InvalidTransactionId,
+								InvalidMultiXactId,
+								NULL, NULL,
+								in_outer_xact);
+		}
+	}
+	else if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+	{
+		/*
+		 * Partitioned tables don't have storage, so we don't set any fields
+		 * in their pg_class entries except for reltuples and relhasindex.
+		 */
+		vac_update_relstats(onerel, -1, totalrows,
+							0, hasindex, InvalidTransactionId,
+							InvalidMultiXactId,
+							NULL, NULL,
+							in_outer_xact);
+	}
+
+	/*
+	 * Now report ANALYZE to the cumulative stats system.  For regular tables,
+	 * we do it only if not doing inherited stats.  For partitioned tables, we
+	 * only do it for inherited stats. (We're never called for not-inherited
+	 * stats on partitioned tables anyway.)
+	 *
+	 * Reset the changes_since_analyze counter only if we analyzed all
+	 * columns; otherwise, there is still work for auto-analyze to do.
+	 */
+	if (!inh)
+		pgstat_report_analyze(onerel, totalrows, totaldeadrows,
+							  (va_cols == NIL));
+	else if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+		pgstat_report_analyze(onerel, 0, 0, (va_cols == NIL));
+
+	/*
+	 * If this isn't part of VACUUM ANALYZE, let index AMs do cleanup.
+	 *
+	 * Note that most index AMs perform a no-op as a matter of policy for
+	 * amvacuumcleanup() when called in ANALYZE-only mode.  The only exception
+	 * among core index AMs is GIN/ginvacuumcleanup().
+	 */
+	if (!(params->options & VACOPT_VACUUM))
+	{
+		for (ind = 0; ind < nindexes; ind++)
+		{
+			IndexBulkDeleteResult *stats;
+			IndexVacuumInfo ivinfo;
+
+			ivinfo.index = Irel[ind];
+			ivinfo.analyze_only = true;
+			ivinfo.estimated_count = true;
+			ivinfo.message_level = elevel;
+			ivinfo.num_heap_tuples = onerel->rd_rel->reltuples;
+			ivinfo.strategy = vac_strategy;
+
+			stats = index_vacuum_cleanup(&ivinfo, NULL);
+
+			if (stats)
+				pfree(stats);
+		}
+	}
+
+	/* Done with indexes */
+	vac_close_indexes(nindexes, Irel, NoLock);
+
+	/* Log the action if appropriate */
+	if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
+	{
+		TimestampTz endtime = GetCurrentTimestamp();
+
+		if (params->log_min_duration == 0 ||
+			TimestampDifferenceExceeds(starttime, endtime,
+									   params->log_min_duration))
+		{
+			long		delay_in_ms;
+			double		read_rate = 0;
+			double		write_rate = 0;
+			StringInfoData buf;
+
+			/*
+			 * Calculate the difference in the Page Hit/Miss/Dirty that
+			 * happened as part of the analyze by subtracting out the
+			 * pre-analyze values which we saved above.
+			 */
+			AnalyzePageHit = VacuumPageHit - AnalyzePageHit;
+			AnalyzePageMiss = VacuumPageMiss - AnalyzePageMiss;
+			AnalyzePageDirty = VacuumPageDirty - AnalyzePageDirty;
+
+			/*
+			 * We do not expect an analyze to take > 25 days and it simplifies
+			 * things a bit to use TimestampDifferenceMilliseconds.
+			 */
+			delay_in_ms = TimestampDifferenceMilliseconds(starttime, endtime);
+
+			/*
+			 * Note that we are reporting these read/write rates in the same
+			 * manner as VACUUM does, which means that while the 'average read
+			 * rate' here actually corresponds to page misses and resulting
+			 * reads which are also picked up by track_io_timing, if enabled,
+			 * the 'average write rate' is actually talking about the rate of
+			 * pages being dirtied, not being written out, so it's typical to
+			 * have a non-zero 'avg write rate' while I/O timings only reports
+			 * reads.
+			 *
+			 * It's not clear that an ANALYZE will ever result in
+			 * FlushBuffer() being called, but we track and support reporting
+			 * on I/O write time in case that changes as it's practically free
+			 * to do so anyway.
+			 */
+
+			if (delay_in_ms > 0)
+			{
+				read_rate = (double) BLCKSZ * AnalyzePageMiss / (1024 * 1024) /
+					(delay_in_ms / 1000.0);
+				write_rate = (double) BLCKSZ * AnalyzePageDirty / (1024 * 1024) /
+					(delay_in_ms / 1000.0);
+			}
+
+			/*
+			 * We split this up so we don't emit empty I/O timing values when
+			 * track_io_timing isn't enabled.
+			 */
+
+			initStringInfo(&buf);
+			appendStringInfo(&buf, _("automatic analyze of table \"%s.%s.%s\"\n"),
+							 get_database_name(MyDatabaseId),
+							 get_namespace_name(RelationGetNamespace(onerel)),
+							 RelationGetRelationName(onerel));
+			if (track_io_timing)
+			{
+				double		read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
+				double		write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
+
+				appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
+								 read_ms, write_ms);
+			}
+			appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
+							 read_rate, write_rate);
+			appendStringInfo(&buf, _("buffer usage: %lld hits, %lld misses, %lld dirtied\n"),
+							 (long long) AnalyzePageHit,
+							 (long long) AnalyzePageMiss,
+							 (long long) AnalyzePageDirty);
+			appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
+
+			ereport(LOG,
+					(errmsg_internal("%s", buf.data)));
+
+			pfree(buf.data);
+		}
+	}
+
+	/* Roll back any GUC changes executed by index functions */
+	AtEOXact_GUC(false, save_nestlevel);
+
+	/* Restore userid and security context */
+	SetUserIdAndSecContext(save_userid, save_sec_context);
+
+	/* Restore current context and release memory */
+	MemoryContextSwitchTo(caller_context);
+	MemoryContextDelete(anl_context);
+	anl_context = NULL;
+}
+
+/*
+ * Compute statistics about indexes of a relation
+ *
+ * "rows"/"numrows" are the sample previously collected by the caller.
+ * Per-column temporary results are built in "col_context", which we reset
+ * between columns; per-index working data lives in a private child context
+ * of anl_context.
+ */
+static void
+compute_index_stats(Relation onerel, double totalrows,
+					AnlIndexData *indexdata, int nindexes,
+					HeapTuple *rows, int numrows,
+					MemoryContext col_context)
+{
+	MemoryContext ind_context,
+				old_context;
+	Datum		values[INDEX_MAX_KEYS];
+	bool		isnull[INDEX_MAX_KEYS];
+	int			ind,
+				i;
+
+	ind_context = AllocSetContextCreate(anl_context,
+										"Analyze Index",
+										ALLOCSET_DEFAULT_SIZES);
+	old_context = MemoryContextSwitchTo(ind_context);
+
+	for (ind = 0; ind < nindexes; ind++)
+	{
+		AnlIndexData *thisdata = &indexdata[ind];
+		IndexInfo  *indexInfo = thisdata->indexInfo;
+		int			attr_cnt = thisdata->attr_cnt;
+		TupleTableSlot *slot;
+		EState	   *estate;
+		ExprContext *econtext;
+		ExprState  *predicate;
+		Datum	   *exprvals;
+		bool	   *exprnulls;
+		int			numindexrows,
+					tcnt,
+					rowno;
+		double		totalindexrows;
+
+		/* Ignore index if no columns to analyze and not partial */
+		if (attr_cnt == 0 && indexInfo->ii_Predicate == NIL)
+			continue;
+
+		/*
+		 * Need an EState for evaluation of index expressions and
+		 * partial-index predicates.  Create it in the per-index context to be
+		 * sure it gets cleaned up at the bottom of the loop.
+		 */
+		estate = CreateExecutorState();
+		econtext = GetPerTupleExprContext(estate);
+		/* Need a slot to hold the current heap tuple, too */
+		slot = MakeSingleTupleTableSlot(RelationGetDescr(onerel),
+										&TTSOpsHeapTuple);
+
+		/* Arrange for econtext's scan tuple to be the tuple under test */
+		econtext->ecxt_scantuple = slot;
+
+		/* Set up execution state for predicate. */
+		predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
+
+		/*
+		 * Compute and save index expression values.  The arrays are laid out
+		 * row-major with stride attr_cnt (matching the rowstride set below).
+		 */
+		exprvals = (Datum *) palloc(numrows * attr_cnt * sizeof(Datum));
+		exprnulls = (bool *) palloc(numrows * attr_cnt * sizeof(bool));
+		numindexrows = 0;
+		tcnt = 0;
+		for (rowno = 0; rowno < numrows; rowno++)
+		{
+			HeapTuple	heapTuple = rows[rowno];
+
+			vacuum_delay_point();
+
+			/*
+			 * Reset the per-tuple context each time, to reclaim any cruft
+			 * left behind by evaluating the predicate or index expressions.
+			 */
+			ResetExprContext(econtext);
+
+			/* Set up for predicate or expression evaluation */
+			ExecStoreHeapTuple(heapTuple, slot, false);
+
+			/* If index is partial, check predicate */
+			if (predicate != NULL)
+			{
+				if (!ExecQual(predicate, econtext))
+					continue;
+			}
+			numindexrows++;
+
+			if (attr_cnt > 0)
+			{
+				/*
+				 * Evaluate the index row to compute expression values. We
+				 * could do this by hand, but FormIndexDatum is convenient.
+				 */
+				FormIndexDatum(indexInfo,
+							   slot,
+							   estate,
+							   values,
+							   isnull);
+
+				/*
+				 * Save just the columns we care about.  We copy the values
+				 * into ind_context from the estate's per-tuple context.
+				 */
+				for (i = 0; i < attr_cnt; i++)
+				{
+					VacAttrStats *stats = thisdata->vacattrstats[i];
+					int			attnum = stats->attr->attnum;
+
+					if (isnull[attnum - 1])
+					{
+						exprvals[tcnt] = (Datum) 0;
+						exprnulls[tcnt] = true;
+					}
+					else
+					{
+						exprvals[tcnt] = datumCopy(values[attnum - 1],
+												   stats->attrtype->typbyval,
+												   stats->attrtype->typlen);
+						exprnulls[tcnt] = false;
+					}
+					tcnt++;
+				}
+			}
+		}
+
+		/*
+		 * Having counted the number of rows that pass the predicate in the
+		 * sample, we can estimate the total number of rows in the index.
+		 */
+		thisdata->tupleFract = (double) numindexrows / (double) numrows;
+		totalindexrows = ceil(thisdata->tupleFract * totalrows);
+
+		/*
+		 * Now we can compute the statistics for the expression columns.
+		 */
+		if (numindexrows > 0)
+		{
+			MemoryContextSwitchTo(col_context);
+			for (i = 0; i < attr_cnt; i++)
+			{
+				VacAttrStats *stats = thisdata->vacattrstats[i];
+
+				stats->exprvals = exprvals + i;
+				stats->exprnulls = exprnulls + i;
+				stats->rowstride = attr_cnt;
+				stats->compute_stats(stats,
+									 ind_fetch_func,
+									 numindexrows,
+									 totalindexrows);
+
+				MemoryContextResetAndDeleteChildren(col_context);
+			}
+		}
+
+		/* And clean up */
+		MemoryContextSwitchTo(ind_context);
+
+		ExecDropSingleTupleTableSlot(slot);
+		FreeExecutorState(estate);
+		MemoryContextResetAndDeleteChildren(ind_context);
+	}
+
+	MemoryContextSwitchTo(old_context);
+	MemoryContextDelete(ind_context);
+}
+
+/*
+ * examine_attribute -- pre-analysis of a single column
+ *
+ * Determine whether the column is analyzable; if so, create and initialize
+ * a VacAttrStats struct for it.  If not, return NULL.
+ *
+ * If index_expr isn't NULL, then we're trying to analyze an expression index,
+ * and index_expr is the expression tree representing the column's data.
+ */
+static VacAttrStats *
+examine_attribute(Relation onerel, int attnum, Node *index_expr)
+{
+	Form_pg_attribute attr = TupleDescAttr(onerel->rd_att, attnum - 1);
+	HeapTuple	typtuple;
+	VacAttrStats *stats;
+	int			i;
+	bool		ok;
+
+	/* Never analyze dropped columns */
+	if (attr->attisdropped)
+		return NULL;
+
+	/* Don't analyze column if user has specified not to */
+	if (attr->attstattarget == 0)
+		return NULL;
+
+	/*
+	 * Create the VacAttrStats struct.  Note that we only have a copy of the
+	 * fixed fields of the pg_attribute tuple.
+	 */
+	stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats));
+	stats->attr = (Form_pg_attribute) palloc(ATTRIBUTE_FIXED_PART_SIZE);
+	memcpy(stats->attr, attr, ATTRIBUTE_FIXED_PART_SIZE);
+
+	/*
+	 * When analyzing an expression index, believe the expression tree's type
+	 * not the column datatype --- the latter might be the opckeytype storage
+	 * type of the opclass, which is not interesting for our purposes.  (Note:
+	 * if we did anything with non-expression index columns, we'd need to
+	 * figure out where to get the correct type info from, but for now that's
+	 * not a problem.)	It's not clear whether anyone will care about the
+	 * typmod, but we store that too just in case.
+	 */
+	if (index_expr)
+	{
+		stats->attrtypid = exprType(index_expr);
+		stats->attrtypmod = exprTypmod(index_expr);
+
+		/*
+		 * If a collation has been specified for the index column, use that in
+		 * preference to anything else; but if not, fall back to whatever we
+		 * can get from the expression.
+		 */
+		if (OidIsValid(onerel->rd_indcollation[attnum - 1]))
+			stats->attrcollid = onerel->rd_indcollation[attnum - 1];
+		else
+			stats->attrcollid = exprCollation(index_expr);
+	}
+	else
+	{
+		stats->attrtypid = attr->atttypid;
+		stats->attrtypmod = attr->atttypmod;
+		stats->attrcollid = attr->attcollation;
+	}
+
+	/*
+	 * Look up the type.  stats->attrtype will point into this copied tuple,
+	 * so on success we intentionally keep it (it lives in anl_context).
+	 */
+	typtuple = SearchSysCacheCopy1(TYPEOID,
+								   ObjectIdGetDatum(stats->attrtypid));
+	if (!HeapTupleIsValid(typtuple))
+		elog(ERROR, "cache lookup failed for type %u", stats->attrtypid);
+	stats->attrtype = (Form_pg_type) GETSTRUCT(typtuple);
+	stats->anl_context = anl_context;
+	stats->tupattnum = attnum;
+
+	/*
+	 * The fields describing the stats->stavalues[n] element types default to
+	 * the type of the data being analyzed, but the type-specific typanalyze
+	 * function can change them if it wants to store something else.
+	 */
+	for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
+	{
+		stats->statypid[i] = stats->attrtypid;
+		stats->statyplen[i] = stats->attrtype->typlen;
+		stats->statypbyval[i] = stats->attrtype->typbyval;
+		stats->statypalign[i] = stats->attrtype->typalign;
+	}
+
+	/*
+	 * Call the type-specific typanalyze function.  If none is specified, use
+	 * std_typanalyze().
+	 */
+	if (OidIsValid(stats->attrtype->typanalyze))
+		ok = DatumGetBool(OidFunctionCall1(stats->attrtype->typanalyze,
+										   PointerGetDatum(stats)));
+	else
+		ok = std_typanalyze(stats);
+
+	/* If the column can't be analyzed after all, free everything we built */
+	if (!ok || stats->compute_stats == NULL || stats->minrows <= 0)
+	{
+		heap_freetuple(typtuple);
+		pfree(stats->attr);
+		pfree(stats);
+		return NULL;
+	}
+
+	return stats;
+}
+
+/*
+ * acquire_sample_rows -- acquire a random sample of rows from the table
+ *
+ * Selected rows are returned in the caller-allocated array rows[], which
+ * must have at least targrows entries.
+ * The actual number of rows selected is returned as the function result.
+ * We also estimate the total numbers of live and dead rows in the table,
+ * and return them into *totalrows and *totaldeadrows, respectively.
+ *
+ * The returned list of tuples is in order by physical position in the table.
+ * (We will rely on this later to derive correlation estimates.)
+ *
+ * As of May 2004 we use a new two-stage method: Stage one selects up
+ * to targrows random blocks (or all blocks, if there aren't so many).
+ * Stage two scans these blocks and uses the Vitter algorithm to create
+ * a random sample of targrows rows (or less, if there are less in the
+ * sample of blocks). The two stages are executed simultaneously: each
+ * block is processed as soon as stage one returns its number and while
+ * the rows are read stage two controls which ones are to be inserted
+ * into the sample.
+ *
+ * Although every row has an equal chance of ending up in the final
+ * sample, this sampling method is not perfect: not every possible
+ * sample has an equal chance of being selected. For large relations
+ * the number of different blocks represented by the sample tends to be
+ * too small. We can live with that for now. Improvements are welcome.
+ *
+ * An important property of this sampling method is that because we do
+ * look at a statistically unbiased set of blocks, we should get
+ * unbiased estimates of the average numbers of live and dead rows per
+ * block. The previous sampling method put too much credence in the row
+ * density near the start of the table.
+ */
+static int
+acquire_sample_rows(Relation onerel, int elevel,
+ HeapTuple *rows, int targrows,
+ double *totalrows, double *totaldeadrows)
+{
+ int numrows = 0; /* # rows now in reservoir */
+ double samplerows = 0; /* total # rows collected */
+ double liverows = 0; /* # live rows seen */
+ double deadrows = 0; /* # dead rows seen */
+ double rowstoskip = -1; /* -1 means not set yet */
+ uint32 randseed; /* Seed for block sampler(s) */
+ BlockNumber totalblocks;
+ TransactionId OldestXmin;
+ BlockSamplerData bs;
+ ReservoirStateData rstate;
+ TupleTableSlot *slot;
+ TableScanDesc scan;
+ BlockNumber nblocks;
+ BlockNumber blksdone = 0;
+#ifdef USE_PREFETCH
+ int prefetch_maximum = 0; /* blocks to prefetch if enabled */
+ BlockSamplerData prefetch_bs;
+#endif
+
+ Assert(targrows > 0);
+
+ totalblocks = RelationGetNumberOfBlocks(onerel);
+
+ /* Need a cutoff xmin for HeapTupleSatisfiesVacuum */
+ OldestXmin = GetOldestNonRemovableTransactionId(onerel);
+
+ /* Prepare for sampling block numbers */
+ randseed = pg_prng_uint32(&pg_global_prng_state);
+ nblocks = BlockSampler_Init(&bs, totalblocks, targrows, randseed);
+
+#ifdef USE_PREFETCH
+ prefetch_maximum = get_tablespace_maintenance_io_concurrency(onerel->rd_rel->reltablespace);
+ /* Create another BlockSampler, using the same seed, for prefetching */
+ if (prefetch_maximum)
+ (void) BlockSampler_Init(&prefetch_bs, totalblocks, targrows, randseed);
+#endif
+
+ /* Report sampling block numbers */
+ pgstat_progress_update_param(PROGRESS_ANALYZE_BLOCKS_TOTAL,
+ nblocks);
+
+ /* Prepare for sampling rows */
+ reservoir_init_selection_state(&rstate, targrows);
+
+ scan = table_beginscan_analyze(onerel);
+ slot = table_slot_create(onerel, NULL);
+
+#ifdef USE_PREFETCH
+
+ /*
+ * If we are doing prefetching, then go ahead and tell the kernel about
+ * the first set of pages we are going to want. This also moves our
+ * iterator out ahead of the main one being used, where we will keep it so
+ * that we're always pre-fetching out prefetch_maximum number of blocks
+ * ahead.
+ */
+ if (prefetch_maximum)
+ {
+ for (int i = 0; i < prefetch_maximum; i++)
+ {
+ BlockNumber prefetch_block;
+
+ if (!BlockSampler_HasMore(&prefetch_bs))
+ break;
+
+ prefetch_block = BlockSampler_Next(&prefetch_bs);
+ PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, prefetch_block);
+ }
+ }
+#endif
+
+ /* Outer loop over blocks to sample */
+ while (BlockSampler_HasMore(&bs))
+ {
+ bool block_accepted;
+ BlockNumber targblock = BlockSampler_Next(&bs);
+#ifdef USE_PREFETCH
+ BlockNumber prefetch_targblock = InvalidBlockNumber;
+
+ /*
+ * Make sure that every time the main BlockSampler is moved forward
+ * that our prefetch BlockSampler also gets moved forward, so that we
+ * always stay out ahead.
+ */
+ if (prefetch_maximum && BlockSampler_HasMore(&prefetch_bs))
+ prefetch_targblock = BlockSampler_Next(&prefetch_bs);
+#endif
+
+ vacuum_delay_point();
+
+ block_accepted = table_scan_analyze_next_block(scan, targblock, vac_strategy);
+
+#ifdef USE_PREFETCH
+
+ /*
+ * When pre-fetching, after we get a block, tell the kernel about the
+ * next one we will want, if there's any left.
+ *
+ * We want to do this even if the table_scan_analyze_next_block() call
+ * above decides against analyzing the block it picked.
+ */
+ if (prefetch_maximum && prefetch_targblock != InvalidBlockNumber)
+ PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, prefetch_targblock);
+#endif
+
+ /*
+ * Don't analyze if table_scan_analyze_next_block() indicated this
+ * block is unsuitable for analyzing.
+ */
+ if (!block_accepted)
+ continue;
+
+ while (table_scan_analyze_next_tuple(scan, OldestXmin, &liverows, &deadrows, slot))
+ {
+ /*
+ * The first targrows sample rows are simply copied into the
+ * reservoir. Then we start replacing tuples in the sample until
+ * we reach the end of the relation. This algorithm is from Jeff
+ * Vitter's paper (see full citation in utils/misc/sampling.c). It
+ * works by repeatedly computing the number of tuples to skip
+ * before selecting a tuple, which replaces a randomly chosen
+ * element of the reservoir (current set of tuples). At all times
+ * the reservoir is a true random sample of the tuples we've
+ * passed over so far, so when we fall off the end of the relation
+ * we're done.
+ */
+ if (numrows < targrows)
+ rows[numrows++] = ExecCopySlotHeapTuple(slot);
+ else
+ {
+ /*
+ * t in Vitter's paper is the number of records already
+ * processed. If we need to compute a new S value, we must
+ * use the not-yet-incremented value of samplerows as t.
+ */
+ if (rowstoskip < 0)
+ rowstoskip = reservoir_get_next_S(&rstate, samplerows, targrows);
+
+ if (rowstoskip <= 0)
+ {
+ /*
+ * Found a suitable tuple, so save it, replacing one old
+ * tuple at random
+ */
+ int k = (int) (targrows * sampler_random_fract(&rstate.randstate));
+
+ Assert(k >= 0 && k < targrows);
+ heap_freetuple(rows[k]);
+ rows[k] = ExecCopySlotHeapTuple(slot);
+ }
+
+ rowstoskip -= 1;
+ }
+
+ samplerows += 1;
+ }
+
+ pgstat_progress_update_param(PROGRESS_ANALYZE_BLOCKS_DONE,
+ ++blksdone);
+ }
+
+ ExecDropSingleTupleTableSlot(slot);
+ table_endscan(scan);
+
+ /*
+ * If we didn't find as many tuples as we wanted then we're done. No sort
+ * is needed, since they're already in order.
+ *
+ * Otherwise we need to sort the collected tuples by position
+ * (itempointer). It's not worth worrying about corner cases where the
+ * tuples are already sorted.
+ */
+ if (numrows == targrows)
+ qsort_interruptible((void *) rows, numrows, sizeof(HeapTuple),
+ compare_rows, NULL);
+
+ /*
+ * Estimate total numbers of live and dead rows in relation, extrapolating
+ * on the assumption that the average tuple density in pages we didn't
+ * scan is the same as in the pages we did scan. Since what we scanned is
+ * a random sample of the pages in the relation, this should be a good
+ * assumption.
+ */
+ if (bs.m > 0)
+ {
+ *totalrows = floor((liverows / bs.m) * totalblocks + 0.5);
+ *totaldeadrows = floor((deadrows / bs.m) * totalblocks + 0.5);
+ }
+ else
+ {
+ *totalrows = 0.0;
+ *totaldeadrows = 0.0;
+ }
+
+ /*
+ * Emit some interesting relation info
+ */
+ ereport(elevel,
+ (errmsg("\"%s\": scanned %d of %u pages, "
+ "containing %.0f live rows and %.0f dead rows; "
+ "%d rows in sample, %.0f estimated total rows",
+ RelationGetRelationName(onerel),
+ bs.m, totalblocks,
+ liverows, deadrows,
+ numrows, *totalrows)));
+
+ return numrows;
+}
+
+/*
+ * Comparator for sorting rows[] array
+ */
+static int
+compare_rows(const void *a, const void *b, void *arg)
+{
+ HeapTuple ha = *(const HeapTuple *) a;
+ HeapTuple hb = *(const HeapTuple *) b;
+ BlockNumber ba = ItemPointerGetBlockNumber(&ha->t_self);
+ OffsetNumber oa = ItemPointerGetOffsetNumber(&ha->t_self);
+ BlockNumber bb = ItemPointerGetBlockNumber(&hb->t_self);
+ OffsetNumber ob = ItemPointerGetOffsetNumber(&hb->t_self);
+
+ if (ba < bb)
+ return -1;
+ if (ba > bb)
+ return 1;
+ if (oa < ob)
+ return -1;
+ if (oa > ob)
+ return 1;
+ return 0;
+}
+
+
+/*
+ * acquire_inherited_sample_rows -- acquire sample rows from inheritance tree
+ *
+ * This has the same API as acquire_sample_rows, except that rows are
+ * collected from all inheritance children as well as the specified table.
+ * We fail and return zero if there are no inheritance children, or if all
+ * children are foreign tables that don't support ANALYZE.
+ */
+static int
+acquire_inherited_sample_rows(Relation onerel, int elevel,
+ HeapTuple *rows, int targrows,
+ double *totalrows, double *totaldeadrows)
+{
+ List *tableOIDs;
+ Relation *rels;
+ AcquireSampleRowsFunc *acquirefuncs;
+ double *relblocks;
+ double totalblocks;
+ int numrows,
+ nrels,
+ i;
+ ListCell *lc;
+ bool has_child;
+
+ /* Initialize output parameters to zero now, in case we exit early */
+ *totalrows = 0;
+ *totaldeadrows = 0;
+
+ /*
+ * Find all members of inheritance set. We only need AccessShareLock on
+ * the children.
+ */
+ tableOIDs =
+ find_all_inheritors(RelationGetRelid(onerel), AccessShareLock, NULL);
+
+ /*
+ * Check that there's at least one descendant, else fail. This could
+ * happen despite analyze_rel's relhassubclass check, if table once had a
+ * child but no longer does. In that case, we can clear the
+ * relhassubclass field so as not to make the same mistake again later.
+ * (This is safe because we hold ShareUpdateExclusiveLock.)
+ */
+ if (list_length(tableOIDs) < 2)
+ {
+ /* CCI because we already updated the pg_class row in this command */
+ CommandCounterIncrement();
+ SetRelationHasSubclass(RelationGetRelid(onerel), false);
+ ereport(elevel,
+ (errmsg("skipping analyze of \"%s.%s\" inheritance tree --- this inheritance tree contains no child tables",
+ get_namespace_name(RelationGetNamespace(onerel)),
+ RelationGetRelationName(onerel))));
+ return 0;
+ }
+
+ /*
+ * Identify acquirefuncs to use, and count blocks in all the relations.
+ * The result could overflow BlockNumber, so we use double arithmetic.
+ */
+ rels = (Relation *) palloc(list_length(tableOIDs) * sizeof(Relation));
+ acquirefuncs = (AcquireSampleRowsFunc *)
+ palloc(list_length(tableOIDs) * sizeof(AcquireSampleRowsFunc));
+ relblocks = (double *) palloc(list_length(tableOIDs) * sizeof(double));
+ totalblocks = 0;
+ nrels = 0;
+ has_child = false;
+ foreach(lc, tableOIDs)
+ {
+ Oid childOID = lfirst_oid(lc);
+ Relation childrel;
+ AcquireSampleRowsFunc acquirefunc = NULL;
+ BlockNumber relpages = 0;
+
+ /* We already got the needed lock */
+ childrel = table_open(childOID, NoLock);
+
+ /* Ignore if temp table of another backend */
+ if (RELATION_IS_OTHER_TEMP(childrel))
+ {
+ /* ... but release the lock on it */
+ Assert(childrel != onerel);
+ table_close(childrel, AccessShareLock);
+ continue;
+ }
+
+ /* Check table type (MATVIEW can't happen, but might as well allow) */
+ if (childrel->rd_rel->relkind == RELKIND_RELATION ||
+ childrel->rd_rel->relkind == RELKIND_MATVIEW)
+ {
+ /* Regular table, so use the regular row acquisition function */
+ acquirefunc = acquire_sample_rows;
+ relpages = RelationGetNumberOfBlocks(childrel);
+ }
+ else if (childrel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+ {
+ /*
+ * For a foreign table, call the FDW's hook function to see
+ * whether it supports analysis.
+ */
+ FdwRoutine *fdwroutine;
+ bool ok = false;
+
+ fdwroutine = GetFdwRoutineForRelation(childrel, false);
+
+ if (fdwroutine->AnalyzeForeignTable != NULL)
+ ok = fdwroutine->AnalyzeForeignTable(childrel,
+ &acquirefunc,
+ &relpages);
+
+ if (!ok)
+ {
+ /* ignore, but release the lock on it */
+ Assert(childrel != onerel);
+ table_close(childrel, AccessShareLock);
+ continue;
+ }
+ }
+ else
+ {
+ /*
+ * ignore, but release the lock on it. don't try to unlock the
+ * passed-in relation
+ */
+ Assert(childrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
+ if (childrel != onerel)
+ table_close(childrel, AccessShareLock);
+ else
+ table_close(childrel, NoLock);
+ continue;
+ }
+
+ /* OK, we'll process this child */
+ has_child = true;
+ rels[nrels] = childrel;
+ acquirefuncs[nrels] = acquirefunc;
+ relblocks[nrels] = (double) relpages;
+ totalblocks += (double) relpages;
+ nrels++;
+ }
+
+ /*
+ * If we don't have at least one child table to consider, fail. If the
+ * relation is a partitioned table, it's not counted as a child table.
+ */
+ if (!has_child)
+ {
+ ereport(elevel,
+ (errmsg("skipping analyze of \"%s.%s\" inheritance tree --- this inheritance tree contains no analyzable child tables",
+ get_namespace_name(RelationGetNamespace(onerel)),
+ RelationGetRelationName(onerel))));
+ return 0;
+ }
+
+ /*
+ * Now sample rows from each relation, proportionally to its fraction of
+ * the total block count. (This might be less than desirable if the child
+ * rels have radically different free-space percentages, but it's not
+ * clear that it's worth working harder.)
+ */
+ pgstat_progress_update_param(PROGRESS_ANALYZE_CHILD_TABLES_TOTAL,
+ nrels);
+ numrows = 0;
+ for (i = 0; i < nrels; i++)
+ {
+ Relation childrel = rels[i];
+ AcquireSampleRowsFunc acquirefunc = acquirefuncs[i];
+ double childblocks = relblocks[i];
+
+ /*
+ * Report progress. The sampling function will normally report blocks
+ * done/total, but we need to reset them to 0 here, so that they don't
+ * show an old value until that.
+ */
+ {
+ const int progress_index[] = {
+ PROGRESS_ANALYZE_CURRENT_CHILD_TABLE_RELID,
+ PROGRESS_ANALYZE_BLOCKS_DONE,
+ PROGRESS_ANALYZE_BLOCKS_TOTAL
+ };
+ const int64 progress_vals[] = {
+ RelationGetRelid(childrel),
+ 0,
+ 0,
+ };
+
+ pgstat_progress_update_multi_param(3, progress_index, progress_vals);
+ }
+
+ if (childblocks > 0)
+ {
+ int childtargrows;
+
+ childtargrows = (int) rint(targrows * childblocks / totalblocks);
+ /* Make sure we don't overrun due to roundoff error */
+ childtargrows = Min(childtargrows, targrows - numrows);
+ if (childtargrows > 0)
+ {
+ int childrows;
+ double trows,
+ tdrows;
+
+ /* Fetch a random sample of the child's rows */
+ childrows = (*acquirefunc) (childrel, elevel,
+ rows + numrows, childtargrows,
+ &trows, &tdrows);
+
+ /* We may need to convert from child's rowtype to parent's */
+ if (childrows > 0 &&
+ !equalTupleDescs(RelationGetDescr(childrel),
+ RelationGetDescr(onerel)))
+ {
+ TupleConversionMap *map;
+
+ map = convert_tuples_by_name(RelationGetDescr(childrel),
+ RelationGetDescr(onerel));
+ if (map != NULL)
+ {
+ int j;
+
+ for (j = 0; j < childrows; j++)
+ {
+ HeapTuple newtup;
+
+ newtup = execute_attr_map_tuple(rows[numrows + j], map);
+ heap_freetuple(rows[numrows + j]);
+ rows[numrows + j] = newtup;
+ }
+ free_conversion_map(map);
+ }
+ }
+
+ /* And add to counts */
+ numrows += childrows;
+ *totalrows += trows;
+ *totaldeadrows += tdrows;
+ }
+ }
+
+ /*
+ * Note: we cannot release the child-table locks, since we may have
+ * pointers to their TOAST tables in the sampled rows.
+ */
+ table_close(childrel, NoLock);
+ pgstat_progress_update_param(PROGRESS_ANALYZE_CHILD_TABLES_DONE,
+ i + 1);
+ }
+
+ return numrows;
+}
+
+
+/*
+ * update_attstats() -- update attribute statistics for one relation
+ *
+ * Statistics are stored in several places: the pg_class row for the
+ * relation has stats about the whole relation, and there is a
+ * pg_statistic row for each (non-system) attribute that has ever
+ * been analyzed. The pg_class values are updated by VACUUM, not here.
+ *
+ * pg_statistic rows are just added or updated normally. This means
+ * that pg_statistic will probably contain some deleted rows at the
+ * completion of a vacuum cycle, unless it happens to get vacuumed last.
+ *
+ * To keep things simple, we punt for pg_statistic, and don't try
+ * to compute or store rows for pg_statistic itself in pg_statistic.
+ * This could possibly be made to work, but it's not worth the trouble.
+ * Note analyze_rel() has seen to it that we won't come here when
+ * vacuuming pg_statistic itself.
+ *
+ * Note: there would be a race condition here if two backends could
+ * ANALYZE the same table concurrently. Presently, we lock that out
+ * by taking a self-exclusive lock on the relation in analyze_rel().
+ */
+static void
+update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats)
+{
+ Relation sd;
+ int attno;
+
+ if (natts <= 0)
+ return; /* nothing to do */
+
+ sd = table_open(StatisticRelationId, RowExclusiveLock);
+
+ for (attno = 0; attno < natts; attno++)
+ {
+ VacAttrStats *stats = vacattrstats[attno];
+ HeapTuple stup,
+ oldtup;
+ int i,
+ k,
+ n;
+ Datum values[Natts_pg_statistic];
+ bool nulls[Natts_pg_statistic];
+ bool replaces[Natts_pg_statistic];
+
+ /* Ignore attr if we weren't able to collect stats */
+ if (!stats->stats_valid)
+ continue;
+
+ /*
+ * Construct a new pg_statistic tuple
+ */
+ for (i = 0; i < Natts_pg_statistic; ++i)
+ {
+ nulls[i] = false;
+ replaces[i] = true;
+ }
+
+ values[Anum_pg_statistic_starelid - 1] = ObjectIdGetDatum(relid);
+ values[Anum_pg_statistic_staattnum - 1] = Int16GetDatum(stats->attr->attnum);
+ values[Anum_pg_statistic_stainherit - 1] = BoolGetDatum(inh);
+ values[Anum_pg_statistic_stanullfrac - 1] = Float4GetDatum(stats->stanullfrac);
+ values[Anum_pg_statistic_stawidth - 1] = Int32GetDatum(stats->stawidth);
+ values[Anum_pg_statistic_stadistinct - 1] = Float4GetDatum(stats->stadistinct);
+ i = Anum_pg_statistic_stakind1 - 1;
+ for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+ {
+ values[i++] = Int16GetDatum(stats->stakind[k]); /* stakindN */
+ }
+ i = Anum_pg_statistic_staop1 - 1;
+ for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+ {
+ values[i++] = ObjectIdGetDatum(stats->staop[k]); /* staopN */
+ }
+ i = Anum_pg_statistic_stacoll1 - 1;
+ for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+ {
+ values[i++] = ObjectIdGetDatum(stats->stacoll[k]); /* stacollN */
+ }
+ i = Anum_pg_statistic_stanumbers1 - 1;
+ for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+ {
+ int nnum = stats->numnumbers[k];
+
+ if (nnum > 0)
+ {
+ Datum *numdatums = (Datum *) palloc(nnum * sizeof(Datum));
+ ArrayType *arry;
+
+ for (n = 0; n < nnum; n++)
+ numdatums[n] = Float4GetDatum(stats->stanumbers[k][n]);
+ /* XXX knows more than it should about type float4: */
+ arry = construct_array(numdatums, nnum,
+ FLOAT4OID,
+ sizeof(float4), true, TYPALIGN_INT);
+ values[i++] = PointerGetDatum(arry); /* stanumbersN */
+ }
+ else
+ {
+ nulls[i] = true;
+ values[i++] = (Datum) 0;
+ }
+ }
+ i = Anum_pg_statistic_stavalues1 - 1;
+ for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+ {
+ if (stats->numvalues[k] > 0)
+ {
+ ArrayType *arry;
+
+ arry = construct_array(stats->stavalues[k],
+ stats->numvalues[k],
+ stats->statypid[k],
+ stats->statyplen[k],
+ stats->statypbyval[k],
+ stats->statypalign[k]);
+ values[i++] = PointerGetDatum(arry); /* stavaluesN */
+ }
+ else
+ {
+ nulls[i] = true;
+ values[i++] = (Datum) 0;
+ }
+ }
+
+ /* Is there already a pg_statistic tuple for this attribute? */
+ oldtup = SearchSysCache3(STATRELATTINH,
+ ObjectIdGetDatum(relid),
+ Int16GetDatum(stats->attr->attnum),
+ BoolGetDatum(inh));
+
+ if (HeapTupleIsValid(oldtup))
+ {
+ /* Yes, replace it */
+ stup = heap_modify_tuple(oldtup,
+ RelationGetDescr(sd),
+ values,
+ nulls,
+ replaces);
+ ReleaseSysCache(oldtup);
+ CatalogTupleUpdate(sd, &stup->t_self, stup);
+ }
+ else
+ {
+ /* No, insert new tuple */
+ stup = heap_form_tuple(RelationGetDescr(sd), values, nulls);
+ CatalogTupleInsert(sd, stup);
+ }
+
+ heap_freetuple(stup);
+ }
+
+ table_close(sd, RowExclusiveLock);
+}
+
+/*
+ * Standard fetch function for use by compute_stats subroutines.
+ *
+ * This exists to provide some insulation between compute_stats routines
+ * and the actual storage of the sample data.
+ */
+static Datum
+std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
+{
+ int attnum = stats->tupattnum;
+ HeapTuple tuple = stats->rows[rownum];
+ TupleDesc tupDesc = stats->tupDesc;
+
+ return heap_getattr(tuple, attnum, tupDesc, isNull);
+}
+
+/*
+ * Fetch function for analyzing index expressions.
+ *
+ * We have not bothered to construct index tuples, instead the data is
+ * just in Datum arrays.
+ */
+static Datum
+ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
+{
+ int i;
+
+ /* exprvals and exprnulls are already offset for proper column */
+ i = rownum * stats->rowstride;
+ *isNull = stats->exprnulls[i];
+ return stats->exprvals[i];
+}
+
+
+/*==========================================================================
+ *
+ * Code below this point represents the "standard" type-specific statistics
+ * analysis algorithms. This code can be replaced on a per-data-type basis
+ * by setting a nonzero value in pg_type.typanalyze.
+ *
+ *==========================================================================
+ */
+
+
+/*
+ * To avoid consuming too much memory during analysis and/or too much space
+ * in the resulting pg_statistic rows, we ignore varlena datums that are wider
+ * than WIDTH_THRESHOLD (after detoasting!). This is legitimate for MCV
+ * and distinct-value calculations since a wide value is unlikely to be
+ * duplicated at all, much less be a most-common value. For the same reason,
+ * ignoring wide values will not affect our estimates of histogram bin
+ * boundaries very much.
+ */
+#define WIDTH_THRESHOLD 1024
+
+#define swapInt(a,b) do {int _tmp; _tmp=a; a=b; b=_tmp;} while(0)
+#define swapDatum(a,b) do {Datum _tmp; _tmp=a; a=b; b=_tmp;} while(0)
+
+/*
+ * Extra information used by the default analysis routines
+ */
+typedef struct
+{
+ int count; /* # of duplicates */
+ int first; /* values[] index of first occurrence */
+} ScalarMCVItem;
+
+typedef struct
+{
+ SortSupport ssup;
+ int *tupnoLink;
+} CompareScalarsContext;
+
+
+static void compute_trivial_stats(VacAttrStatsP stats,
+ AnalyzeAttrFetchFunc fetchfunc,
+ int samplerows,
+ double totalrows);
+static void compute_distinct_stats(VacAttrStatsP stats,
+ AnalyzeAttrFetchFunc fetchfunc,
+ int samplerows,
+ double totalrows);
+static void compute_scalar_stats(VacAttrStatsP stats,
+ AnalyzeAttrFetchFunc fetchfunc,
+ int samplerows,
+ double totalrows);
+static int compare_scalars(const void *a, const void *b, void *arg);
+static int compare_mcvs(const void *a, const void *b, void *arg);
+static int analyze_mcv_list(int *mcv_counts,
+ int num_mcv,
+ double stadistinct,
+ double stanullfrac,
+ int samplerows,
+ double totalrows);
+
+
+/*
+ * std_typanalyze -- the default type-specific typanalyze function
+ */
+bool
+std_typanalyze(VacAttrStats *stats)
+{
+ Form_pg_attribute attr = stats->attr;
+ Oid ltopr;
+ Oid eqopr;
+ StdAnalyzeData *mystats;
+
+ /* If the attstattarget column is negative, use the default value */
+ /* NB: it is okay to scribble on stats->attr since it's a copy */
+ if (attr->attstattarget < 0)
+ attr->attstattarget = default_statistics_target;
+
+ /* Look for default "<" and "=" operators for column's type */
+ get_sort_group_operators(stats->attrtypid,
+ false, false, false,
+ &ltopr, &eqopr, NULL,
+ NULL);
+
+ /* Save the operator info for compute_stats routines */
+ mystats = (StdAnalyzeData *) palloc(sizeof(StdAnalyzeData));
+ mystats->eqopr = eqopr;
+ mystats->eqfunc = OidIsValid(eqopr) ? get_opcode(eqopr) : InvalidOid;
+ mystats->ltopr = ltopr;
+ stats->extra_data = mystats;
+
+ /*
+ * Determine which standard statistics algorithm to use
+ */
+ if (OidIsValid(eqopr) && OidIsValid(ltopr))
+ {
+ /* Seems to be a scalar datatype */
+ stats->compute_stats = compute_scalar_stats;
+ /*--------------------
+ * The following choice of minrows is based on the paper
+ * "Random sampling for histogram construction: how much is enough?"
+ * by Surajit Chaudhuri, Rajeev Motwani and Vivek Narasayya, in
+ * Proceedings of ACM SIGMOD International Conference on Management
+ * of Data, 1998, Pages 436-447. Their Corollary 1 to Theorem 5
+ * says that for table size n, histogram size k, maximum relative
+ * error in bin size f, and error probability gamma, the minimum
+ * random sample size is
+ * r = 4 * k * ln(2*n/gamma) / f^2
+ * Taking f = 0.5, gamma = 0.01, n = 10^6 rows, we obtain
+ * r = 305.82 * k
+ * Note that because of the log function, the dependence on n is
+ * quite weak; even at n = 10^12, a 300*k sample gives <= 0.66
+ * bin size error with probability 0.99. So there's no real need to
+ * scale for n, which is a good thing because we don't necessarily
+ * know it at this point.
+ *--------------------
+ */
+ stats->minrows = 300 * attr->attstattarget;
+ }
+ else if (OidIsValid(eqopr))
+ {
+ /* We can still recognize distinct values */
+ stats->compute_stats = compute_distinct_stats;
+ /* Might as well use the same minrows as above */
+ stats->minrows = 300 * attr->attstattarget;
+ }
+ else
+ {
+ /* Can't do much but the trivial stuff */
+ stats->compute_stats = compute_trivial_stats;
+ /* Might as well use the same minrows as above */
+ stats->minrows = 300 * attr->attstattarget;
+ }
+
+ return true;
+}
+
+
+/*
+ * compute_trivial_stats() -- compute very basic column statistics
+ *
+ * We use this when we cannot find a hash "=" operator for the datatype.
+ *
+ * We determine the fraction of non-null rows and the average datum width.
+ */
+static void
+compute_trivial_stats(VacAttrStatsP stats,
+ AnalyzeAttrFetchFunc fetchfunc,
+ int samplerows,
+ double totalrows)
+{
+ int i;
+ int null_cnt = 0;
+ int nonnull_cnt = 0;
+ double total_width = 0;
+ bool is_varlena = (!stats->attrtype->typbyval &&
+ stats->attrtype->typlen == -1);
+ bool is_varwidth = (!stats->attrtype->typbyval &&
+ stats->attrtype->typlen < 0);
+
+ for (i = 0; i < samplerows; i++)
+ {
+ Datum value;
+ bool isnull;
+
+ vacuum_delay_point();
+
+ value = fetchfunc(stats, i, &isnull);
+
+ /* Check for null/nonnull */
+ if (isnull)
+ {
+ null_cnt++;
+ continue;
+ }
+ nonnull_cnt++;
+
+ /*
+ * If it's a variable-width field, add up widths for average width
+ * calculation. Note that if the value is toasted, we use the toasted
+ * width. We don't bother with this calculation if it's a fixed-width
+ * type.
+ */
+ if (is_varlena)
+ {
+ total_width += VARSIZE_ANY(DatumGetPointer(value));
+ }
+ else if (is_varwidth)
+ {
+ /* must be cstring */
+ total_width += strlen(DatumGetCString(value)) + 1;
+ }
+ }
+
+ /* We can only compute average width if we found some non-null values. */
+ if (nonnull_cnt > 0)
+ {
+ stats->stats_valid = true;
+ /* Do the simple null-frac and width stats */
+ stats->stanullfrac = (double) null_cnt / (double) samplerows;
+ if (is_varwidth)
+ stats->stawidth = total_width / (double) nonnull_cnt;
+ else
+ stats->stawidth = stats->attrtype->typlen;
+ stats->stadistinct = 0.0; /* "unknown" */
+ }
+ else if (null_cnt > 0)
+ {
+ /* We found only nulls; assume the column is entirely null */
+ stats->stats_valid = true;
+ stats->stanullfrac = 1.0;
+ if (is_varwidth)
+ stats->stawidth = 0; /* "unknown" */
+ else
+ stats->stawidth = stats->attrtype->typlen;
+ stats->stadistinct = 0.0; /* "unknown" */
+ }
+}
+
+
+/*
+ * compute_distinct_stats() -- compute column statistics including ndistinct
+ *
+ * We use this when we can find only an "=" operator for the datatype.
+ *
+ * We determine the fraction of non-null rows, the average width, the
+ * most common values, and the (estimated) number of distinct values.
+ *
+ * The most common values are determined by brute force: we keep a list
+ * of previously seen values, ordered by number of times seen, as we scan
+ * the samples. A newly seen value is inserted just after the last
+ * multiply-seen value, causing the bottommost (oldest) singly-seen value
+ * to drop off the list. The accuracy of this method, and also its cost,
+ * depend mainly on the length of the list we are willing to keep.
+ */
+static void
+compute_distinct_stats(VacAttrStatsP stats,
+ AnalyzeAttrFetchFunc fetchfunc,
+ int samplerows,
+ double totalrows)
+{
+ int i;
+ int null_cnt = 0;
+ int nonnull_cnt = 0;
+ int toowide_cnt = 0;
+ double total_width = 0;
+ bool is_varlena = (!stats->attrtype->typbyval &&
+ stats->attrtype->typlen == -1);
+ bool is_varwidth = (!stats->attrtype->typbyval &&
+ stats->attrtype->typlen < 0);
+ FmgrInfo f_cmpeq;
+ typedef struct
+ {
+ Datum value;
+ int count;
+ } TrackItem;
+ TrackItem *track;
+ int track_cnt,
+ track_max;
+ int num_mcv = stats->attr->attstattarget;
+ StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data;
+
+ /*
+ * We track up to 2*n values for an n-element MCV list; but at least 10
+ */
+ track_max = 2 * num_mcv;
+ if (track_max < 10)
+ track_max = 10;
+ track = (TrackItem *) palloc(track_max * sizeof(TrackItem));
+ track_cnt = 0;
+
+ fmgr_info(mystats->eqfunc, &f_cmpeq);
+
+ for (i = 0; i < samplerows; i++)
+ {
+ Datum value;
+ bool isnull;
+ bool match;
+ int firstcount1,
+ j;
+
+ vacuum_delay_point();
+
+ value = fetchfunc(stats, i, &isnull);
+
+ /* Check for null/nonnull */
+ if (isnull)
+ {
+ null_cnt++;
+ continue;
+ }
+ nonnull_cnt++;
+
+ /*
+ * If it's a variable-width field, add up widths for average width
+ * calculation. Note that if the value is toasted, we use the toasted
+ * width. We don't bother with this calculation if it's a fixed-width
+ * type.
+ */
+ if (is_varlena)
+ {
+ total_width += VARSIZE_ANY(DatumGetPointer(value));
+
+ /*
+ * If the value is toasted, we want to detoast it just once to
+ * avoid repeated detoastings and resultant excess memory usage
+ * during the comparisons. Also, check to see if the value is
+ * excessively wide, and if so don't detoast at all --- just
+ * ignore the value.
+ */
+ if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
+ {
+ toowide_cnt++;
+ continue;
+ }
+ value = PointerGetDatum(PG_DETOAST_DATUM(value));
+ }
+ else if (is_varwidth)
+ {
+ /* must be cstring */
+ total_width += strlen(DatumGetCString(value)) + 1;
+ }
+
+ /*
+ * See if the value matches anything we're already tracking.
+ */
+ match = false;
+ firstcount1 = track_cnt;
+ for (j = 0; j < track_cnt; j++)
+ {
+ if (DatumGetBool(FunctionCall2Coll(&f_cmpeq,
+ stats->attrcollid,
+ value, track[j].value)))
+ {
+ match = true;
+ break;
+ }
+ if (j < firstcount1 && track[j].count == 1)
+ firstcount1 = j;
+ }
+
+ if (match)
+ {
+ /* Found a match */
+ track[j].count++;
+ /* This value may now need to "bubble up" in the track list */
+ while (j > 0 && track[j].count > track[j - 1].count)
+ {
+ swapDatum(track[j].value, track[j - 1].value);
+ swapInt(track[j].count, track[j - 1].count);
+ j--;
+ }
+ }
+ else
+ {
+ /* No match. Insert at head of count-1 list */
+ if (track_cnt < track_max)
+ track_cnt++;
+ for (j = track_cnt - 1; j > firstcount1; j--)
+ {
+ track[j].value = track[j - 1].value;
+ track[j].count = track[j - 1].count;
+ }
+ if (firstcount1 < track_cnt)
+ {
+ track[firstcount1].value = value;
+ track[firstcount1].count = 1;
+ }
+ }
+ }
+
+ /* We can only compute real stats if we found some non-null values. */
+ if (nonnull_cnt > 0)
+ {
+ int nmultiple,
+ summultiple;
+
+ stats->stats_valid = true;
+ /* Do the simple null-frac and width stats */
+ stats->stanullfrac = (double) null_cnt / (double) samplerows;
+ if (is_varwidth)
+ stats->stawidth = total_width / (double) nonnull_cnt;
+ else
+ stats->stawidth = stats->attrtype->typlen;
+
+ /* Count the number of values we found multiple times */
+ summultiple = 0;
+ for (nmultiple = 0; nmultiple < track_cnt; nmultiple++)
+ {
+ if (track[nmultiple].count == 1)
+ break;
+ summultiple += track[nmultiple].count;
+ }
+
+ if (nmultiple == 0)
+ {
+ /*
+ * If we found no repeated non-null values, assume it's a unique
+ * column; but be sure to discount for any nulls we found.
+ */
+ stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);
+ }
+ else if (track_cnt < track_max && toowide_cnt == 0 &&
+ nmultiple == track_cnt)
+ {
+ /*
+ * Our track list includes every value in the sample, and every
+ * value appeared more than once. Assume the column has just
+ * these values. (This case is meant to address columns with
+ * small, fixed sets of possible values, such as boolean or enum
+ * columns. If there are any values that appear just once in the
+ * sample, including too-wide values, we should assume that that's
+ * not what we're dealing with.)
+ */
+ stats->stadistinct = track_cnt;
+ }
+ else
+ {
+ /*----------
+ * Estimate the number of distinct values using the estimator
+ * proposed by Haas and Stokes in IBM Research Report RJ 10025:
+ * n*d / (n - f1 + f1*n/N)
+ * where f1 is the number of distinct values that occurred
+ * exactly once in our sample of n rows (from a total of N),
+ * and d is the total number of distinct values in the sample.
+ * This is their Duj1 estimator; the other estimators they
+ * recommend are considerably more complex, and are numerically
+ * very unstable when n is much smaller than N.
+ *
+ * In this calculation, we consider only non-nulls. We used to
+ * include rows with null values in the n and N counts, but that
+ * leads to inaccurate answers in columns with many nulls, and
+ * it's intuitively bogus anyway considering the desired result is
+ * the number of distinct non-null values.
+ *
+ * We assume (not very reliably!) that all the multiply-occurring
+ * values are reflected in the final track[] list, and the other
+ * nonnull values all appeared but once. (XXX this usually
+ * results in a drastic overestimate of ndistinct. Can we do
+ * any better?)
+ *----------
+ */
+ int f1 = nonnull_cnt - summultiple;
+ int d = f1 + nmultiple;
+ double n = samplerows - null_cnt;
+ double N = totalrows * (1.0 - stats->stanullfrac);
+ double stadistinct;
+
+ /* N == 0 shouldn't happen, but just in case ... */
+ if (N > 0)
+ stadistinct = (n * d) / ((n - f1) + f1 * n / N);
+ else
+ stadistinct = 0;
+
+ /* Clamp to sane range in case of roundoff error */
+ if (stadistinct < d)
+ stadistinct = d;
+ if (stadistinct > N)
+ stadistinct = N;
+ /* And round to integer */
+ stats->stadistinct = floor(stadistinct + 0.5);
+ }
+
+ /*
+ * If we estimated the number of distinct values at more than 10% of
+ * the total row count (a very arbitrary limit), then assume that
+ * stadistinct should scale with the row count rather than be a fixed
+ * value.
+ */
+ if (stats->stadistinct > 0.1 * totalrows)
+ stats->stadistinct = -(stats->stadistinct / totalrows);
+
+ /*
+ * Decide how many values are worth storing as most-common values. If
+ * we are able to generate a complete MCV list (all the values in the
+ * sample will fit, and we think these are all the ones in the table),
+ * then do so. Otherwise, store only those values that are
+ * significantly more common than the values not in the list.
+ *
+ * Note: the first of these cases is meant to address columns with
+ * small, fixed sets of possible values, such as boolean or enum
+ * columns. If we can *completely* represent the column population by
+ * an MCV list that will fit into the stats target, then we should do
+ * so and thus provide the planner with complete information. But if
+ * the MCV list is not complete, it's generally worth being more
+ * selective, and not just filling it all the way up to the stats
+ * target.
+ */
+ if (track_cnt < track_max && toowide_cnt == 0 &&
+ stats->stadistinct > 0 &&
+ track_cnt <= num_mcv)
+ {
+ /* Track list includes all values seen, and all will fit */
+ num_mcv = track_cnt;
+ }
+ else
+ {
+ int *mcv_counts;
+
+ /* Incomplete list; decide how many values are worth keeping */
+ if (num_mcv > track_cnt)
+ num_mcv = track_cnt;
+
+ if (num_mcv > 0)
+ {
+ mcv_counts = (int *) palloc(num_mcv * sizeof(int));
+ for (i = 0; i < num_mcv; i++)
+ mcv_counts[i] = track[i].count;
+
+ num_mcv = analyze_mcv_list(mcv_counts, num_mcv,
+ stats->stadistinct,
+ stats->stanullfrac,
+ samplerows, totalrows);
+ }
+ }
+
+ /* Generate MCV slot entry */
+ if (num_mcv > 0)
+ {
+ MemoryContext old_context;
+ Datum *mcv_values;
+ float4 *mcv_freqs;
+
+ /* Must copy the target values into anl_context */
+ old_context = MemoryContextSwitchTo(stats->anl_context);
+ mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum));
+ mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4));
+ for (i = 0; i < num_mcv; i++)
+ {
+ mcv_values[i] = datumCopy(track[i].value,
+ stats->attrtype->typbyval,
+ stats->attrtype->typlen);
+ mcv_freqs[i] = (double) track[i].count / (double) samplerows;
+ }
+ MemoryContextSwitchTo(old_context);
+
+ stats->stakind[0] = STATISTIC_KIND_MCV;
+ stats->staop[0] = mystats->eqopr;
+ stats->stacoll[0] = stats->attrcollid;
+ stats->stanumbers[0] = mcv_freqs;
+ stats->numnumbers[0] = num_mcv;
+ stats->stavalues[0] = mcv_values;
+ stats->numvalues[0] = num_mcv;
+
+ /*
+ * Accept the defaults for stats->statypid and others. They have
+ * been set before we were called (see vacuum.h)
+ */
+ }
+ }
+ else if (null_cnt > 0)
+ {
+ /* We found only nulls; assume the column is entirely null */
+ stats->stats_valid = true;
+ stats->stanullfrac = 1.0;
+ if (is_varwidth)
+ stats->stawidth = 0; /* "unknown" */
+ else
+ stats->stawidth = stats->attrtype->typlen;
+ stats->stadistinct = 0.0; /* "unknown" */
+ }
+
+ /* We don't need to bother cleaning up any of our temporary palloc's */
+}
+
+
+/*
+ * compute_scalar_stats() -- compute column statistics
+ *
+ * We use this when we can find "=" and "<" operators for the datatype.
+ *
+ * We determine the fraction of non-null rows, the average width, the
+ * most common values, the (estimated) number of distinct values, the
+ * distribution histogram, and the correlation of physical to logical order.
+ *
+ * The desired stats can be determined fairly easily after sorting the
+ * data values into order.
+ *
+ * Results go into *stats; up to three pg_statistic slots are filled,
+ * in order: MCV list, histogram, correlation.
+ */
+static void
+compute_scalar_stats(VacAttrStatsP stats,
+ AnalyzeAttrFetchFunc fetchfunc,
+ int samplerows,
+ double totalrows)
+{
+ int i;
+ int null_cnt = 0;
+ int nonnull_cnt = 0;
+ int toowide_cnt = 0;
+ double total_width = 0;
+ bool is_varlena = (!stats->attrtype->typbyval &&
+ stats->attrtype->typlen == -1);
+ bool is_varwidth = (!stats->attrtype->typbyval &&
+ stats->attrtype->typlen < 0);
+ double corr_xysum;
+ SortSupportData ssup;
+ ScalarItem *values;
+ int values_cnt = 0;
+ int *tupnoLink;
+ ScalarMCVItem *track;
+ int track_cnt = 0;
+ int num_mcv = stats->attr->attstattarget;
+ int num_bins = stats->attr->attstattarget;
+ StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data;
+
+ values = (ScalarItem *) palloc(samplerows * sizeof(ScalarItem));
+ tupnoLink = (int *) palloc(samplerows * sizeof(int));
+ track = (ScalarMCVItem *) palloc(num_mcv * sizeof(ScalarMCVItem));
+
+ memset(&ssup, 0, sizeof(ssup));
+ ssup.ssup_cxt = CurrentMemoryContext;
+ ssup.ssup_collation = stats->attrcollid;
+ ssup.ssup_nulls_first = false;
+
+ /*
+ * For now, don't perform abbreviated key conversion, because full values
+ * are required for MCV slot generation. Supporting that optimization
+ * would necessitate teaching compare_scalars() to call a tie-breaker.
+ */
+ ssup.abbreviate = false;
+
+ PrepareSortSupportFromOrderingOp(mystats->ltopr, &ssup);
+
+ /* Initial scan to find sortable values */
+ for (i = 0; i < samplerows; i++)
+ {
+ Datum value;
+ bool isnull;
+
+ vacuum_delay_point();
+
+ value = fetchfunc(stats, i, &isnull);
+
+ /* Check for null/nonnull */
+ if (isnull)
+ {
+ null_cnt++;
+ continue;
+ }
+ nonnull_cnt++;
+
+ /*
+ * If it's a variable-width field, add up widths for average width
+ * calculation. Note that if the value is toasted, we use the toasted
+ * width. We don't bother with this calculation if it's a fixed-width
+ * type.
+ */
+ if (is_varlena)
+ {
+ total_width += VARSIZE_ANY(DatumGetPointer(value));
+
+ /*
+ * If the value is toasted, we want to detoast it just once to
+ * avoid repeated detoastings and resultant excess memory usage
+ * during the comparisons. Also, check to see if the value is
+ * excessively wide, and if so don't detoast at all --- just
+ * ignore the value.
+ */
+ if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
+ {
+ toowide_cnt++;
+ continue;
+ }
+ value = PointerGetDatum(PG_DETOAST_DATUM(value));
+ }
+ else if (is_varwidth)
+ {
+ /* must be cstring */
+ total_width += strlen(DatumGetCString(value)) + 1;
+ }
+
+ /* Add it to the list to be sorted */
+ values[values_cnt].value = value;
+ values[values_cnt].tupno = values_cnt;
+ tupnoLink[values_cnt] = values_cnt;
+ values_cnt++;
+ }
+
+ /* We can only compute real stats if we found some sortable values. */
+ if (values_cnt > 0)
+ {
+ int ndistinct, /* # distinct values in sample */
+ nmultiple, /* # that appear multiple times */
+ num_hist,
+ dups_cnt;
+ /* index of next pg_statistic slot to fill (MCV, histogram, correlation) */
+ int slot_idx = 0;
+ CompareScalarsContext cxt;
+
+ /* Sort the collected values */
+ cxt.ssup = &ssup;
+ cxt.tupnoLink = tupnoLink;
+ qsort_interruptible((void *) values, values_cnt, sizeof(ScalarItem),
+ compare_scalars, (void *) &cxt);
+
+ /*
+ * Now scan the values in order, find the most common ones, and also
+ * accumulate ordering-correlation statistics.
+ *
+ * To determine which are most common, we first have to count the
+ * number of duplicates of each value. The duplicates are adjacent in
+ * the sorted list, so a brute-force approach is to compare successive
+ * datum values until we find two that are not equal. However, that
+ * requires N-1 invocations of the datum comparison routine, which are
+ * completely redundant with work that was done during the sort. (The
+ * sort algorithm must at some point have compared each pair of items
+ * that are adjacent in the sorted order; otherwise it could not know
+ * that it's ordered the pair correctly.) We exploit this by having
+ * compare_scalars remember the highest tupno index that each
+ * ScalarItem has been found equal to. At the end of the sort, a
+ * ScalarItem's tupnoLink will still point to itself if and only if it
+ * is the last item of its group of duplicates (since the group will
+ * be ordered by tupno).
+ */
+ corr_xysum = 0;
+ ndistinct = 0;
+ nmultiple = 0;
+ dups_cnt = 0;
+ for (i = 0; i < values_cnt; i++)
+ {
+ int tupno = values[i].tupno;
+
+ corr_xysum += ((double) i) * ((double) tupno);
+ dups_cnt++;
+ if (tupnoLink[tupno] == tupno)
+ {
+ /* Reached end of duplicates of this value */
+ ndistinct++;
+ if (dups_cnt > 1)
+ {
+ nmultiple++;
+ if (track_cnt < num_mcv ||
+ dups_cnt > track[track_cnt - 1].count)
+ {
+ /*
+ * Found a new item for the mcv list; find its
+ * position, bubbling down old items if needed. Loop
+ * invariant is that j points at an empty/replaceable
+ * slot.
+ */
+ int j;
+
+ if (track_cnt < num_mcv)
+ track_cnt++;
+ for (j = track_cnt - 1; j > 0; j--)
+ {
+ if (dups_cnt <= track[j - 1].count)
+ break;
+ track[j].count = track[j - 1].count;
+ track[j].first = track[j - 1].first;
+ }
+ track[j].count = dups_cnt;
+ track[j].first = i + 1 - dups_cnt;
+ }
+ }
+ dups_cnt = 0;
+ }
+ }
+
+ stats->stats_valid = true;
+ /* Do the simple null-frac and width stats */
+ stats->stanullfrac = (double) null_cnt / (double) samplerows;
+ if (is_varwidth)
+ stats->stawidth = total_width / (double) nonnull_cnt;
+ else
+ stats->stawidth = stats->attrtype->typlen;
+
+ if (nmultiple == 0)
+ {
+ /*
+ * If we found no repeated non-null values, assume it's a unique
+ * column; but be sure to discount for any nulls we found.
+ */
+ stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);
+ }
+ else if (toowide_cnt == 0 && nmultiple == ndistinct)
+ {
+ /*
+ * Every value in the sample appeared more than once. Assume the
+ * column has just these values. (This case is meant to address
+ * columns with small, fixed sets of possible values, such as
+ * boolean or enum columns. If there are any values that appear
+ * just once in the sample, including too-wide values, we should
+ * assume that that's not what we're dealing with.)
+ */
+ stats->stadistinct = ndistinct;
+ }
+ else
+ {
+ /*----------
+ * Estimate the number of distinct values using the estimator
+ * proposed by Haas and Stokes in IBM Research Report RJ 10025:
+ * n*d / (n - f1 + f1*n/N)
+ * where f1 is the number of distinct values that occurred
+ * exactly once in our sample of n rows (from a total of N),
+ * and d is the total number of distinct values in the sample.
+ * This is their Duj1 estimator; the other estimators they
+ * recommend are considerably more complex, and are numerically
+ * very unstable when n is much smaller than N.
+ *
+ * In this calculation, we consider only non-nulls. We used to
+ * include rows with null values in the n and N counts, but that
+ * leads to inaccurate answers in columns with many nulls, and
+ * it's intuitively bogus anyway considering the desired result is
+ * the number of distinct non-null values.
+ *
+ * Overwidth values are assumed to have been distinct.
+ *----------
+ */
+ int f1 = ndistinct - nmultiple + toowide_cnt;
+ int d = f1 + nmultiple;
+ double n = samplerows - null_cnt;
+ double N = totalrows * (1.0 - stats->stanullfrac);
+ double stadistinct;
+
+ /* N == 0 shouldn't happen, but just in case ... */
+ if (N > 0)
+ stadistinct = (n * d) / ((n - f1) + f1 * n / N);
+ else
+ stadistinct = 0;
+
+ /* Clamp to sane range in case of roundoff error */
+ if (stadistinct < d)
+ stadistinct = d;
+ if (stadistinct > N)
+ stadistinct = N;
+ /* And round to integer */
+ stats->stadistinct = floor(stadistinct + 0.5);
+ }
+
+ /*
+ * If we estimated the number of distinct values at more than 10% of
+ * the total row count (a very arbitrary limit), then assume that
+ * stadistinct should scale with the row count rather than be a fixed
+ * value.
+ */
+ if (stats->stadistinct > 0.1 * totalrows)
+ stats->stadistinct = -(stats->stadistinct / totalrows);
+
+ /*
+ * Decide how many values are worth storing as most-common values. If
+ * we are able to generate a complete MCV list (all the values in the
+ * sample will fit, and we think these are all the ones in the table),
+ * then do so. Otherwise, store only those values that are
+ * significantly more common than the values not in the list.
+ *
+ * Note: the first of these cases is meant to address columns with
+ * small, fixed sets of possible values, such as boolean or enum
+ * columns. If we can *completely* represent the column population by
+ * an MCV list that will fit into the stats target, then we should do
+ * so and thus provide the planner with complete information. But if
+ * the MCV list is not complete, it's generally worth being more
+ * selective, and not just filling it all the way up to the stats
+ * target.
+ */
+ if (track_cnt == ndistinct && toowide_cnt == 0 &&
+ stats->stadistinct > 0 &&
+ track_cnt <= num_mcv)
+ {
+ /* Track list includes all values seen, and all will fit */
+ num_mcv = track_cnt;
+ }
+ else
+ {
+ int *mcv_counts;
+
+ /* Incomplete list; decide how many values are worth keeping */
+ if (num_mcv > track_cnt)
+ num_mcv = track_cnt;
+
+ if (num_mcv > 0)
+ {
+ mcv_counts = (int *) palloc(num_mcv * sizeof(int));
+ for (i = 0; i < num_mcv; i++)
+ mcv_counts[i] = track[i].count;
+
+ num_mcv = analyze_mcv_list(mcv_counts, num_mcv,
+ stats->stadistinct,
+ stats->stanullfrac,
+ samplerows, totalrows);
+ }
+ }
+
+ /* Generate MCV slot entry */
+ if (num_mcv > 0)
+ {
+ MemoryContext old_context;
+ Datum *mcv_values;
+ float4 *mcv_freqs;
+
+ /* Must copy the target values into anl_context */
+ old_context = MemoryContextSwitchTo(stats->anl_context);
+ mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum));
+ mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4));
+ for (i = 0; i < num_mcv; i++)
+ {
+ mcv_values[i] = datumCopy(values[track[i].first].value,
+ stats->attrtype->typbyval,
+ stats->attrtype->typlen);
+ mcv_freqs[i] = (double) track[i].count / (double) samplerows;
+ }
+ MemoryContextSwitchTo(old_context);
+
+ stats->stakind[slot_idx] = STATISTIC_KIND_MCV;
+ stats->staop[slot_idx] = mystats->eqopr;
+ stats->stacoll[slot_idx] = stats->attrcollid;
+ stats->stanumbers[slot_idx] = mcv_freqs;
+ stats->numnumbers[slot_idx] = num_mcv;
+ stats->stavalues[slot_idx] = mcv_values;
+ stats->numvalues[slot_idx] = num_mcv;
+
+ /*
+ * Accept the defaults for stats->statypid and others. They have
+ * been set before we were called (see vacuum.h)
+ */
+ slot_idx++;
+ }
+
+ /*
+ * Generate a histogram slot entry if there are at least two distinct
+ * values not accounted for in the MCV list. (This ensures the
+ * histogram won't collapse to empty or a singleton.)
+ */
+ num_hist = ndistinct - num_mcv;
+ if (num_hist > num_bins)
+ num_hist = num_bins + 1;
+ if (num_hist >= 2)
+ {
+ MemoryContext old_context;
+ Datum *hist_values;
+ int nvals;
+ int pos,
+ posfrac,
+ delta,
+ deltafrac;
+
+ /* Sort the MCV items into position order to speed next loop */
+ qsort_interruptible((void *) track, num_mcv, sizeof(ScalarMCVItem),
+ compare_mcvs, NULL);
+
+ /*
+ * Collapse out the MCV items from the values[] array.
+ *
+ * Note we destroy the values[] array here... but we don't need it
+ * for anything more. We do, however, still need values_cnt.
+ * nvals will be the number of remaining entries in values[].
+ */
+ if (num_mcv > 0)
+ {
+ int src,
+ dest;
+ int j;
+
+ src = dest = 0;
+ j = 0; /* index of next interesting MCV item */
+ while (src < values_cnt)
+ {
+ int ncopy;
+
+ if (j < num_mcv)
+ {
+ int first = track[j].first;
+
+ if (src >= first)
+ {
+ /* advance past this MCV item */
+ src = first + track[j].count;
+ j++;
+ continue;
+ }
+ ncopy = first - src;
+ }
+ else
+ ncopy = values_cnt - src;
+ memmove(&values[dest], &values[src],
+ ncopy * sizeof(ScalarItem));
+ src += ncopy;
+ dest += ncopy;
+ }
+ nvals = dest;
+ }
+ else
+ nvals = values_cnt;
+ /* enough non-MCV values must remain to populate the histogram */
+ Assert(nvals >= num_hist);
+
+ /* Must copy the target values into anl_context */
+ old_context = MemoryContextSwitchTo(stats->anl_context);
+ hist_values = (Datum *) palloc(num_hist * sizeof(Datum));
+
+ /*
+ * The object of this loop is to copy the first and last values[]
+ * entries along with evenly-spaced values in between. So the
+ * i'th value is values[(i * (nvals - 1)) / (num_hist - 1)]. But
+ * computing that subscript directly risks integer overflow when
+ * the stats target is more than a couple thousand. Instead we
+ * add (nvals - 1) / (num_hist - 1) to pos at each step, tracking
+ * the integral and fractional parts of the sum separately.
+ */
+ delta = (nvals - 1) / (num_hist - 1);
+ deltafrac = (nvals - 1) % (num_hist - 1);
+ pos = posfrac = 0;
+
+ for (i = 0; i < num_hist; i++)
+ {
+ hist_values[i] = datumCopy(values[pos].value,
+ stats->attrtype->typbyval,
+ stats->attrtype->typlen);
+ pos += delta;
+ posfrac += deltafrac;
+ if (posfrac >= (num_hist - 1))
+ {
+ /* fractional part exceeds 1, carry to integer part */
+ pos++;
+ posfrac -= (num_hist - 1);
+ }
+ }
+
+ MemoryContextSwitchTo(old_context);
+
+ stats->stakind[slot_idx] = STATISTIC_KIND_HISTOGRAM;
+ stats->staop[slot_idx] = mystats->ltopr;
+ stats->stacoll[slot_idx] = stats->attrcollid;
+ stats->stavalues[slot_idx] = hist_values;
+ stats->numvalues[slot_idx] = num_hist;
+
+ /*
+ * Accept the defaults for stats->statypid and others. They have
+ * been set before we were called (see vacuum.h)
+ */
+ slot_idx++;
+ }
+
+ /* Generate a correlation entry if there are multiple values */
+ if (values_cnt > 1)
+ {
+ MemoryContext old_context;
+ float4 *corrs;
+ double corr_xsum,
+ corr_x2sum;
+
+ /* Must copy the target values into anl_context */
+ old_context = MemoryContextSwitchTo(stats->anl_context);
+ corrs = (float4 *) palloc(sizeof(float4));
+ MemoryContextSwitchTo(old_context);
+
+ /*----------
+ * Since we know the x and y value sets are both
+ * 0, 1, ..., values_cnt-1
+ * we have sum(x) = sum(y) =
+ * (values_cnt-1)*values_cnt / 2
+ * and sum(x^2) = sum(y^2) =
+ * (values_cnt-1)*values_cnt*(2*values_cnt-1) / 6.
+ *----------
+ */
+ corr_xsum = ((double) (values_cnt - 1)) *
+ ((double) values_cnt) / 2.0;
+ corr_x2sum = ((double) (values_cnt - 1)) *
+ ((double) values_cnt) * (double) (2 * values_cnt - 1) / 6.0;
+
+ /* And the correlation coefficient reduces to */
+ corrs[0] = (values_cnt * corr_xysum - corr_xsum * corr_xsum) /
+ (values_cnt * corr_x2sum - corr_xsum * corr_xsum);
+
+ stats->stakind[slot_idx] = STATISTIC_KIND_CORRELATION;
+ stats->staop[slot_idx] = mystats->ltopr;
+ stats->stacoll[slot_idx] = stats->attrcollid;
+ stats->stanumbers[slot_idx] = corrs;
+ stats->numnumbers[slot_idx] = 1;
+ slot_idx++;
+ }
+ }
+ else if (nonnull_cnt > 0)
+ {
+ /* We found some non-null values, but they were all too wide */
+ Assert(nonnull_cnt == toowide_cnt);
+ stats->stats_valid = true;
+ /* Do the simple null-frac and width stats */
+ stats->stanullfrac = (double) null_cnt / (double) samplerows;
+ if (is_varwidth)
+ stats->stawidth = total_width / (double) nonnull_cnt;
+ else
+ stats->stawidth = stats->attrtype->typlen;
+ /* Assume all too-wide values are distinct, so it's a unique column */
+ stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);
+ }
+ else if (null_cnt > 0)
+ {
+ /* We found only nulls; assume the column is entirely null */
+ stats->stats_valid = true;
+ stats->stanullfrac = 1.0;
+ if (is_varwidth)
+ stats->stawidth = 0; /* "unknown" */
+ else
+ stats->stawidth = stats->attrtype->typlen;
+ stats->stadistinct = 0.0; /* "unknown" */
+ }
+
+ /* We don't need to bother cleaning up any of our temporary palloc's */
+}
+
+/*
+ * Comparator for sorting ScalarItems
+ *
+ * Aside from sorting the items, we update the tupnoLink[] array
+ * whenever two ScalarItems are found to contain equal datums. The array
+ * is indexed by tupno; for each ScalarItem, it contains the highest
+ * tupno that that item's datum has been found to be equal to. This allows
+ * us to avoid additional comparisons in compute_scalar_stats().
+ */
+static int
+compare_scalars(const void *a, const void *b, void *arg)
+{
+ Datum da = ((const ScalarItem *) a)->value;
+ int ta = ((const ScalarItem *) a)->tupno;
+ Datum db = ((const ScalarItem *) b)->value;
+ int tb = ((const ScalarItem *) b)->tupno;
+ CompareScalarsContext *cxt = (CompareScalarsContext *) arg;
+ int compare;
+
+ /* isnull flags are false: the caller never puts null datums in values[] */
+ compare = ApplySortComparator(da, false, db, false, cxt->ssup);
+ if (compare != 0)
+ return compare;
+
+ /*
+ * The two datums are equal, so update cxt->tupnoLink[].
+ */
+ if (cxt->tupnoLink[ta] < tb)
+ cxt->tupnoLink[ta] = tb;
+ if (cxt->tupnoLink[tb] < ta)
+ cxt->tupnoLink[tb] = ta;
+
+ /*
+ * For equal datums, sort by tupno. This ensures each group of
+ * duplicates comes out in tupno order, which compute_scalar_stats()
+ * relies on when interpreting tupnoLink[].
+ */
+ return ta - tb;
+}
+
+/*
+ * Comparator for sorting ScalarMCVItems by position
+ *
+ * Orders by the "first" field, i.e. the index of the item's first
+ * occurrence in the sorted values[] array. The arg parameter is unused;
+ * it exists only to satisfy the qsort_interruptible() callback signature.
+ */
+static int
+compare_mcvs(const void *a, const void *b, void *arg)
+{
+ int da = ((const ScalarMCVItem *) a)->first;
+ int db = ((const ScalarMCVItem *) b)->first;
+
+ return da - db;
+}
+
+/*
+ * Analyze the list of common values in the sample and decide how many are
+ * worth storing in the table's MCV list.
+ *
+ * mcv_counts is assumed to be a list of the counts of the most common values
+ * seen in the sample, starting with the most common. The return value is the
+ * number that are significantly more common than the values not in the list,
+ * and which are therefore deemed worth storing in the table's MCV list.
+ */
+static int
+analyze_mcv_list(int *mcv_counts,
+ int num_mcv,
+ double stadistinct,
+ double stanullfrac,
+ int samplerows,
+ double totalrows)
+{
+ double ndistinct_table;
+ double sumcount;
+ int i;
+
+ /*
+ * If the entire table was sampled, keep the whole list. This also
+ * protects us against division by zero in the code below.
+ */
+ if (samplerows == totalrows || totalrows <= 1.0)
+ return num_mcv;
+
+ /* Re-extract the estimated number of distinct nonnull values in table */
+ ndistinct_table = stadistinct;
+ if (ndistinct_table < 0)
+ ndistinct_table = -ndistinct_table * totalrows;
+
+ /*
+ * Exclude the least common values from the MCV list, if they are not
+ * significantly more common than the estimated selectivity they would
+ * have if they weren't in the list. All non-MCV values are assumed to be
+ * equally common, after taking into account the frequencies of all the
+ * values in the MCV list and the number of nulls (c.f. eqsel()).
+ *
+ * Here sumcount tracks the total count of all but the last (least common)
+ * value in the MCV list, allowing us to determine the effect of excluding
+ * that value from the list.
+ *
+ * Note that we deliberately do this by removing values from the full
+ * list, rather than starting with an empty list and adding values,
+ * because the latter approach can fail to add any values if all the most
+ * common values have around the same frequency and make up the majority
+ * of the table, so that the overall average frequency of all values is
+ * roughly the same as that of the common values. This would lead to any
+ * uncommon values being significantly overestimated.
+ */
+ sumcount = 0.0;
+ for (i = 0; i < num_mcv - 1; i++)
+ sumcount += mcv_counts[i];
+
+ while (num_mcv > 0)
+ {
+ double selec,
+ otherdistinct,
+ N,
+ n,
+ K,
+ variance,
+ stddev;
+
+ /*
+ * Estimated selectivity the least common value would have if it
+ * wasn't in the MCV list (c.f. eqsel()).
+ */
+ selec = 1.0 - sumcount / samplerows - stanullfrac;
+ if (selec < 0.0)
+ selec = 0.0;
+ if (selec > 1.0)
+ selec = 1.0;
+ otherdistinct = ndistinct_table - (num_mcv - 1);
+ if (otherdistinct > 1)
+ selec /= otherdistinct;
+
+ /*
+ * If the value is kept in the MCV list, its population frequency is
+ * assumed to equal its sample frequency. We use the lower end of a
+ * textbook continuity-corrected Wald-type confidence interval to
+ * determine if that is significantly more common than the non-MCV
+ * frequency --- specifically we assume the population frequency is
+ * highly likely to be within around 2 standard errors of the sample
+ * frequency, which equates to an interval of 2 standard deviations
+ * either side of the sample count, plus an additional 0.5 for the
+ * continuity correction. Since we are sampling without replacement,
+ * this is a hypergeometric distribution.
+ *
+ * XXX: Empirically, this approach seems to work quite well, but it
+ * may be worth considering more advanced techniques for estimating
+ * the confidence interval of the hypergeometric distribution.
+ */
+ /*
+ * N = table size, n = sample size, K = scaled-up estimate of the
+ * number of rows in the whole table holding the least common value.
+ */
+ N = totalrows;
+ n = samplerows;
+ K = N * mcv_counts[num_mcv - 1] / n;
+ /* variance of the sample count under a hypergeometric distribution */
+ variance = n * K * (N - K) * (N - n) / (N * N * (N - 1));
+ stddev = sqrt(variance);
+
+ if (mcv_counts[num_mcv - 1] > selec * samplerows + 2 * stddev + 0.5)
+ {
+ /*
+ * The value is significantly more common than the non-MCV
+ * selectivity would suggest. Keep it, and all the other more
+ * common values in the list.
+ */
+ break;
+ }
+ else
+ {
+ /* Discard this value and consider the next least common value */
+ num_mcv--;
+ if (num_mcv == 0)
+ break;
+ /* sumcount now excludes the new least common value */
+ sumcount -= mcv_counts[num_mcv - 1];
+ }
+ }
+ return num_mcv;
+}
diff --git a/src/backend/commands/async.c b/src/backend/commands/async.c
new file mode 100644
index 0000000..3e1b92d
--- /dev/null
+++ b/src/backend/commands/async.c
@@ -0,0 +1,2446 @@
+/*-------------------------------------------------------------------------
+ *
+ * async.c
+ * Asynchronous notification: NOTIFY, LISTEN, UNLISTEN
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/commands/async.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/*-------------------------------------------------------------------------
+ * Async Notification Model as of 9.0:
+ *
+ * 1. Multiple backends on same machine. Multiple backends listening on
+ * several channels. (Channels are also called "conditions" in other
+ * parts of the code.)
+ *
+ * 2. There is one central queue in disk-based storage (directory pg_notify/),
+ * with actively-used pages mapped into shared memory by the slru.c module.
+ * All notification messages are placed in the queue and later read out
+ * by listening backends.
+ *
+ * There is no central knowledge of which backend listens on which channel;
+ * every backend has its own list of interesting channels.
+ *
+ * Although there is only one queue, notifications are treated as being
+ * database-local; this is done by including the sender's database OID
+ * in each notification message. Listening backends ignore messages
+ * that don't match their database OID. This is important because it
+ * ensures senders and receivers have the same database encoding and won't
+ * misinterpret non-ASCII text in the channel name or payload string.
+ *
+ * Since notifications are not expected to survive database crashes,
+ * we can simply clean out the pg_notify data at any reboot, and there
+ * is no need for WAL support or fsync'ing.
+ *
+ * 3. Every backend that is listening on at least one channel registers by
+ * entering its PID into the array in AsyncQueueControl. It then scans all
+ * incoming notifications in the central queue and first compares the
+ * database OID of the notification with its own database OID and then
+ * compares the notified channel with the list of channels that it listens
+ * to. In case there is a match it delivers the notification event to its
+ * frontend. Non-matching events are simply skipped.
+ *
+ * 4. The NOTIFY statement (routine Async_Notify) stores the notification in
+ * a backend-local list which will not be processed until transaction end.
+ *
+ * Duplicate notifications from the same transaction are sent out as one
+ * notification only. This is done to save work when for example a trigger
+ * on a 2 million row table fires a notification for each row that has been
+ * changed. If the application needs to receive every single notification
+ * that has been sent, it can easily add some unique string into the extra
+ * payload parameter.
+ *
+ * When the transaction is ready to commit, PreCommit_Notify() adds the
+ * pending notifications to the head of the queue. The head pointer of the
+ * queue always points to the next free position and a position is just a
+ * page number and the offset in that page. This is done before marking the
+ * transaction as committed in clog. If we run into problems writing the
+ * notifications, we can still call elog(ERROR, ...) and the transaction
+ * will roll back.
+ *
+ * Once we have put all of the notifications into the queue, we return to
+ * CommitTransaction() which will then do the actual transaction commit.
+ *
+ * After commit we are called another time (AtCommit_Notify()). Here we
+ * make any actual updates to the effective listen state (listenChannels).
+ * Then we signal any backends that may be interested in our messages
+ * (including our own backend, if listening). This is done by
+ * SignalBackends(), which scans the list of listening backends and sends a
+ * PROCSIG_NOTIFY_INTERRUPT signal to every listening backend (we don't
+ * know which backend is listening on which channel so we must signal them
+ * all). We can exclude backends that are already up to date, though, and
+ * we can also exclude backends that are in other databases (unless they
+ * are way behind and should be kicked to make them advance their
+ * pointers).
+ *
+ * Finally, after we are out of the transaction altogether and about to go
+ * idle, we scan the queue for messages that need to be sent to our
+ * frontend (which might be notifies from other backends, or self-notifies
+ * from our own). This step is not part of the CommitTransaction sequence
+ * for two important reasons. First, we could get errors while sending
+ * data to our frontend, and it's really bad for errors to happen in
+ * post-commit cleanup. Second, in cases where a procedure issues commits
+ * within a single frontend command, we don't want to send notifies to our
+ * frontend until the command is done; but notifies to other backends
+ * should go out immediately after each commit.
+ *
+ * 5. Upon receipt of a PROCSIG_NOTIFY_INTERRUPT signal, the signal handler
+ * sets the process's latch, which triggers the event to be processed
+ * immediately if this backend is idle (i.e., it is waiting for a frontend
+ * command and is not within a transaction block. C.f.
+ * ProcessClientReadInterrupt()). Otherwise the handler may only set a
+ * flag, which will cause the processing to occur just before we next go
+ * idle.
+ *
+ * Inbound-notify processing consists of reading all of the notifications
+ * that have arrived since scanning last time. We read every notification
+ * until we reach either a notification from an uncommitted transaction or
+ * the head pointer's position.
+ *
+ * 6. To avoid SLRU wraparound and limit disk space consumption, the tail
+ * pointer needs to be advanced so that old pages can be truncated.
+ * This is relatively expensive (notably, it requires an exclusive lock),
+ * so we don't want to do it often. We make sending backends do this work
+ * if they advanced the queue head into a new page, but only once every
+ * QUEUE_CLEANUP_DELAY pages.
+ *
+ * An application that listens on the same channel it notifies will get
+ * NOTIFY messages for its own NOTIFYs. These can be ignored, if not useful,
+ * by comparing be_pid in the NOTIFY message to the application's own backend's
+ * PID. (As of FE/BE protocol 2.0, the backend's PID is provided to the
+ * frontend during startup.) The above design guarantees that notifies from
+ * other backends will never be missed by ignoring self-notifies.
+ *
+ * The amount of shared memory used for notify management (NUM_NOTIFY_BUFFERS)
+ * can be varied without affecting anything but performance. The maximum
+ * amount of notification data that can be queued at one time is determined
+ * by slru.c's wraparound limit; see QUEUE_MAX_PAGE below.
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <limits.h>
+#include <unistd.h>
+#include <signal.h>
+
+#include "access/parallel.h"
+#include "access/slru.h"
+#include "access/transam.h"
+#include "access/xact.h"
+#include "catalog/pg_database.h"
+#include "commands/async.h"
+#include "common/hashfn.h"
+#include "funcapi.h"
+#include "libpq/libpq.h"
+#include "libpq/pqformat.h"
+#include "miscadmin.h"
+#include "storage/ipc.h"
+#include "storage/lmgr.h"
+#include "storage/proc.h"
+#include "storage/procarray.h"
+#include "storage/procsignal.h"
+#include "storage/sinval.h"
+#include "tcop/tcopprot.h"
+#include "utils/builtins.h"
+#include "utils/memutils.h"
+#include "utils/ps_status.h"
+#include "utils/snapmgr.h"
+#include "utils/timestamp.h"
+
+
+/*
+ * Maximum size of a NOTIFY payload, including terminating NULL. This
+ * must be kept small enough so that a notification message fits on one
+ * SLRU page. The magic fudge factor here is noncritical as long as it's
+ * more than AsyncQueueEntryEmptySize --- we make it significantly bigger
+ * than that, so changes in that data structure won't affect user-visible
+ * restrictions.
+ */
+#define NOTIFY_PAYLOAD_MAX_LENGTH (BLCKSZ - NAMEDATALEN - 128)
+
+/*
+ * Struct representing an entry in the global notify queue
+ *
+ * This struct declaration has the maximal length, but in a real queue entry
+ * the data area is only big enough for the actual channel and payload strings
+ * (each null-terminated).  AsyncQueueEntryEmptySize is the minimum possible
+ * entry size, if both channel and payload strings are empty (but note it
+ * doesn't include alignment padding).
+ *
+ * The "length" field should always be rounded up to the next QUEUEALIGN
+ * multiple so that all fields are properly aligned.
+ */
+typedef struct AsyncQueueEntry
+{
+	int			length;			/* total allocated length of entry */
+	Oid			dboid;			/* sender's database OID */
+	TransactionId xid;			/* sender's XID */
+	int32		srcPid;			/* sender's PID */
+	/* channel name (NUL-terminated), then payload (NUL-terminated) */
+	char		data[NAMEDATALEN + NOTIFY_PAYLOAD_MAX_LENGTH];
+} AsyncQueueEntry;
+
+/* Currently, no field of AsyncQueueEntry requires more than int alignment */
+#define QUEUEALIGN(len) INTALIGN(len)
+
+#define AsyncQueueEntryEmptySize (offsetof(AsyncQueueEntry, data) + 2)
+
+/*
+ * Struct describing a queue position, and assorted macros for working with it
+ */
+typedef struct QueuePosition
+{
+ int page; /* SLRU page number */
+ int offset; /* byte offset within page */
+} QueuePosition;
+
+#define QUEUE_POS_PAGE(x) ((x).page)
+#define QUEUE_POS_OFFSET(x) ((x).offset)
+
+#define SET_QUEUE_POS(x,y,z) \
+ do { \
+ (x).page = (y); \
+ (x).offset = (z); \
+ } while (0)
+
+#define QUEUE_POS_EQUAL(x,y) \
+ ((x).page == (y).page && (x).offset == (y).offset)
+
+#define QUEUE_POS_IS_ZERO(x) \
+ ((x).page == 0 && (x).offset == 0)
+
+/* choose logically smaller QueuePosition */
+#define QUEUE_POS_MIN(x,y) \
+ (asyncQueuePagePrecedes((x).page, (y).page) ? (x) : \
+ (x).page != (y).page ? (y) : \
+ (x).offset < (y).offset ? (x) : (y))
+
+/* choose logically larger QueuePosition */
+#define QUEUE_POS_MAX(x,y) \
+ (asyncQueuePagePrecedes((x).page, (y).page) ? (y) : \
+ (x).page != (y).page ? (x) : \
+ (x).offset > (y).offset ? (x) : (y))
+
+/*
+ * Parameter determining how often we try to advance the tail pointer:
+ * we do that after every QUEUE_CLEANUP_DELAY pages of NOTIFY data. This is
+ * also the distance by which a backend in another database needs to be
+ * behind before we'll decide we need to wake it up to advance its pointer.
+ *
+ * Resist the temptation to make this really large. While that would save
+ * work in some places, it would add cost in others. In particular, this
+ * should likely be less than NUM_NOTIFY_BUFFERS, to ensure that backends
+ * catch up before the pages they'll need to read fall out of SLRU cache.
+ */
+#define QUEUE_CLEANUP_DELAY 4
+
+/*
+ * Struct describing a listening backend's status
+ *
+ * One such entry exists per backend slot in AsyncQueueControl->backend[];
+ * pid is InvalidPid for slots not currently registered as listeners.
+ */
+typedef struct QueueBackendStatus
+{
+	int32		pid;			/* either a PID or InvalidPid */
+	Oid			dboid;			/* backend's database OID, or InvalidOid */
+	BackendId	nextListener;	/* id of next listener, or InvalidBackendId */
+	QueuePosition pos;			/* backend has read queue up to here */
+} QueueBackendStatus;
+
+/*
+ * Shared memory state for LISTEN/NOTIFY (excluding its SLRU stuff)
+ *
+ * The AsyncQueueControl structure is protected by the NotifyQueueLock and
+ * NotifyQueueTailLock.
+ *
+ * When holding NotifyQueueLock in SHARED mode, backends may only inspect
+ * their own entries as well as the head and tail pointers. Consequently we
+ * can allow a backend to update its own record while holding only SHARED lock
+ * (since no other backend will inspect it).
+ *
+ * When holding NotifyQueueLock in EXCLUSIVE mode, backends can inspect the
+ * entries of other backends and also change the head pointer. When holding
+ * both NotifyQueueLock and NotifyQueueTailLock in EXCLUSIVE mode, backends
+ * can change the tail pointers.
+ *
+ * NotifySLRULock is used as the control lock for the pg_notify SLRU buffers.
+ * In order to avoid deadlocks, whenever we need multiple locks, we first get
+ * NotifyQueueTailLock, then NotifyQueueLock, and lastly NotifySLRULock.
+ *
+ * Each backend uses the backend[] array entry with index equal to its
+ * BackendId (which can range from 1 to MaxBackends). We rely on this to make
+ * SendProcSignal fast.
+ *
+ * The backend[] array entries for actively-listening backends are threaded
+ * together using firstListener and the nextListener links, so that we can
+ * scan them without having to iterate over inactive entries. We keep this
+ * list in order by BackendId so that the scan is cache-friendly when there
+ * are many active entries.
+ */
+typedef struct AsyncQueueControl
+{
+ QueuePosition head; /* head points to the next free location */
+ QueuePosition tail; /* tail must be <= the queue position of every
+ * listening backend */
+ int stopPage; /* oldest unrecycled page; must be <=
+ * tail.page */
+ BackendId firstListener; /* id of first listener, or InvalidBackendId */
+ TimestampTz lastQueueFillWarn; /* time of last queue-full msg */
+ QueueBackendStatus backend[FLEXIBLE_ARRAY_MEMBER];
+ /* backend[0] is not used; used entries are from [1] to [MaxBackends] */
+} AsyncQueueControl;
+
+static AsyncQueueControl *asyncQueueControl;
+
+#define QUEUE_HEAD (asyncQueueControl->head)
+#define QUEUE_TAIL (asyncQueueControl->tail)
+#define QUEUE_STOP_PAGE (asyncQueueControl->stopPage)
+#define QUEUE_FIRST_LISTENER (asyncQueueControl->firstListener)
+#define QUEUE_BACKEND_PID(i) (asyncQueueControl->backend[i].pid)
+#define QUEUE_BACKEND_DBOID(i) (asyncQueueControl->backend[i].dboid)
+#define QUEUE_NEXT_LISTENER(i) (asyncQueueControl->backend[i].nextListener)
+#define QUEUE_BACKEND_POS(i) (asyncQueueControl->backend[i].pos)
+
+/*
+ * The SLRU buffer area through which we access the notification queue
+ */
+static SlruCtlData NotifyCtlData;
+
+#define NotifyCtl (&NotifyCtlData)
+#define QUEUE_PAGESIZE BLCKSZ
+#define QUEUE_FULL_WARN_INTERVAL 5000 /* warn at most once every 5s */
+
+/*
+ * Use segments 0000 through FFFF. Each contains SLRU_PAGES_PER_SEGMENT pages
+ * which gives us the pages from 0 to SLRU_PAGES_PER_SEGMENT * 0x10000 - 1.
+ * We could use as many segments as SlruScanDirectory() allows, but this gives
+ * us so much space already that it doesn't seem worth the trouble.
+ *
+ * The most data we can have in the queue at a time is QUEUE_MAX_PAGE/2
+ * pages, because more than that would confuse slru.c into thinking there
+ * was a wraparound condition. With the default BLCKSZ this means there
+ * can be up to 8GB of queued-and-not-read data.
+ *
+ * Note: it's possible to redefine QUEUE_MAX_PAGE with a smaller multiple of
+ * SLRU_PAGES_PER_SEGMENT, for easier testing of queue-full behaviour.
+ */
+#define QUEUE_MAX_PAGE (SLRU_PAGES_PER_SEGMENT * 0x10000 - 1)
+
+/*
+ * listenChannels identifies the channels we are actually listening to
+ * (ie, have committed a LISTEN on). It is a simple list of channel names,
+ * allocated in TopMemoryContext.
+ */
+static List *listenChannels = NIL; /* list of C strings */
+
+/*
+ * State for pending LISTEN/UNLISTEN actions consists of an ordered list of
+ * all actions requested in the current transaction. As explained above,
+ * we don't actually change listenChannels until we reach transaction commit.
+ *
+ * The list is kept in CurTransactionContext. In subtransactions, each
+ * subtransaction has its own list in its own CurTransactionContext, but
+ * successful subtransactions attach their lists to their parent's list.
+ * Failed subtransactions simply discard their lists.
+ */
+typedef enum
+{
+ LISTEN_LISTEN,
+ LISTEN_UNLISTEN,
+ LISTEN_UNLISTEN_ALL
+} ListenActionKind;
+
+typedef struct
+{
+ ListenActionKind action;
+ char channel[FLEXIBLE_ARRAY_MEMBER]; /* nul-terminated string */
+} ListenAction;
+
+typedef struct ActionList
+{
+ int nestingLevel; /* current transaction nesting depth */
+ List *actions; /* list of ListenAction structs */
+ struct ActionList *upper; /* details for upper transaction levels */
+} ActionList;
+
+static ActionList *pendingActions = NULL;
+
+/*
+ * State for outbound notifies consists of a list of all channels+payloads
+ * NOTIFYed in the current transaction. We do not actually perform a NOTIFY
+ * until and unless the transaction commits. pendingNotifies is NULL if no
+ * NOTIFYs have been done in the current (sub) transaction.
+ *
+ * We discard duplicate notify events issued in the same transaction.
+ * Hence, in addition to the list proper (which we need to track the order
+ * of the events, since we guarantee to deliver them in order), we build a
+ * hash table which we can probe to detect duplicates. Since building the
+ * hash table is somewhat expensive, we do so only once we have at least
+ * MIN_HASHABLE_NOTIFIES events queued in the current (sub) transaction;
+ * before that we just scan the events linearly.
+ *
+ * The list is kept in CurTransactionContext. In subtransactions, each
+ * subtransaction has its own list in its own CurTransactionContext, but
+ * successful subtransactions add their entries to their parent's list.
+ * Failed subtransactions simply discard their lists. Since these lists
+ * are independent, there may be notify events in a subtransaction's list
+ * that duplicate events in some ancestor (sub) transaction; we get rid of
+ * the dups when merging the subtransaction's list into its parent's.
+ *
+ * Note: the action and notify lists do not interact within a transaction.
+ * In particular, if a transaction does NOTIFY and then LISTEN on the same
+ * condition name, it will get a self-notify at commit. This is a bit odd
+ * but is consistent with our historical behavior.
+ */
+typedef struct Notification
+{
+ uint16 channel_len; /* length of channel-name string */
+ uint16 payload_len; /* length of payload string */
+ /* null-terminated channel name, then null-terminated payload follow */
+ char data[FLEXIBLE_ARRAY_MEMBER];
+} Notification;
+
+typedef struct NotificationList
+{
+ int nestingLevel; /* current transaction nesting depth */
+ List *events; /* list of Notification structs */
+ HTAB *hashtab; /* hash of NotificationHash structs, or NULL */
+ struct NotificationList *upper; /* details for upper transaction levels */
+} NotificationList;
+
+#define MIN_HASHABLE_NOTIFIES 16 /* threshold to build hashtab */
+
+typedef struct NotificationHash
+{
+ Notification *event; /* => the actual Notification struct */
+} NotificationHash;
+
+static NotificationList *pendingNotifies = NULL;
+
+/*
+ * Inbound notifications are initially processed by HandleNotifyInterrupt(),
+ * called from inside a signal handler. That just sets the
+ * notifyInterruptPending flag and sets the process
+ * latch. ProcessNotifyInterrupt() will then be called whenever it's safe to
+ * actually deal with the interrupt.
+ */
+volatile sig_atomic_t notifyInterruptPending = false;
+
+/* True if we've registered an on_shmem_exit cleanup */
+static bool unlistenExitRegistered = false;
+
+/* True if we're currently registered as a listener in asyncQueueControl */
+static bool amRegisteredListener = false;
+
+/* have we advanced to a page that's a multiple of QUEUE_CLEANUP_DELAY? */
+static bool tryAdvanceTail = false;
+
+/* GUC parameter */
+bool Trace_notify = false;
+
+/* local function prototypes */
+static int asyncQueuePageDiff(int p, int q);
+static bool asyncQueuePagePrecedes(int p, int q);
+static void queue_listen(ListenActionKind action, const char *channel);
+static void Async_UnlistenOnExit(int code, Datum arg);
+static void Exec_ListenPreCommit(void);
+static void Exec_ListenCommit(const char *channel);
+static void Exec_UnlistenCommit(const char *channel);
+static void Exec_UnlistenAllCommit(void);
+static bool IsListeningOn(const char *channel);
+static void asyncQueueUnregister(void);
+static bool asyncQueueIsFull(void);
+static bool asyncQueueAdvance(volatile QueuePosition *position, int entryLength);
+static void asyncQueueNotificationToEntry(Notification *n, AsyncQueueEntry *qe);
+static ListCell *asyncQueueAddEntries(ListCell *nextNotify);
+static double asyncQueueUsage(void);
+static void asyncQueueFillWarning(void);
+static void SignalBackends(void);
+static void asyncQueueReadAllNotifications(void);
+static bool asyncQueueProcessPageEntries(volatile QueuePosition *current,
+ QueuePosition stop,
+ char *page_buffer,
+ Snapshot snapshot);
+static void asyncQueueAdvanceTail(void);
+static void ProcessIncomingNotify(bool flush);
+static bool AsyncExistsPendingNotify(Notification *n);
+static void AddEventToPendingNotifies(Notification *n);
+static uint32 notification_hash(const void *key, Size keysize);
+static int notification_match(const void *key1, const void *key2, Size keysize);
+static void ClearPendingActionsAndNotifies(void);
+
+/*
+ * Compute the difference between two queue page numbers (i.e., p - q),
+ * accounting for wraparound.
+ */
+static int
+asyncQueuePageDiff(int p, int q)
+{
+	int			delta;
+	const int	modulus = QUEUE_MAX_PAGE + 1;
+
+	/*
+	 * The comparison is performed modulo (QUEUE_MAX_PAGE+1)/2.  Both page
+	 * numbers must lie within 0..QUEUE_MAX_PAGE.
+	 */
+	Assert(p >= 0 && p <= QUEUE_MAX_PAGE);
+	Assert(q >= 0 && q <= QUEUE_MAX_PAGE);
+
+	delta = p - q;
+	if (delta >= modulus / 2)
+		delta -= modulus;
+	else if (delta < -(modulus / 2))
+		delta += modulus;
+	return delta;
+}
+
+/*
+ * Is p < q, accounting for wraparound?
+ *
+ * Since asyncQueueIsFull() blocks creation of a page that could precede any
+ * extant page, we need not assess entries within a page.
+ */
+static bool
+asyncQueuePagePrecedes(int p, int q)
+{
+	int			diff = asyncQueuePageDiff(p, q);
+
+	return diff < 0;
+}
+
+/*
+ * Report space needed for our shared memory area
+ */
+Size
+AsyncShmemSize(void)
+{
+	Size		size;
+
+	/* This had better match AsyncShmemInit */
+	size = add_size(offsetof(AsyncQueueControl, backend),
+					mul_size(MaxBackends + 1, sizeof(QueueBackendStatus)));
+
+	/* Plus the SLRU buffer area used for the pg_notify queue pages */
+	size = add_size(size, SimpleLruShmemSize(NUM_NOTIFY_BUFFERS, 0));
+
+	return size;
+}
+
+/*
+ * Initialize our shared memory area
+ */
+void
+AsyncShmemInit(void)
+{
+	bool		found;
+	Size		size;
+
+	/*
+	 * Create or attach to the AsyncQueueControl structure.
+	 *
+	 * The used entries in the backend[] array run from 1 to MaxBackends; the
+	 * zero'th entry is unused but must be allocated.
+	 */
+	size = mul_size(MaxBackends + 1, sizeof(QueueBackendStatus));
+	size = add_size(size, offsetof(AsyncQueueControl, backend));
+
+	asyncQueueControl = (AsyncQueueControl *)
+		ShmemInitStruct("Async Queue Control", size, &found);
+
+	/* "found" is false only for the first process to attach after boot */
+	if (!found)
+	{
+		/* First time through, so initialize it */
+		SET_QUEUE_POS(QUEUE_HEAD, 0, 0);
+		SET_QUEUE_POS(QUEUE_TAIL, 0, 0);
+		QUEUE_STOP_PAGE = 0;
+		QUEUE_FIRST_LISTENER = InvalidBackendId;
+		asyncQueueControl->lastQueueFillWarn = 0;
+		/* zero'th entry won't be used, but let's initialize it anyway */
+		for (int i = 0; i <= MaxBackends; i++)
+		{
+			QUEUE_BACKEND_PID(i) = InvalidPid;
+			QUEUE_BACKEND_DBOID(i) = InvalidOid;
+			QUEUE_NEXT_LISTENER(i) = InvalidBackendId;
+			SET_QUEUE_POS(QUEUE_BACKEND_POS(i), 0, 0);
+		}
+	}
+
+	/*
+	 * Set up SLRU management of the pg_notify data.
+	 */
+	NotifyCtl->PagePrecedes = asyncQueuePagePrecedes;
+	SimpleLruInit(NotifyCtl, "Notify", NUM_NOTIFY_BUFFERS, 0,
+				  NotifySLRULock, "pg_notify", LWTRANCHE_NOTIFY_BUFFER,
+				  SYNC_HANDLER_NONE);
+
+	if (!found)
+	{
+		/*
+		 * During start or reboot, clean out the pg_notify directory.
+		 * Notifications need not survive a crash (see file header comments),
+		 * so any leftover segment files can simply be removed.
+		 */
+		(void) SlruScanDirectory(NotifyCtl, SlruScanDirCbDeleteAll, NULL);
+	}
+}
+
+
+/*
+ * pg_notify -
+ *	  SQL function to send a notification event
+ */
+Datum
+pg_notify(PG_FUNCTION_ARGS)
+{
+	/* Treat a NULL channel or payload the same as an empty string */
+	const char *channel = PG_ARGISNULL(0) ? "" :
+		text_to_cstring(PG_GETARG_TEXT_PP(0));
+	const char *payload = PG_ARGISNULL(1) ? "" :
+		text_to_cstring(PG_GETARG_TEXT_PP(1));
+
+	/* For NOTIFY as a statement, this is checked in ProcessUtility */
+	PreventCommandDuringRecovery("NOTIFY");
+
+	Async_Notify(channel, payload);
+
+	PG_RETURN_VOID();
+}
+
+
+/*
+ * Async_Notify
+ *
+ *		This is executed by the SQL notify command.
+ *
+ *		Adds the message to the list of pending notifies.
+ *		Actual notification happens during transaction commit.
+ *		^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ */
+void
+Async_Notify(const char *channel, const char *payload)
+{
+	int			my_level = GetCurrentTransactionNestLevel();
+	size_t		channel_len;
+	size_t		payload_len;
+	Notification *n;
+	MemoryContext oldcontext;
+
+	/* Parallel workers cannot participate in the notify protocol */
+	if (IsParallelWorker())
+		elog(ERROR, "cannot send notifications from a parallel worker");
+
+	if (Trace_notify)
+		elog(DEBUG1, "Async_Notify(%s)", channel);
+
+	channel_len = channel ? strlen(channel) : 0;
+	payload_len = payload ? strlen(payload) : 0;
+
+	/* a channel name must be specified */
+	if (channel_len == 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("channel name cannot be empty")));
+
+	/* enforce length limits */
+	if (channel_len >= NAMEDATALEN)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("channel name too long")));
+
+	if (payload_len >= NOTIFY_PAYLOAD_MAX_LENGTH)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("payload string too long")));
+
+	/*
+	 * We must construct the Notification entry, even if we end up not using
+	 * it, in order to compare it cheaply to existing list entries.
+	 *
+	 * The notification list needs to live until end of transaction, so store
+	 * it in the transaction context.
+	 */
+	oldcontext = MemoryContextSwitchTo(CurTransactionContext);
+
+	/*
+	 * The length checks above guarantee both lengths fit in the uint16
+	 * fields.  Data layout: channel string, NUL, payload string, NUL.
+	 */
+	n = (Notification *) palloc(offsetof(Notification, data) +
+								channel_len + payload_len + 2);
+	n->channel_len = channel_len;
+	n->payload_len = payload_len;
+	strcpy(n->data, channel);
+	if (payload)
+		strcpy(n->data + channel_len + 1, payload);
+	else
+		n->data[channel_len + 1] = '\0';
+
+	if (pendingNotifies == NULL || my_level > pendingNotifies->nestingLevel)
+	{
+		NotificationList *notifies;
+
+		/*
+		 * First notify event in current (sub)xact. Note that we allocate the
+		 * NotificationList in TopTransactionContext; the nestingLevel might
+		 * get changed later by AtSubCommit_Notify.
+		 */
+		notifies = (NotificationList *)
+			MemoryContextAlloc(TopTransactionContext,
+							   sizeof(NotificationList));
+		notifies->nestingLevel = my_level;
+		notifies->events = list_make1(n);
+		/* We certainly don't need a hashtable yet */
+		notifies->hashtab = NULL;
+		notifies->upper = pendingNotifies;
+		pendingNotifies = notifies;
+	}
+	else
+	{
+		/* Now check for duplicates */
+		if (AsyncExistsPendingNotify(n))
+		{
+			/* It's a dup, so forget it */
+			pfree(n);
+			MemoryContextSwitchTo(oldcontext);
+			return;
+		}
+
+		/* Append more events to existing list */
+		AddEventToPendingNotifies(n);
+	}
+
+	MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * queue_listen
+ *		Common code for listen, unlisten, unlisten all commands.
+ *
+ * Adds the request to the list of pending actions.
+ * Actual update of the listenChannels list happens during transaction
+ * commit.
+ */
+static void
+queue_listen(ListenActionKind action, const char *channel)
+{
+	int			my_level = GetCurrentTransactionNestLevel();
+	ListenAction *actrec;
+	MemoryContext oldcontext;
+
+	/*
+	 * Unlike Async_Notify, we make no attempt to collapse duplicate
+	 * requests.  Getting the interactions of conflicting
+	 * LISTEN/UNLISTEN/UNLISTEN_ALL right would be too complicated, and sane
+	 * applications wouldn't see any performance benefit anyway.
+	 */
+	oldcontext = MemoryContextSwitchTo(CurTransactionContext);
+
+	/* space for terminating null is included in sizeof(ListenAction) */
+	actrec = (ListenAction *) palloc(offsetof(ListenAction, channel) +
+									 strlen(channel) + 1);
+	actrec->action = action;
+	strcpy(actrec->channel, channel);
+
+	if (pendingActions != NULL && my_level <= pendingActions->nestingLevel)
+	{
+		/* Just tack the request onto the current (sub)xact's list */
+		pendingActions->actions = lappend(pendingActions->actions, actrec);
+	}
+	else
+	{
+		ActionList *actions;
+
+		/*
+		 * First action in the current sub(xact).  The ActionList itself goes
+		 * in TopTransactionContext; AtSubCommit_Notify may adjust its
+		 * nestingLevel later.
+		 */
+		actions = (ActionList *)
+			MemoryContextAlloc(TopTransactionContext, sizeof(ActionList));
+		actions->nestingLevel = my_level;
+		actions->actions = list_make1(actrec);
+		actions->upper = pendingActions;
+		pendingActions = actions;
+	}
+
+	MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Async_Listen
+ *
+ *		This is executed by the SQL listen command.
+ */
+void
+Async_Listen(const char *channel)
+{
+	if (Trace_notify)
+		elog(DEBUG1, "Async_Listen(%s,%d)", channel, MyProcPid);
+
+	/* Defer the actual listenChannels update to transaction commit */
+	queue_listen(LISTEN_LISTEN, channel);
+}
+
+/*
+ * Async_Unlisten
+ *
+ *		This is executed by the SQL unlisten command.
+ */
+void
+Async_Unlisten(const char *channel)
+{
+	if (Trace_notify)
+		elog(DEBUG1, "Async_Unlisten(%s,%d)", channel, MyProcPid);
+
+	/*
+	 * If this backend has never registered a LISTEN (no exit callback) and
+	 * has no pending actions, there is nothing to undo; skip queuing.
+	 */
+	if (!unlistenExitRegistered && pendingActions == NULL)
+		return;
+
+	queue_listen(LISTEN_UNLISTEN, channel);
+}
+
+/*
+ * Async_UnlistenAll
+ *
+ *		This is invoked by UNLISTEN * command, and also at backend exit.
+ */
+void
+Async_UnlistenAll(void)
+{
+	if (Trace_notify)
+		elog(DEBUG1, "Async_UnlistenAll(%d)", MyProcPid);
+
+	/*
+	 * If this backend has never registered a LISTEN (no exit callback) and
+	 * has no pending actions, there is nothing to undo; skip queuing.
+	 */
+	if (!unlistenExitRegistered && pendingActions == NULL)
+		return;
+
+	queue_listen(LISTEN_UNLISTEN_ALL, "");
+}
+
+/*
+ * SQL function: return a set of the channel names this backend is actively
+ * listening to.
+ *
+ * Note: this coding relies on the fact that the listenChannels list cannot
+ * change within a transaction.
+ */
+Datum
+pg_listening_channels(PG_FUNCTION_ARGS)
+{
+	FuncCallContext *funcctx;
+
+	/* stuff done only on the first call of the function */
+	if (SRF_IS_FIRSTCALL())
+	{
+		/* create a function context for cross-call persistence */
+		funcctx = SRF_FIRSTCALL_INIT();
+	}
+
+	/* stuff done on every call of the function */
+	funcctx = SRF_PERCALL_SETUP();
+
+	/* call_cntr doubles as the index of the next channel to return */
+	if (funcctx->call_cntr < list_length(listenChannels))
+	{
+		char	   *channel = (char *) list_nth(listenChannels,
+												funcctx->call_cntr);
+
+		SRF_RETURN_NEXT(funcctx, CStringGetTextDatum(channel));
+	}
+
+	/* all channels emitted; end the set */
+	SRF_RETURN_DONE(funcctx);
+}
+
+/*
+ * Async_UnlistenOnExit
+ *
+ * This is executed at backend exit if we have done any LISTENs in this
+ * backend.  It might not be necessary anymore, if the user UNLISTENed
+ * everything, but we don't try to detect that case.
+ */
+static void
+Async_UnlistenOnExit(int code, Datum arg)
+{
+	/* Drop all local listen state, then deregister from shared memory */
+	Exec_UnlistenAllCommit();
+	asyncQueueUnregister();
+}
+
+/*
+ * AtPrepare_Notify
+ *
+ *		This is called at the prepare phase of a two-phase
+ *		transaction.  Save the state for possible commit later.
+ */
+void
+AtPrepare_Notify(void)
+{
+	/* It's not allowed to have any pending LISTEN/UNLISTEN/NOTIFY actions */
+	if (pendingNotifies != NULL || pendingActions != NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot PREPARE a transaction that has executed LISTEN, UNLISTEN, or NOTIFY")));
+}
+
+/*
+ * PreCommit_Notify
+ *
+ *		This is called at transaction commit, before actually committing to
+ *		clog.
+ *
+ *		If there are pending LISTEN actions, make sure we are listed in the
+ *		shared-memory listener array.  This must happen before commit to
+ *		ensure we don't miss any notifies from transactions that commit
+ *		just after ours.
+ *
+ *		If there are outbound notify requests in the pendingNotifies list,
+ *		add them to the global queue.  We do that before commit so that
+ *		we can still throw error if we run out of queue space.
+ */
+void
+PreCommit_Notify(void)
+{
+	ListCell   *p;
+
+	if (!pendingActions && !pendingNotifies)
+		return;					/* no relevant statements in this xact */
+
+	if (Trace_notify)
+		elog(DEBUG1, "PreCommit_Notify");
+
+	/* Preflight for any pending listen/unlisten actions */
+	if (pendingActions != NULL)
+	{
+		foreach(p, pendingActions->actions)
+		{
+			ListenAction *actrec = (ListenAction *) lfirst(p);
+
+			switch (actrec->action)
+			{
+				case LISTEN_LISTEN:
+					/*
+					 * Idempotent: this is a no-op after the first call in
+					 * the transaction (it checks amRegisteredListener).
+					 */
+					Exec_ListenPreCommit();
+					break;
+				case LISTEN_UNLISTEN:
+					/* there is no Exec_UnlistenPreCommit() */
+					break;
+				case LISTEN_UNLISTEN_ALL:
+					/* there is no Exec_UnlistenAllPreCommit() */
+					break;
+			}
+		}
+	}
+
+	/* Queue any pending notifies (must happen after the above) */
+	if (pendingNotifies)
+	{
+		ListCell   *nextNotify;
+
+		/*
+		 * Make sure that we have an XID assigned to the current transaction.
+		 * GetCurrentTransactionId is cheap if we already have an XID, but not
+		 * so cheap if we don't, and we'd prefer not to do that work while
+		 * holding NotifyQueueLock.
+		 */
+		(void) GetCurrentTransactionId();
+
+		/*
+		 * Serialize writers by acquiring a special lock that we hold till
+		 * after commit.  This ensures that queue entries appear in commit
+		 * order, and in particular that there are never uncommitted queue
+		 * entries ahead of committed ones, so an uncommitted transaction
+		 * can't block delivery of deliverable notifications.
+		 *
+		 * We use a heavyweight lock so that it'll automatically be released
+		 * after either commit or abort.  This also allows deadlocks to be
+		 * detected, though really a deadlock shouldn't be possible here.
+		 *
+		 * The lock is on "database 0", which is pretty ugly but it doesn't
+		 * seem worth inventing a special locktag category just for this.
+		 * (Historical note: before PG 9.0, a similar lock on "database 0" was
+		 * used by the flatfiles mechanism.)
+		 */
+		LockSharedObject(DatabaseRelationId, InvalidOid, 0,
+						 AccessExclusiveLock);
+
+		/*
+		 * Now push the notifications into the queue.  asyncQueueAddEntries
+		 * consumes as many events as fit on one queue page per call and
+		 * returns NULL once everything has been written.
+		 */
+		nextNotify = list_head(pendingNotifies->events);
+		while (nextNotify != NULL)
+		{
+			/*
+			 * Add the pending notifications to the queue.  We acquire and
+			 * release NotifyQueueLock once per page, which might be overkill
+			 * but it does allow readers to get in while we're doing this.
+			 *
+			 * A full queue is very uncommon and should really not happen,
+			 * given that we have so much space available in the SLRU pages.
+			 * Nevertheless we need to deal with this possibility.  Note that
+			 * when we get here we are in the process of committing our
+			 * transaction, but we have not yet committed to clog, so at this
+			 * point in time we can still roll the transaction back.
+			 */
+			LWLockAcquire(NotifyQueueLock, LW_EXCLUSIVE);
+			asyncQueueFillWarning();
+			if (asyncQueueIsFull())
+				ereport(ERROR,
+						(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+						 errmsg("too many notifications in the NOTIFY queue")));
+			nextNotify = asyncQueueAddEntries(nextNotify);
+			LWLockRelease(NotifyQueueLock);
+		}
+
+		/* Note that we don't clear pendingNotifies; AtCommit_Notify will. */
+	}
+}
+
+/*
+ * AtCommit_Notify
+ *
+ *		This is called at transaction commit, after committing to clog.
+ *
+ *		Update listenChannels and clear transaction-local state.
+ *
+ *		If we issued any notifications in the transaction, send signals to
+ *		listening backends (possibly including ourselves) to process them.
+ *		Also, if we filled enough queue pages with new notifies, try to
+ *		advance the queue tail pointer.
+ */
+void
+AtCommit_Notify(void)
+{
+	ListCell   *p;
+
+	/*
+	 * Allow transactions that have not executed LISTEN/UNLISTEN/NOTIFY to
+	 * return as soon as possible
+	 */
+	if (!pendingActions && !pendingNotifies)
+		return;
+
+	if (Trace_notify)
+		elog(DEBUG1, "AtCommit_Notify");
+
+	/* Perform any pending listen/unlisten actions */
+	if (pendingActions != NULL)
+	{
+		foreach(p, pendingActions->actions)
+		{
+			ListenAction *actrec = (ListenAction *) lfirst(p);
+
+			switch (actrec->action)
+			{
+				case LISTEN_LISTEN:
+					Exec_ListenCommit(actrec->channel);
+					break;
+				case LISTEN_UNLISTEN:
+					Exec_UnlistenCommit(actrec->channel);
+					break;
+				case LISTEN_UNLISTEN_ALL:
+					Exec_UnlistenAllCommit();
+					break;
+			}
+		}
+	}
+
+	/*
+	 * If no longer listening to anything, get out of listener array.  (This
+	 * can happen if the transaction UNLISTENed our last channel, or did
+	 * UNLISTEN *, via the actions just executed above.)
+	 */
+	if (amRegisteredListener && listenChannels == NIL)
+		asyncQueueUnregister();
+
+	/*
+	 * Send signals to listening backends.  We need do this only if there are
+	 * pending notifies, which were previously added to the shared queue by
+	 * PreCommit_Notify().
+	 */
+	if (pendingNotifies != NULL)
+		SignalBackends();
+
+	/*
+	 * If it's time to try to advance the global tail pointer, do that.
+	 *
+	 * (It might seem odd to do this in the sender, when more than likely the
+	 * listeners won't yet have read the messages we just sent.  However,
+	 * there's less contention if only the sender does it, and there is little
+	 * need for urgency in advancing the global tail.  So this typically will
+	 * be clearing out messages that were sent some time ago.)
+	 */
+	if (tryAdvanceTail)
+	{
+		tryAdvanceTail = false;
+		asyncQueueAdvanceTail();
+	}
+
+	/* And clean up the transaction-local action/notify lists */
+	ClearPendingActionsAndNotifies();
+}
+
+/*
+ * Exec_ListenPreCommit --- subroutine for PreCommit_Notify
+ *
+ * This function must make sure we are ready to catch any incoming messages.
+ */
+static void
+Exec_ListenPreCommit(void)
+{
+	QueuePosition head;
+	QueuePosition max;
+	BackendId	prevListener;
+
+	/*
+	 * Nothing to do if we are already listening to something, nor if we
+	 * already ran this routine in this transaction.
+	 */
+	if (amRegisteredListener)
+		return;
+
+	if (Trace_notify)
+		elog(DEBUG1, "Exec_ListenPreCommit(%d)", MyProcPid);
+
+	/*
+	 * Before registering, make sure we will unlisten before dying. (Note:
+	 * this action does not get undone if we abort later.)
+	 */
+	if (!unlistenExitRegistered)
+	{
+		before_shmem_exit(Async_UnlistenOnExit, 0);
+		unlistenExitRegistered = true;
+	}
+
+	/*
+	 * This is our first LISTEN, so establish our pointer.
+	 *
+	 * We set our pointer to the global tail pointer and then move it forward
+	 * over already-committed notifications.  This ensures we cannot miss any
+	 * not-yet-committed notifications.  We might get a few more but that
+	 * doesn't hurt.
+	 *
+	 * In some scenarios there might be a lot of committed notifications that
+	 * have not yet been pruned away (because some backend is being lazy about
+	 * reading them).  To reduce our startup time, we can look at other
+	 * backends and adopt the maximum "pos" pointer of any backend that's in
+	 * our database; any notifications it's already advanced over are surely
+	 * committed and need not be re-examined by us.  (We must consider only
+	 * backends connected to our DB, because others will not have bothered to
+	 * check committed-ness of notifications in our DB.)
+	 *
+	 * We need exclusive lock here so we can look at other backends' entries
+	 * and manipulate the list links.
+	 */
+	LWLockAcquire(NotifyQueueLock, LW_EXCLUSIVE);
+	head = QUEUE_HEAD;
+	max = QUEUE_TAIL;
+	prevListener = InvalidBackendId;
+	/*
+	 * Scan the listener list (apparently maintained in increasing-BackendId
+	 * order; the insertion below preserves that) to find both our catch-up
+	 * position and our insertion point.
+	 */
+	for (BackendId i = QUEUE_FIRST_LISTENER; i > 0; i = QUEUE_NEXT_LISTENER(i))
+	{
+		if (QUEUE_BACKEND_DBOID(i) == MyDatabaseId)
+			max = QUEUE_POS_MAX(max, QUEUE_BACKEND_POS(i));
+		/* Also find last listening backend before this one */
+		if (i < MyBackendId)
+			prevListener = i;
+	}
+	/* Publish our starting position and identity in the shared array */
+	QUEUE_BACKEND_POS(MyBackendId) = max;
+	QUEUE_BACKEND_PID(MyBackendId) = MyProcPid;
+	QUEUE_BACKEND_DBOID(MyBackendId) = MyDatabaseId;
+	/* Insert backend into list of listeners at correct position */
+	if (prevListener > 0)
+	{
+		QUEUE_NEXT_LISTENER(MyBackendId) = QUEUE_NEXT_LISTENER(prevListener);
+		QUEUE_NEXT_LISTENER(prevListener) = MyBackendId;
+	}
+	else
+	{
+		QUEUE_NEXT_LISTENER(MyBackendId) = QUEUE_FIRST_LISTENER;
+		QUEUE_FIRST_LISTENER = MyBackendId;
+	}
+	LWLockRelease(NotifyQueueLock);
+
+	/* Now we are listed in the global array, so remember we're listening */
+	amRegisteredListener = true;
+
+	/*
+	 * Try to move our pointer forward as far as possible.  This will skip
+	 * over already-committed notifications, which we want to do because they
+	 * might be quite stale.  Note that we are not yet listening on anything,
+	 * so we won't deliver such notifications to our frontend.  Also, although
+	 * our transaction might have executed NOTIFY, those message(s) aren't
+	 * queued yet so we won't skip them here.
+	 */
+	if (!QUEUE_POS_EQUAL(max, head))
+		asyncQueueReadAllNotifications();
+}
+
+/*
+ * Exec_ListenCommit --- subroutine for AtCommit_Notify
+ *
+ * Record that this backend now listens on the given channel, by appending
+ * a copy of the name to listenChannels (unless it's already present).
+ */
+static void
+Exec_ListenCommit(const char *channel)
+{
+	MemoryContext prevcxt;
+
+	/* Nothing to do if we already listen on this channel */
+	if (IsListeningOn(channel))
+		return;
+
+	/*
+	 * The list cell and the copied channel name must outlive the current
+	 * transaction, so allocate them in TopMemoryContext.
+	 *
+	 * XXX An out-of-memory failure here would be unpleasant, since we have
+	 * already committed; for now we don't attempt to guard against that,
+	 * but it could be improved someday.
+	 */
+	prevcxt = MemoryContextSwitchTo(TopMemoryContext);
+	listenChannels = lappend(listenChannels, pstrdup(channel));
+	MemoryContextSwitchTo(prevcxt);
+}
+
+/*
+ * Exec_UnlistenCommit --- subroutine for AtCommit_Notify
+ *
+ * Drop the given channel name from listenChannels, freeing its storage.
+ * Quietly does nothing if we weren't listening on it in the first place.
+ */
+static void
+Exec_UnlistenCommit(const char *channel)
+{
+	ListCell   *lc;
+
+	if (Trace_notify)
+		elog(DEBUG1, "Exec_UnlistenCommit(%s,%d)", channel, MyProcPid);
+
+	foreach(lc, listenChannels)
+	{
+		char	   *curname = (char *) lfirst(lc);
+
+		if (strcmp(curname, channel) != 0)
+			continue;
+
+		/* Found it: unlink the cell, release the name, and stop looking */
+		listenChannels = foreach_delete_current(listenChannels, lc);
+		pfree(curname);
+		break;
+	}
+
+	/*
+	 * We do not complain about unlistening something not being listened;
+	 * should we?
+	 */
+}
+
+/*
+ * Exec_UnlistenAllCommit --- subroutine for AtCommit_Notify
+ *
+ *		Unlisten on all channels for this backend.
+ *
+ *		This only empties the local listenChannels list; removal from the
+ *		shared listener array happens afterward in AtCommit_Notify, which
+ *		deregisters us once it sees listenChannels has become NIL.
+ */
+static void
+Exec_UnlistenAllCommit(void)
+{
+	if (Trace_notify)
+		elog(DEBUG1, "Exec_UnlistenAllCommit(%d)", MyProcPid);
+
+	/* Free both the list cells and the channel-name strings they hold */
+	list_free_deep(listenChannels);
+	listenChannels = NIL;
+}
+
+/*
+ * Test whether we are actively listening on the given channel name.
+ *
+ * Note: this function is executed for every notification found in the queue.
+ * Perhaps it is worth further optimization, eg convert the list to a sorted
+ * array so we can binary-search it.  In practice the list is likely to be
+ * fairly short, though.
+ */
+static bool
+IsListeningOn(const char *channel)
+{
+	ListCell   *lc;
+
+	foreach(lc, listenChannels)
+	{
+		const char *curname = (const char *) lfirst(lc);
+
+		if (strcmp(curname, channel) == 0)
+			return true;
+	}
+	return false;
+}
+
+/*
+ * Remove our entry from the listeners array when we are no longer listening
+ * on any channel.  NB: must not fail if we're already not listening.
+ */
+static void
+asyncQueueUnregister(void)
+{
+	/* Callers must first have emptied the local channel list */
+	Assert(listenChannels == NIL);	/* else caller error */
+
+	if (!amRegisteredListener)	/* nothing to do */
+		return;
+
+	/*
+	 * Need exclusive lock here to manipulate list links.
+	 */
+	LWLockAcquire(NotifyQueueLock, LW_EXCLUSIVE);
+	/* Mark our entry as invalid */
+	QUEUE_BACKEND_PID(MyBackendId) = InvalidPid;
+	QUEUE_BACKEND_DBOID(MyBackendId) = InvalidOid;
+	/* and remove it from the (singly-linked) list of listeners */
+	if (QUEUE_FIRST_LISTENER == MyBackendId)
+		QUEUE_FIRST_LISTENER = QUEUE_NEXT_LISTENER(MyBackendId);
+	else
+	{
+		for (BackendId i = QUEUE_FIRST_LISTENER; i > 0; i = QUEUE_NEXT_LISTENER(i))
+		{
+			if (QUEUE_NEXT_LISTENER(i) == MyBackendId)
+			{
+				QUEUE_NEXT_LISTENER(i) = QUEUE_NEXT_LISTENER(MyBackendId);
+				break;
+			}
+		}
+	}
+	/* Reset our next-link so a later re-registration starts from clean state */
+	QUEUE_NEXT_LISTENER(MyBackendId) = InvalidBackendId;
+	LWLockRelease(NotifyQueueLock);
+
+	/* mark ourselves as no longer listed in the global array */
+	amRegisteredListener = false;
+}
+
+/*
+ * Test whether there is room to insert more notification messages.
+ *
+ * Caller must hold at least shared NotifyQueueLock.
+ */
+static bool
+asyncQueueIsFull(void)
+{
+	int			nextheadPage;
+	int			tailBoundary;
+
+	/*
+	 * The queue counts as full as soon as allocating one more head page
+	 * would make the head logically precede the current global tail
+	 * pointer: we must never let the head wrap around onto the tail, since
+	 * slru.c would be confused by such a page.  For safety, round the tail
+	 * back to a segment boundary (the truncation logic in
+	 * asyncQueueAdvanceTail does not guarantee this, so doing it here is
+	 * optional but prudent).
+	 *
+	 * Note this check deliberately ignores how much room remains on the
+	 * current head page, because asyncQueueAddEntries might create the next
+	 * head page in any case.
+	 */
+	nextheadPage = QUEUE_POS_PAGE(QUEUE_HEAD) + 1;
+	if (nextheadPage > QUEUE_MAX_PAGE)
+		nextheadPage = 0;		/* wrap around */
+
+	tailBoundary = QUEUE_STOP_PAGE;
+	tailBoundary -= tailBoundary % SLRU_PAGES_PER_SEGMENT;
+
+	return asyncQueuePagePrecedes(nextheadPage, tailBoundary);
+}
+
+/*
+ * Advance the QueuePosition to the next entry, assuming that the current
+ * entry is of length entryLength.  If we jump to a new page the function
+ * returns true, else false.
+ */
+static bool
+asyncQueueAdvance(volatile QueuePosition *position, int entryLength)
+{
+	int			newpage = QUEUE_POS_PAGE(*position);
+	int			newoffset = QUEUE_POS_OFFSET(*position);
+	bool		crossedPage = false;
+
+	/* First, step over the entry we just wrote or read */
+	newoffset += entryLength;
+	Assert(newoffset <= QUEUE_PAGESIZE);
+
+	/*
+	 * If even a minimum-size entry could no longer fit at this offset, the
+	 * page is considered exhausted and we advance to the start of the next
+	 * page (wrapping around at QUEUE_MAX_PAGE).  Otherwise this offset is
+	 * the next valid position.
+	 */
+	if (newoffset + QUEUEALIGN(AsyncQueueEntryEmptySize) > QUEUE_PAGESIZE)
+	{
+		newoffset = 0;
+		newpage++;
+		if (newpage > QUEUE_MAX_PAGE)
+			newpage = 0;		/* wrap around */
+		crossedPage = true;
+	}
+
+	SET_QUEUE_POS(*position, newpage, newoffset);
+	return crossedPage;
+}
+
+/*
+ * Fill the AsyncQueueEntry at *qe with an outbound notification message.
+ */
+static void
+asyncQueueNotificationToEntry(Notification *n, AsyncQueueEntry *qe)
+{
+	size_t		chanlen = n->channel_len;
+	size_t		paylen = n->payload_len;
+	int			entrylen;
+
+	Assert(chanlen < NAMEDATALEN);
+	Assert(paylen < NOTIFY_PAYLOAD_MAX_LENGTH);
+
+	/*
+	 * Compute the padded on-queue size; the two string terminators are
+	 * already accounted for in AsyncQueueEntryEmptySize.
+	 */
+	entrylen = QUEUEALIGN(AsyncQueueEntryEmptySize + chanlen + paylen);
+
+	qe->length = entrylen;
+	qe->dboid = MyDatabaseId;
+	qe->xid = GetCurrentTransactionId();
+	qe->srcPid = MyProcPid;
+	/* Copy channel string, payload string, and both NUL terminators */
+	memcpy(qe->data, n->data, chanlen + paylen + 2);
+}
+
+/*
+ * Add pending notifications to the queue.
+ *
+ * We go page by page here, i.e. we stop once we have to go to a new page but
+ * we will be called again and then fill that next page. If an entry does not
+ * fit into the current page, we write a dummy entry with an InvalidOid as the
+ * database OID in order to fill the page. So every page is always used up to
+ * the last byte which simplifies reading the page later.
+ *
+ * We are passed the list cell (in pendingNotifies->events) containing the next
+ * notification to write and return the first still-unwritten cell back.
+ * Eventually we will return NULL indicating all is done.
+ *
+ * We are holding NotifyQueueLock already from the caller and grab
+ * NotifySLRULock locally in this function.
+ */
+static ListCell *
+asyncQueueAddEntries(ListCell *nextNotify)
+{
+	AsyncQueueEntry qe;
+	QueuePosition queue_head;
+	int			pageno;
+	int			offset;
+	int			slotno;
+
+	/* We hold both NotifyQueueLock and NotifySLRULock during this operation */
+	LWLockAcquire(NotifySLRULock, LW_EXCLUSIVE);
+
+	/*
+	 * We work with a local copy of QUEUE_HEAD, which we write back to shared
+	 * memory upon exiting.  The reason for this is that if we have to advance
+	 * to a new page, SimpleLruZeroPage might fail (out of disk space, for
+	 * instance), and we must not advance QUEUE_HEAD if it does.  (Otherwise,
+	 * subsequent insertions would try to put entries into a page that slru.c
+	 * thinks doesn't exist yet.)  So, use a local position variable.  Note
+	 * that if we do fail, any already-inserted queue entries are forgotten;
+	 * this is okay, since they'd be useless anyway after our transaction
+	 * rolls back.
+	 */
+	queue_head = QUEUE_HEAD;
+
+	/*
+	 * If this is the first write since the postmaster started, we need to
+	 * initialize the first page of the async SLRU.  Otherwise, the current
+	 * page should be initialized already, so just fetch it.
+	 *
+	 * (We could also take the first path when the SLRU position has just
+	 * wrapped around, but re-zeroing the page is harmless in that case.)
+	 */
+	pageno = QUEUE_POS_PAGE(queue_head);
+	if (QUEUE_POS_IS_ZERO(queue_head))
+		slotno = SimpleLruZeroPage(NotifyCtl, pageno);
+	else
+		slotno = SimpleLruReadPage(NotifyCtl, pageno, true,
+								   InvalidTransactionId);
+
+	/* Note we mark the page dirty before writing in it */
+	NotifyCtl->shared->page_dirty[slotno] = true;
+
+	while (nextNotify != NULL)
+	{
+		Notification *n = (Notification *) lfirst(nextNotify);
+
+		/* Construct a valid queue entry in local variable qe */
+		asyncQueueNotificationToEntry(n, &qe);
+
+		offset = QUEUE_POS_OFFSET(queue_head);
+
+		/* Check whether the entry really fits on the current page */
+		if (offset + qe.length <= QUEUE_PAGESIZE)
+		{
+			/* OK, so advance nextNotify past this item */
+			nextNotify = lnext(pendingNotifies->events, nextNotify);
+		}
+		else
+		{
+			/*
+			 * Write a dummy entry to fill up the page. Actually readers will
+			 * only check dboid and since it won't match any reader's database
+			 * OID, they will ignore this entry and move on.
+			 *
+			 * (Note that nextNotify is intentionally NOT advanced here: the
+			 * real entry will be written at the start of the next page, on
+			 * the next call to this function.)
+			 */
+			qe.length = QUEUE_PAGESIZE - offset;
+			qe.dboid = InvalidOid;
+			qe.data[0] = '\0';	/* empty channel */
+			qe.data[1] = '\0';	/* empty payload */
+		}
+
+		/* Now copy qe into the shared buffer page */
+		memcpy(NotifyCtl->shared->page_buffer[slotno] + offset,
+			   &qe,
+			   qe.length);
+
+		/* Advance queue_head appropriately, and detect if page is full */
+		if (asyncQueueAdvance(&(queue_head), qe.length))
+		{
+			/*
+			 * Page is full, so we're done here, but first fill the next page
+			 * with zeroes.  The reason to do this is to ensure that slru.c's
+			 * idea of the head page is always the same as ours, which avoids
+			 * boundary problems in SimpleLruTruncate.  The test in
+			 * asyncQueueIsFull() ensured that there is room to create this
+			 * page without overrunning the queue.
+			 */
+			slotno = SimpleLruZeroPage(NotifyCtl, QUEUE_POS_PAGE(queue_head));
+
+			/*
+			 * If the new page address is a multiple of QUEUE_CLEANUP_DELAY,
+			 * set flag to remember that we should try to advance the tail
+			 * pointer (we don't want to actually do that right here).
+			 */
+			if (QUEUE_POS_PAGE(queue_head) % QUEUE_CLEANUP_DELAY == 0)
+				tryAdvanceTail = true;
+
+			/* And exit the loop */
+			break;
+		}
+	}
+
+	/* Success, so update the global QUEUE_HEAD */
+	QUEUE_HEAD = queue_head;
+
+	LWLockRelease(NotifySLRULock);
+
+	return nextNotify;
+}
+
+/*
+ * SQL function to return the fraction of the notification queue currently
+ * occupied.
+ */
+Datum
+pg_notification_queue_usage(PG_FUNCTION_ARGS)
+{
+	double		result;
+
+	/*
+	 * First advance the queue tail pointer as far as we can, so that we
+	 * don't report stale (too-large) usage.
+	 */
+	asyncQueueAdvanceTail();
+
+	/* Shared lock suffices for reading the queue bounds */
+	LWLockAcquire(NotifyQueueLock, LW_SHARED);
+	result = asyncQueueUsage();
+	LWLockRelease(NotifyQueueLock);
+
+	PG_RETURN_FLOAT8(result);
+}
+
+/*
+ * Return the fraction of the queue that is currently occupied.
+ *
+ * The caller must hold NotifyQueueLock in (at least) shared mode.
+ *
+ * Note: we measure the distance to the logical tail page, not the physical
+ * tail page.  In some sense that's wrong, but the relative position of the
+ * physical tail is affected by details such as SLRU segment boundaries,
+ * so that a result based on that is unpleasantly unstable.
+ */
+static double
+asyncQueueUsage(void)
+{
+	int			usedPages;
+
+	usedPages = QUEUE_POS_PAGE(QUEUE_HEAD) - QUEUE_POS_PAGE(QUEUE_TAIL);
+
+	/* Fast path for the common, empty-queue case */
+	if (usedPages == 0)
+		return 0.0;
+
+	/* Negative difference means the head has wrapped around, tail not yet */
+	if (usedPages < 0)
+		usedPages += QUEUE_MAX_PAGE + 1;
+
+	return (double) usedPages / (double) ((QUEUE_MAX_PAGE + 1) / 2);
+}
+
+/*
+ * Check whether the queue is at least half full, and emit a warning if so.
+ *
+ * This is unlikely given the size of the queue, but possible.
+ * The warnings show up at most once every QUEUE_FULL_WARN_INTERVAL.
+ *
+ * Caller must hold exclusive NotifyQueueLock.
+ */
+static void
+asyncQueueFillWarning(void)
+{
+	double		fillDegree;
+	TimestampTz t;
+
+	fillDegree = asyncQueueUsage();
+	if (fillDegree < 0.5)
+		return;
+
+	t = GetCurrentTimestamp();
+
+	/* Rate-limit: warn at most once per QUEUE_FULL_WARN_INTERVAL */
+	if (TimestampDifferenceExceeds(asyncQueueControl->lastQueueFillWarn,
+								   t, QUEUE_FULL_WARN_INTERVAL))
+	{
+		QueuePosition min = QUEUE_HEAD;
+		int32		minPid = InvalidPid;
+
+		/*
+		 * Find the listener with the oldest (minimum) queue position; that
+		 * backend is the one holding back cleanup, so name it in the
+		 * warning's detail message.
+		 */
+		for (BackendId i = QUEUE_FIRST_LISTENER; i > 0; i = QUEUE_NEXT_LISTENER(i))
+		{
+			Assert(QUEUE_BACKEND_PID(i) != InvalidPid);
+			min = QUEUE_POS_MIN(min, QUEUE_BACKEND_POS(i));
+			if (QUEUE_POS_EQUAL(min, QUEUE_BACKEND_POS(i)))
+				minPid = QUEUE_BACKEND_PID(i);
+		}
+
+		ereport(WARNING,
+				(errmsg("NOTIFY queue is %.0f%% full", fillDegree * 100),
+				 (minPid != InvalidPid ?
+				  errdetail("The server process with PID %d is among those with the oldest transactions.", minPid)
+				  : 0),
+				 (minPid != InvalidPid ?
+				  errhint("The NOTIFY queue cannot be emptied until that process ends its current transaction.")
+				  : 0)));
+
+		asyncQueueControl->lastQueueFillWarn = t;
+	}
+}
+
+/*
+ * Send signals to listening backends.
+ *
+ * Normally we signal only backends in our own database, since only those
+ * backends could be interested in notifies we send.  However, if there's
+ * notify traffic in our database but no traffic in another database that
+ * does have listener(s), those listeners will fall further and further
+ * behind.  Waken them anyway if they're far enough behind, so that they'll
+ * advance their queue position pointers, allowing the global tail to advance.
+ *
+ * Since we know the BackendId and the Pid the signaling is quite cheap.
+ *
+ * This is called during CommitTransaction(), so it's important for it
+ * to have very low probability of failure.
+ */
+static void
+SignalBackends(void)
+{
+	int32	   *pids;
+	BackendId  *ids;
+	int			count;
+
+	/*
+	 * Identify backends that we need to signal.  We don't want to send
+	 * signals while holding the NotifyQueueLock, so this loop just builds a
+	 * list of target PIDs.
+	 *
+	 * XXX in principle these pallocs could fail, which would be bad. Maybe
+	 * preallocate the arrays?  They're not that large, though.
+	 */
+	pids = (int32 *) palloc(MaxBackends * sizeof(int32));
+	ids = (BackendId *) palloc(MaxBackends * sizeof(BackendId));
+	count = 0;
+
+	LWLockAcquire(NotifyQueueLock, LW_EXCLUSIVE);
+	for (BackendId i = QUEUE_FIRST_LISTENER; i > 0; i = QUEUE_NEXT_LISTENER(i))
+	{
+		int32		pid = QUEUE_BACKEND_PID(i);
+		QueuePosition pos;
+
+		Assert(pid != InvalidPid);
+		pos = QUEUE_BACKEND_POS(i);
+		if (QUEUE_BACKEND_DBOID(i) == MyDatabaseId)
+		{
+			/*
+			 * Always signal listeners in our own database, unless they're
+			 * already caught up (unlikely, but possible).
+			 */
+			if (QUEUE_POS_EQUAL(pos, QUEUE_HEAD))
+				continue;
+		}
+		else
+		{
+			/*
+			 * Listeners in other databases should be signaled only if they
+			 * are far behind.
+			 */
+			if (asyncQueuePageDiff(QUEUE_POS_PAGE(QUEUE_HEAD),
+								   QUEUE_POS_PAGE(pos)) < QUEUE_CLEANUP_DELAY)
+				continue;
+		}
+		/*
+		 * OK, need to signal this one.  Remember both the PID (used to send
+		 * the signal) and the BackendId (passed along in the signal payload
+		 * below).
+		 */
+		pids[count] = pid;
+		ids[count] = i;
+		count++;
+	}
+	LWLockRelease(NotifyQueueLock);
+
+	/* Now send signals */
+	for (int i = 0; i < count; i++)
+	{
+		int32		pid = pids[i];
+
+		/*
+		 * If we are signaling our own process, no need to involve the kernel;
+		 * just set the flag directly.
+		 */
+		if (pid == MyProcPid)
+		{
+			notifyInterruptPending = true;
+			continue;
+		}
+
+		/*
+		 * Note: assuming things aren't broken, a signal failure here could
+		 * only occur if the target backend exited since we released
+		 * NotifyQueueLock; which is unlikely but certainly possible. So we
+		 * just log a low-level debug message if it happens.
+		 */
+		if (SendProcSignal(pid, PROCSIG_NOTIFY_INTERRUPT, ids[i]) < 0)
+			elog(DEBUG3, "could not signal backend with PID %d: %m", pid);
+	}
+
+	pfree(pids);
+	pfree(ids);
+}
+
+/*
+ * AtAbort_Notify
+ *
+ *	This is called at transaction abort.
+ *
+ *	Gets rid of pending actions and outbound notifies that we would have
+ *	executed if the transaction got committed.
+ */
+void
+AtAbort_Notify(void)
+{
+	/*
+	 * If we LISTEN but then roll back the transaction after PreCommit_Notify,
+	 * we have registered as a listener but have not made any entry in
+	 * listenChannels.  In that case, deregister again.
+	 */
+	if (amRegisteredListener && listenChannels == NIL)
+		asyncQueueUnregister();
+
+	/*
+	 * And clean up the local pending lists.  (The heavyweight lock taken in
+	 * PreCommit_Notify, if any, needs no explicit release here; per the
+	 * comments there, it is dropped automatically at transaction abort.)
+	 */
+	ClearPendingActionsAndNotifies();
+}
+
+/*
+ * AtSubCommit_Notify() --- Take care of subtransaction commit.
+ *
+ * Reassign all items in the pending lists to the parent transaction.
+ */
+void
+AtSubCommit_Notify(void)
+{
+	int			my_level = GetCurrentTransactionNestLevel();
+
+	/*
+	 * If there are actions at our nesting level, we must reparent them.
+	 * (Levels shallower than ours belong to outer transactions and are left
+	 * untouched.)
+	 */
+	if (pendingActions != NULL &&
+		pendingActions->nestingLevel >= my_level)
+	{
+		if (pendingActions->upper == NULL ||
+			pendingActions->upper->nestingLevel < my_level - 1)
+		{
+			/* nothing to merge; give the whole thing to the parent */
+			--pendingActions->nestingLevel;
+		}
+		else
+		{
+			ActionList *childPendingActions = pendingActions;
+
+			pendingActions = pendingActions->upper;
+
+			/*
+			 * Mustn't try to eliminate duplicates here --- see queue_listen()
+			 */
+			pendingActions->actions =
+				list_concat(pendingActions->actions,
+							childPendingActions->actions);
+			pfree(childPendingActions);
+		}
+	}
+
+	/* If there are notifies at our nesting level, we must reparent them. */
+	if (pendingNotifies != NULL &&
+		pendingNotifies->nestingLevel >= my_level)
+	{
+		/*
+		 * Can't be deeper than the current level: any deeper lists must
+		 * already have been merged or discarded by earlier subxact
+		 * commit/abort processing.
+		 */
+		Assert(pendingNotifies->nestingLevel == my_level);
+
+		if (pendingNotifies->upper == NULL ||
+			pendingNotifies->upper->nestingLevel < my_level - 1)
+		{
+			/* nothing to merge; give the whole thing to the parent */
+			--pendingNotifies->nestingLevel;
+		}
+		else
+		{
+			/*
+			 * Formerly, we didn't bother to eliminate duplicates here, but
+			 * now we must, else we fall foul of "Assert(!found)", either here
+			 * or during a later attempt to build the parent-level hashtable.
+			 */
+			NotificationList *childPendingNotifies = pendingNotifies;
+			ListCell   *l;
+
+			pendingNotifies = pendingNotifies->upper;
+			/* Insert all the subxact's events into parent, except for dups */
+			foreach(l, childPendingNotifies->events)
+			{
+				Notification *childn = (Notification *) lfirst(l);
+
+				if (!AsyncExistsPendingNotify(childn))
+					AddEventToPendingNotifies(childn);
+			}
+			pfree(childPendingNotifies);
+		}
+	}
+}
+
+/*
+ * AtSubAbort_Notify() --- Take care of subtransaction abort.
+ */
+void
+AtSubAbort_Notify(void)
+{
+	int			my_level = GetCurrentTransactionNestLevel();
+
+	/*
+	 * All we have to do is pop the stack --- the actions/notifies made in
+	 * this subxact are no longer interesting, and the space will be freed
+	 * when CurTransactionContext is recycled. We still have to free the
+	 * ActionList and NotificationList objects themselves, though, because
+	 * those are allocated in TopTransactionContext.
+	 *
+	 * Note that there might be no entries at all, or no entries for the
+	 * current subtransaction level, either because none were ever created, or
+	 * because we reentered this routine due to trouble during subxact abort.
+	 */
+	while (pendingActions != NULL &&
+		   pendingActions->nestingLevel >= my_level)
+	{
+		ActionList *doomedActions = pendingActions;
+
+		pendingActions = doomedActions->upper;
+		pfree(doomedActions);
+	}
+
+	while (pendingNotifies != NULL &&
+		   pendingNotifies->nestingLevel >= my_level)
+	{
+		NotificationList *doomedNotifies = pendingNotifies;
+
+		pendingNotifies = doomedNotifies->upper;
+		pfree(doomedNotifies);
+	}
+}
+
+/*
+ * HandleNotifyInterrupt
+ *
+ *		Signal handler portion of interrupt handling. Let the backend know
+ *		that there's a pending notify interrupt. If we're currently reading
+ *		from the client, this will interrupt the read and
+ *		ProcessClientReadInterrupt() will call ProcessNotifyInterrupt().
+ */
+void
+HandleNotifyInterrupt(void)
+{
+	/*
+	 * Note: this is called by a SIGNAL HANDLER. You must be very wary what
+	 * you do here.  All we do is set a flag and poke our latch; SetLatch is
+	 * designed to be safe to call from signal handlers.
+	 */
+
+	/* signal that work needs to be done */
+	notifyInterruptPending = true;
+
+	/* make sure the event is processed in due course */
+	SetLatch(MyLatch);
+}
+
+/*
+ * ProcessNotifyInterrupt
+ *
+ *		This is called if we see notifyInterruptPending set, just before
+ *		transmitting ReadyForQuery at the end of a frontend command, and
+ *		also if a notify signal occurs while reading from the frontend.
+ *		HandleNotifyInterrupt() will cause the read to be interrupted
+ *		via the process's latch, and this routine will get called.
+ *		If we are truly idle (ie, *not* inside a transaction block),
+ *		process the incoming notifies.
+ *
+ *		If "flush" is true, force any frontend messages out immediately.
+ *		This can be false when being called at the end of a frontend command,
+ *		since we'll flush after sending ReadyForQuery.
+ */
+void
+ProcessNotifyInterrupt(bool flush)
+{
+	/* Do nothing unless we are truly idle, ie not in a transaction block */
+	if (IsTransactionOrTransactionBlock())
+		return;
+
+	/*
+	 * Re-check the flag after each pass: another signal may arrive while
+	 * we're busy transmitting messages to the frontend.
+	 */
+	for (;;)
+	{
+		if (!notifyInterruptPending)
+			break;
+		ProcessIncomingNotify(flush);
+	}
+}
+
+
+/*
+ * Read all pending notifications from the queue, and deliver appropriate
+ * ones to my frontend. Stop when we reach queue head or an uncommitted
+ * notification.
+ */
+static void
+asyncQueueReadAllNotifications(void)
+{
+ volatile QueuePosition pos;
+ QueuePosition head;
+ Snapshot snapshot;
+
+ /* page_buffer must be adequately aligned, so use a union */
+ union
+ {
+ char buf[QUEUE_PAGESIZE];
+ AsyncQueueEntry align;
+ } page_buffer;
+
+ /* Fetch current state */
+ LWLockAcquire(NotifyQueueLock, LW_SHARED);
+ /* Assert checks that we have a valid state entry */
+ Assert(MyProcPid == QUEUE_BACKEND_PID(MyBackendId));
+ pos = QUEUE_BACKEND_POS(MyBackendId);
+ head = QUEUE_HEAD;
+ LWLockRelease(NotifyQueueLock);
+
+ if (QUEUE_POS_EQUAL(pos, head))
+ {
+ /* Nothing to do, we have read all notifications already. */
+ return;
+ }
+
+ /*----------
+ * Get snapshot we'll use to decide which xacts are still in progress.
+ * This is trickier than it might seem, because of race conditions.
+ * Consider the following example:
+ *
+ * Backend 1: Backend 2:
+ *
+ * transaction starts
+ * UPDATE foo SET ...;
+ * NOTIFY foo;
+ * commit starts
+ * queue the notify message
+ * transaction starts
+ * LISTEN foo; -- first LISTEN in session
+ * SELECT * FROM foo WHERE ...;
+ * commit to clog
+ * commit starts
+ * add backend 2 to array of listeners
+ * advance to queue head (this code)
+ * commit to clog
+ *
+ * Transaction 2's SELECT has not seen the UPDATE's effects, since that
+ * wasn't committed yet. Ideally we'd ensure that client 2 would
+ * eventually get transaction 1's notify message, but there's no way
+ * to do that; until we're in the listener array, there's no guarantee
+ * that the notify message doesn't get removed from the queue.
+ *
+ * Therefore the coding technique transaction 2 is using is unsafe:
+ * applications must commit a LISTEN before inspecting database state,
+ * if they want to ensure they will see notifications about subsequent
+ * changes to that state.
+ *
+ * What we do guarantee is that we'll see all notifications from
+ * transactions committing after the snapshot we take here.
+ * Exec_ListenPreCommit has already added us to the listener array,
+ * so no not-yet-committed messages can be removed from the queue
+ * before we see them.
+ *----------
+ */
+ snapshot = RegisterSnapshot(GetLatestSnapshot());
+
+ /*
+ * It is possible that we fail while trying to send a message to our
+ * frontend (for example, because of encoding conversion failure). If
+ * that happens it is critical that we not try to send the same message
+ * over and over again. Therefore, we place a PG_TRY block here that will
+ * forcibly advance our queue position before we lose control to an error.
+ * (We could alternatively retake NotifyQueueLock and move the position
+ * before handling each individual message, but that seems like too much
+ * lock traffic.)
+ */
+ PG_TRY();
+ {
+ bool reachedStop;
+
+ do
+ {
+ int curpage = QUEUE_POS_PAGE(pos);
+ int curoffset = QUEUE_POS_OFFSET(pos);
+ int slotno;
+ int copysize;
+
+ /*
+ * We copy the data from SLRU into a local buffer, so as to avoid
+ * holding the NotifySLRULock while we are examining the entries
+ * and possibly transmitting them to our frontend. Copy only the
+ * part of the page we will actually inspect.
+ */
+ slotno = SimpleLruReadPage_ReadOnly(NotifyCtl, curpage,
+ InvalidTransactionId);
+ if (curpage == QUEUE_POS_PAGE(head))
+ {
+ /* we only want to read as far as head */
+ copysize = QUEUE_POS_OFFSET(head) - curoffset;
+ if (copysize < 0)
+ copysize = 0; /* just for safety */
+ }
+ else
+ {
+ /* fetch all the rest of the page */
+ copysize = QUEUE_PAGESIZE - curoffset;
+ }
+ memcpy(page_buffer.buf + curoffset,
+ NotifyCtl->shared->page_buffer[slotno] + curoffset,
+ copysize);
+ /* Release lock that we got from SimpleLruReadPage_ReadOnly() */
+ LWLockRelease(NotifySLRULock);
+
+ /*
+ * Process messages up to the stop position, end of page, or an
+ * uncommitted message.
+ *
+ * Our stop position is what we found to be the head's position
+ * when we entered this function. It might have changed already.
+ * But if it has, we will receive (or have already received and
+ * queued) another signal and come here again.
+ *
+ * We are not holding NotifyQueueLock here! The queue can only
+ * extend beyond the head pointer (see above) and we leave our
+ * backend's pointer where it is so nobody will truncate or
+ * rewrite pages under us. Especially we don't want to hold a lock
+ * while sending the notifications to the frontend.
+ */
+ reachedStop = asyncQueueProcessPageEntries(&pos, head,
+ page_buffer.buf,
+ snapshot);
+ } while (!reachedStop);
+ }
+ PG_FINALLY();
+ {
+ /* Update shared state */
+ LWLockAcquire(NotifyQueueLock, LW_SHARED);
+ QUEUE_BACKEND_POS(MyBackendId) = pos;
+ LWLockRelease(NotifyQueueLock);
+ }
+ PG_END_TRY();
+
+ /* Done with snapshot */
+ UnregisterSnapshot(snapshot);
+}
+
+/*
+ * Fetch notifications from the shared queue, beginning at position current,
+ * and deliver relevant ones to my frontend.
+ *
+ * The current page must have been fetched into page_buffer from shared
+ * memory. (We could access the page right in shared memory, but that
+ * would imply holding the NotifySLRULock throughout this routine.)
+ *
+ * We stop if we reach the "stop" position, or reach a notification from an
+ * uncommitted transaction, or reach the end of the page.
+ *
+ * The function returns true once we have reached the stop position or an
+ * uncommitted notification, and false if we have finished with the page.
+ * In other words: once it returns true there is no need to look further.
+ * The QueuePosition *current is advanced past all processed messages.
+ */
+static bool
+asyncQueueProcessPageEntries(volatile QueuePosition *current,
+ QueuePosition stop,
+ char *page_buffer,
+ Snapshot snapshot)
+{
+ bool reachedStop = false;
+ bool reachedEndOfPage;
+ AsyncQueueEntry *qe;
+
+ do
+ {
+ QueuePosition thisentry = *current;
+
+ if (QUEUE_POS_EQUAL(thisentry, stop))
+ break;
+
+ qe = (AsyncQueueEntry *) (page_buffer + QUEUE_POS_OFFSET(thisentry));
+
+ /*
+ * Advance *current over this message, possibly to the next page. As
+ * noted in the comments for asyncQueueReadAllNotifications, we must
+ * do this before possibly failing while processing the message.
+ */
+ reachedEndOfPage = asyncQueueAdvance(current, qe->length);
+
+ /* Ignore messages destined for other databases */
+ if (qe->dboid == MyDatabaseId)
+ {
+ if (XidInMVCCSnapshot(qe->xid, snapshot))
+ {
+ /*
+ * The source transaction is still in progress, so we can't
+ * process this message yet. Break out of the loop, but first
+ * back up *current so we will reprocess the message next
+ * time. (Note: it is unlikely but not impossible for
+ * TransactionIdDidCommit to fail, so we can't really avoid
+ * this advance-then-back-up behavior when dealing with an
+ * uncommitted message.)
+ *
+ * Note that we must test XidInMVCCSnapshot before we test
+ * TransactionIdDidCommit, else we might return a message from
+ * a transaction that is not yet visible to snapshots; compare
+ * the comments at the head of heapam_visibility.c.
+ *
+ * Also, while our own xact won't be listed in the snapshot,
+ * we need not check for TransactionIdIsCurrentTransactionId
+ * because our transaction cannot (yet) have queued any
+ * messages.
+ */
+ *current = thisentry;
+ reachedStop = true;
+ break;
+ }
+ else if (TransactionIdDidCommit(qe->xid))
+ {
+ /* qe->data is the null-terminated channel name */
+ char *channel = qe->data;
+
+ if (IsListeningOn(channel))
+ {
+ /* payload follows channel name */
+ char *payload = qe->data + strlen(channel) + 1;
+
+ NotifyMyFrontEnd(channel, payload, qe->srcPid);
+ }
+ }
+ else
+ {
+ /*
+ * The source transaction aborted or crashed, so we just
+ * ignore its notifications.
+ */
+ }
+ }
+
+ /* Loop back if we're not at end of page */
+ } while (!reachedEndOfPage);
+
+ if (QUEUE_POS_EQUAL(*current, stop))
+ reachedStop = true;
+
+ return reachedStop;
+}
+
+/*
+ * Advance the shared queue tail variable to the minimum of all the
+ * per-backend tail pointers. Truncate pg_notify space if possible.
+ *
+ * This is (usually) called during CommitTransaction(), so it's important for
+ * it to have very low probability of failure.
+ */
+static void
+asyncQueueAdvanceTail(void)
+{
+ QueuePosition min;
+ int oldtailpage;
+ int newtailpage;
+ int boundary;
+
+ /* Restrict task to one backend per cluster; see SimpleLruTruncate(). */
+ LWLockAcquire(NotifyQueueTailLock, LW_EXCLUSIVE);
+
+ /*
+ * Compute the new tail. Pre-v13, it's essential that QUEUE_TAIL be exact
+ * (ie, exactly match at least one backend's queue position), so it must
+ * be updated atomically with the actual computation. Since v13, we could
+ * get away with not doing it like that, but it seems prudent to keep it
+ * so.
+ *
+ * Also, because incoming backends will scan forward from QUEUE_TAIL, that
+ * must be advanced before we can truncate any data. Thus, QUEUE_TAIL is
+ * the logical tail, while QUEUE_STOP_PAGE is the physical tail, or oldest
+ * un-truncated page. When QUEUE_STOP_PAGE != QUEUE_POS_PAGE(QUEUE_TAIL),
+ * there are pages we can truncate but haven't yet finished doing so.
+ *
+ * For concurrency's sake, we don't want to hold NotifyQueueLock while
+ * performing SimpleLruTruncate. This is OK because no backend will try
+ * to access the pages we are in the midst of truncating.
+ */
+ LWLockAcquire(NotifyQueueLock, LW_EXCLUSIVE);
+ min = QUEUE_HEAD;
+ for (BackendId i = QUEUE_FIRST_LISTENER; i > 0; i = QUEUE_NEXT_LISTENER(i))
+ {
+ Assert(QUEUE_BACKEND_PID(i) != InvalidPid);
+ min = QUEUE_POS_MIN(min, QUEUE_BACKEND_POS(i));
+ }
+ QUEUE_TAIL = min;
+ oldtailpage = QUEUE_STOP_PAGE;
+ LWLockRelease(NotifyQueueLock);
+
+ /*
+ * We can truncate something if the global tail advanced across an SLRU
+ * segment boundary.
+ *
+ * XXX it might be better to truncate only once every several segments, to
+ * reduce the number of directory scans.
+ */
+ newtailpage = QUEUE_POS_PAGE(min);
+ boundary = newtailpage - (newtailpage % SLRU_PAGES_PER_SEGMENT);
+ if (asyncQueuePagePrecedes(oldtailpage, boundary))
+ {
+ /*
+ * SimpleLruTruncate() will ask for NotifySLRULock but will also
+ * release the lock again.
+ */
+ SimpleLruTruncate(NotifyCtl, newtailpage);
+
+ /*
+ * Update QUEUE_STOP_PAGE. This changes asyncQueueIsFull()'s verdict
+ * for the segment immediately prior to the old tail, allowing fresh
+ * data into that segment.
+ */
+ LWLockAcquire(NotifyQueueLock, LW_EXCLUSIVE);
+ QUEUE_STOP_PAGE = newtailpage;
+ LWLockRelease(NotifyQueueLock);
+ }
+
+ LWLockRelease(NotifyQueueTailLock);
+}
+
+/*
+ * ProcessIncomingNotify
+ *
+ * Scan the queue for arriving notifications and report them to the front
+ * end. The notifications might be from other sessions, or our own;
+ * there's no need to distinguish here.
+ *
+ * If "flush" is true, force any frontend messages out immediately.
+ *
+ * NOTE: since we are outside any transaction, we must create our own.
+ */
+static void
+ProcessIncomingNotify(bool flush)
+{
+ /* We *must* reset the flag */
+ notifyInterruptPending = false;
+
+ /* Do nothing else if we aren't actively listening */
+ if (listenChannels == NIL)
+ return;
+
+ if (Trace_notify)
+ elog(DEBUG1, "ProcessIncomingNotify");
+
+ set_ps_display("notify interrupt");
+
+ /*
+ * We must run asyncQueueReadAllNotifications inside a transaction, else
+ * bad things happen if it gets an error.
+ */
+ StartTransactionCommand();
+
+ asyncQueueReadAllNotifications();
+
+ CommitTransactionCommand();
+
+ /*
+ * If this isn't an end-of-command case, we must flush the notify messages
+ * to ensure frontend gets them promptly.
+ */
+ if (flush)
+ pq_flush();
+
+ set_ps_display("idle");
+
+ if (Trace_notify)
+ elog(DEBUG1, "ProcessIncomingNotify: done");
+}
+
+/*
+ * Send NOTIFY message to my front end.
+ */
+void
+NotifyMyFrontEnd(const char *channel, const char *payload, int32 srcPid)
+{
+ if (whereToSendOutput == DestRemote)
+ {
+ StringInfoData buf;
+
+ pq_beginmessage(&buf, 'A');
+ pq_sendint32(&buf, srcPid);
+ pq_sendstring(&buf, channel);
+ pq_sendstring(&buf, payload);
+ pq_endmessage(&buf);
+
+ /*
+ * NOTE: we do not do pq_flush() here. Some level of caller will
+ * handle it later, allowing this message to be combined into a packet
+ * with other ones.
+ */
+ }
+ else
+ elog(INFO, "NOTIFY for \"%s\" payload \"%s\"", channel, payload);
+}
+
+/* Does pendingNotifies include a match for the given event? */
+static bool
+AsyncExistsPendingNotify(Notification *n)
+{
+ if (pendingNotifies == NULL)
+ return false;
+
+ if (pendingNotifies->hashtab != NULL)
+ {
+ /* Use the hash table to probe for a match */
+ if (hash_search(pendingNotifies->hashtab,
+ &n,
+ HASH_FIND,
+ NULL))
+ return true;
+ }
+ else
+ {
+ /* Must scan the event list */
+ ListCell *l;
+
+ foreach(l, pendingNotifies->events)
+ {
+ Notification *oldn = (Notification *) lfirst(l);
+
+ /* Compare all bytes: channel, payload, and both NUL terminators */
+ if (n->channel_len == oldn->channel_len &&
+ n->payload_len == oldn->payload_len &&
+ memcmp(n->data, oldn->data,
+ n->channel_len + n->payload_len + 2) == 0)
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/*
+ * Add a notification event to a pre-existing pendingNotifies list.
+ *
+ * Because pendingNotifies->events is already nonempty, this works
+ * correctly no matter what CurrentMemoryContext is.
+ */
+static void
+AddEventToPendingNotifies(Notification *n)
+{
+ Assert(pendingNotifies->events != NIL);
+
+ /* Create the hash table if it's time to */
+ if (list_length(pendingNotifies->events) >= MIN_HASHABLE_NOTIFIES &&
+ pendingNotifies->hashtab == NULL)
+ {
+ HASHCTL hash_ctl;
+ ListCell *l;
+
+ /* Create the hash table */
+ hash_ctl.keysize = sizeof(Notification *);
+ hash_ctl.entrysize = sizeof(NotificationHash);
+ hash_ctl.hash = notification_hash;
+ hash_ctl.match = notification_match;
+ hash_ctl.hcxt = CurTransactionContext;
+ pendingNotifies->hashtab =
+ hash_create("Pending Notifies",
+ 256L,
+ &hash_ctl,
+ HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT);
+
+ /* Insert all the already-existing events */
+ foreach(l, pendingNotifies->events)
+ {
+ Notification *oldn = (Notification *) lfirst(l);
+ NotificationHash *hentry;
+ bool found;
+
+ hentry = (NotificationHash *) hash_search(pendingNotifies->hashtab,
+ &oldn,
+ HASH_ENTER,
+ &found);
+ Assert(!found);
+ hentry->event = oldn;
+ }
+ }
+
+ /* Add new event to the list, in order */
+ pendingNotifies->events = lappend(pendingNotifies->events, n);
+
+ /* Add event to the hash table if needed */
+ if (pendingNotifies->hashtab != NULL)
+ {
+ NotificationHash *hentry;
+ bool found;
+
+ hentry = (NotificationHash *) hash_search(pendingNotifies->hashtab,
+ &n,
+ HASH_ENTER,
+ &found);
+ Assert(!found);
+ hentry->event = n;
+ }
+}
+
+/*
+ * notification_hash: hash function for notification hash table
+ *
+ * The hash "keys" are pointers to Notification structs.
+ */
+static uint32
+notification_hash(const void *key, Size keysize)
+{
+ const Notification *k = *(const Notification *const *) key;
+
+ Assert(keysize == sizeof(Notification *));
+ /* We don't bother to include the payload's trailing null in the hash */
+ return DatumGetUInt32(hash_any((const unsigned char *) k->data,
+ k->channel_len + k->payload_len + 1));
+}
+
+/*
+ * notification_match: match function to use with notification_hash
+ */
+static int
+notification_match(const void *key1, const void *key2, Size keysize)
+{
+ const Notification *k1 = *(const Notification *const *) key1;
+ const Notification *k2 = *(const Notification *const *) key2;
+
+ Assert(keysize == sizeof(Notification *));
+ if (k1->channel_len == k2->channel_len &&
+ k1->payload_len == k2->payload_len &&
+ memcmp(k1->data, k2->data,
+ k1->channel_len + k1->payload_len + 2) == 0)
+ return 0; /* equal */
+ return 1; /* not equal */
+}
+
+/*
+ * Clear the pendingActions and pendingNotifies lists.
+ */
+static void
+ClearPendingActionsAndNotifies(void)
+{
+ /*
+ * Everything's allocated in either TopTransactionContext or the context
+ * for the subtransaction to which it corresponds. So, there's nothing to
+ * do here except reset the pointers; the space will be reclaimed when the
+ * contexts are deleted.
+ */
+ pendingActions = NULL;
+ pendingNotifies = NULL;
+}
diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
new file mode 100644
index 0000000..e4b7ffd
--- /dev/null
+++ b/src/backend/commands/cluster.c
@@ -0,0 +1,1736 @@
+/*-------------------------------------------------------------------------
+ *
+ * cluster.c
+ * CLUSTER a table on an index. This is now also used for VACUUM FULL.
+ *
+ * There is hardly anything left of Paul Brown's original implementation...
+ *
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994-5, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/commands/cluster.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/amapi.h"
+#include "access/heapam.h"
+#include "access/multixact.h"
+#include "access/relscan.h"
+#include "access/tableam.h"
+#include "access/toast_internals.h"
+#include "access/transam.h"
+#include "access/xact.h"
+#include "access/xlog.h"
+#include "catalog/catalog.h"
+#include "catalog/dependency.h"
+#include "catalog/heap.h"
+#include "catalog/index.h"
+#include "catalog/namespace.h"
+#include "catalog/objectaccess.h"
+#include "catalog/partition.h"
+#include "catalog/pg_am.h"
+#include "catalog/pg_inherits.h"
+#include "catalog/toasting.h"
+#include "commands/cluster.h"
+#include "commands/defrem.h"
+#include "commands/progress.h"
+#include "commands/tablecmds.h"
+#include "commands/vacuum.h"
+#include "miscadmin.h"
+#include "optimizer/optimizer.h"
+#include "pgstat.h"
+#include "storage/bufmgr.h"
+#include "storage/lmgr.h"
+#include "storage/predicate.h"
+#include "utils/acl.h"
+#include "utils/fmgroids.h"
+#include "utils/inval.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/pg_rusage.h"
+#include "utils/relmapper.h"
+#include "utils/snapmgr.h"
+#include "utils/syscache.h"
+#include "utils/tuplesort.h"
+
+/*
+ * This struct is used to pass around the information on tables to be
+ * clustered. We need this so we can make a list of them when invoked without
+ * a specific table/index pair.
+ */
+typedef struct
+{
+ Oid tableOid;
+ Oid indexOid;
+} RelToCluster;
+
+
+static void cluster_multiple_rels(List *rtcs, ClusterParams *params);
+static void rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose);
+static void copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
+ bool verbose, bool *pSwapToastByContent,
+ TransactionId *pFreezeXid, MultiXactId *pCutoffMulti);
+static List *get_tables_to_cluster(MemoryContext cluster_context);
+static List *get_tables_to_cluster_partitioned(MemoryContext cluster_context,
+ Oid indexOid);
+
+
+/*---------------------------------------------------------------------------
+ * This cluster code allows for clustering multiple tables at once. Because
+ * of this, we cannot just run everything on a single transaction, or we
+ * would be forced to acquire exclusive locks on all the tables being
+ * clustered, simultaneously --- very likely leading to deadlock.
+ *
+ * To solve this we follow a similar strategy to VACUUM code,
+ * clustering each relation in a separate transaction. For this to work,
+ * we need to:
+ * - provide a separate memory context so that we can pass information in
+ * a way that survives across transactions
+ * - start a new transaction every time a new relation is clustered
+ * - check for validity of the information on to-be-clustered relations,
+ * as someone might have deleted a relation behind our back, or
+ * clustered one on a different index
+ * - end the transaction
+ *
+ * The single-relation case does not have any such overhead.
+ *
+ * We also allow a relation to be specified without index. In that case,
+ * the indisclustered bit will be looked up, and an ERROR will be thrown
+ * if there is no index with the bit set.
+ *---------------------------------------------------------------------------
+ */
+void
+cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel)
+{
+ ListCell *lc;
+ ClusterParams params = {0};
+ bool verbose = false;
+ Relation rel = NULL;
+ Oid indexOid = InvalidOid;
+ MemoryContext cluster_context;
+ List *rtcs;
+
+ /* Parse option list */
+ foreach(lc, stmt->params)
+ {
+ DefElem *opt = (DefElem *) lfirst(lc);
+
+ if (strcmp(opt->defname, "verbose") == 0)
+ verbose = defGetBoolean(opt);
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("unrecognized CLUSTER option \"%s\"",
+ opt->defname),
+ parser_errposition(pstate, opt->location)));
+ }
+
+ params.options = (verbose ? CLUOPT_VERBOSE : 0);
+
+ if (stmt->relation != NULL)
+ {
+ /* This is the single-relation case. */
+ Oid tableOid;
+
+ /*
+ * Find, lock, and check permissions on the table. We obtain
+ * AccessExclusiveLock right away to avoid lock-upgrade hazard in the
+ * single-transaction case.
+ */
+ tableOid = RangeVarGetRelidExtended(stmt->relation,
+ AccessExclusiveLock,
+ 0,
+ RangeVarCallbackOwnsTable, NULL);
+ rel = table_open(tableOid, NoLock);
+
+ /*
+ * Reject clustering a remote temp table ... their local buffer
+ * manager is not going to cope.
+ */
+ if (RELATION_IS_OTHER_TEMP(rel))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot cluster temporary tables of other sessions")));
+
+ if (stmt->indexname == NULL)
+ {
+ ListCell *index;
+
+ /* We need to find the index that has indisclustered set. */
+ foreach(index, RelationGetIndexList(rel))
+ {
+ indexOid = lfirst_oid(index);
+ if (get_index_isclustered(indexOid))
+ break;
+ indexOid = InvalidOid;
+ }
+
+ if (!OidIsValid(indexOid))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("there is no previously clustered index for table \"%s\"",
+ stmt->relation->relname)));
+ }
+ else
+ {
+ /*
+ * The index is expected to be in the same namespace as the
+ * relation.
+ */
+ indexOid = get_relname_relid(stmt->indexname,
+ rel->rd_rel->relnamespace);
+ if (!OidIsValid(indexOid))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("index \"%s\" for table \"%s\" does not exist",
+ stmt->indexname, stmt->relation->relname)));
+ }
+
+ if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+ {
+ /* close relation, keep lock till commit */
+ table_close(rel, NoLock);
+
+ /* Do the job. */
+ cluster_rel(tableOid, indexOid, &params);
+
+ return;
+ }
+ }
+
+ /*
+ * By here, we know we are in a multi-table situation. In order to avoid
+ * holding locks for too long, we want to process each table in its own
+ * transaction. This forces us to disallow running inside a user
+ * transaction block.
+ */
+ PreventInTransactionBlock(isTopLevel, "CLUSTER");
+
+ /* Also, we need a memory context to hold our list of relations */
+ cluster_context = AllocSetContextCreate(PortalContext,
+ "Cluster",
+ ALLOCSET_DEFAULT_SIZES);
+
+ /*
+ * Either we're processing a partitioned table, or we were not given any
+ * table name at all. In either case, obtain a list of relations to
+ * process.
+ *
+ * In the former case, an index name must have been given, so we don't
+ * need to recheck its "indisclustered" bit, but we have to check that it
+ * is an index that we can cluster on. In the latter case, we set the
+ * option bit to have indisclustered verified.
+ *
+ * Rechecking the relation itself is necessary here in all cases.
+ */
+ params.options |= CLUOPT_RECHECK;
+ if (rel != NULL)
+ {
+ Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
+ check_index_is_clusterable(rel, indexOid, AccessShareLock);
+ rtcs = get_tables_to_cluster_partitioned(cluster_context, indexOid);
+
+ /* close relation, releasing lock on parent table */
+ table_close(rel, AccessExclusiveLock);
+ }
+ else
+ {
+ rtcs = get_tables_to_cluster(cluster_context);
+ params.options |= CLUOPT_RECHECK_ISCLUSTERED;
+ }
+
+ /* Do the job. */
+ cluster_multiple_rels(rtcs, &params);
+
+ /* Start a new transaction for the cleanup work. */
+ StartTransactionCommand();
+
+ /* Clean up working storage */
+ MemoryContextDelete(cluster_context);
+}
+
+/*
+ * Given a list of relations to cluster, process each of them in a separate
+ * transaction.
+ *
+ * We expect to be in a transaction at start, but there isn't one when we
+ * return.
+ */
+static void
+cluster_multiple_rels(List *rtcs, ClusterParams *params)
+{
+ ListCell *lc;
+
+ /* Commit to get out of starting transaction */
+ PopActiveSnapshot();
+ CommitTransactionCommand();
+
+ /* Cluster the tables, each in a separate transaction */
+ foreach(lc, rtcs)
+ {
+ RelToCluster *rtc = (RelToCluster *) lfirst(lc);
+
+ /* Start a new transaction for each relation. */
+ StartTransactionCommand();
+
+ /* functions in indexes may want a snapshot set */
+ PushActiveSnapshot(GetTransactionSnapshot());
+
+ /* Do the job. */
+ cluster_rel(rtc->tableOid, rtc->indexOid, params);
+
+ PopActiveSnapshot();
+ CommitTransactionCommand();
+ }
+}
+
+/*
+ * cluster_rel
+ *
+ * This clusters the table by creating a new, clustered table and
+ * swapping the relfilenodes of the new table and the old table, so
+ * the OID of the original table is preserved. Thus we do not lose
+ * GRANT, inheritance nor references to this table (this was a bug
+ * in releases through 7.3).
+ *
+ * Indexes are rebuilt too, via REINDEX. Since we are effectively bulk-loading
+ * the new table, it's better to create the indexes afterwards than to fill
+ * them incrementally while we load the table.
+ *
+ * If indexOid is InvalidOid, the table will be rewritten in physical order
+ * instead of index order. This is the new implementation of VACUUM FULL,
+ * and error messages should refer to the operation as VACUUM not CLUSTER.
+ */
+void
+cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params)
+{
+ Relation OldHeap;
+ Oid save_userid;
+ int save_sec_context;
+ int save_nestlevel;
+ bool verbose = ((params->options & CLUOPT_VERBOSE) != 0);
+ bool recheck = ((params->options & CLUOPT_RECHECK) != 0);
+
+ /* Check for user-requested abort. */
+ CHECK_FOR_INTERRUPTS();
+
+ pgstat_progress_start_command(PROGRESS_COMMAND_CLUSTER, tableOid);
+ if (OidIsValid(indexOid))
+ pgstat_progress_update_param(PROGRESS_CLUSTER_COMMAND,
+ PROGRESS_CLUSTER_COMMAND_CLUSTER);
+ else
+ pgstat_progress_update_param(PROGRESS_CLUSTER_COMMAND,
+ PROGRESS_CLUSTER_COMMAND_VACUUM_FULL);
+
+ /*
+ * We grab exclusive access to the target rel and index for the duration
+ * of the transaction. (This is redundant for the single-transaction
+ * case, since cluster() already did it.) The index lock is taken inside
+ * check_index_is_clusterable.
+ */
+ OldHeap = try_relation_open(tableOid, AccessExclusiveLock);
+
+ /* If the table has gone away, we can skip processing it */
+ if (!OldHeap)
+ {
+ pgstat_progress_end_command();
+ return;
+ }
+
+ /*
+ * Switch to the table owner's userid, so that any index functions are run
+ * as that user. Also lock down security-restricted operations and
+ * arrange to make GUC variable changes local to this command.
+ */
+ GetUserIdAndSecContext(&save_userid, &save_sec_context);
+ SetUserIdAndSecContext(OldHeap->rd_rel->relowner,
+ save_sec_context | SECURITY_RESTRICTED_OPERATION);
+ save_nestlevel = NewGUCNestLevel();
+
+ /*
+ * Since we may open a new transaction for each relation, we have to check
+ * that the relation still is what we think it is.
+ *
+ * If this is a single-transaction CLUSTER, we can skip these tests. We
+ * *must* skip the one on indisclustered since it would reject an attempt
+ * to cluster a not-previously-clustered index.
+ */
+ if (recheck)
+ {
+ /* Check that the user still owns the relation */
+ if (!pg_class_ownercheck(tableOid, save_userid))
+ {
+ relation_close(OldHeap, AccessExclusiveLock);
+ goto out;
+ }
+
+ /*
+ * Silently skip a temp table for a remote session. Only doing this
+ * check in the "recheck" case is appropriate (which currently means
+ * somebody is executing a database-wide CLUSTER or on a partitioned
+ * table), because there is another check in cluster() which will stop
+ * any attempt to cluster remote temp tables by name. There is
+ * another check in cluster_rel which is redundant, but we leave it
+ * for extra safety.
+ */
+ if (RELATION_IS_OTHER_TEMP(OldHeap))
+ {
+ relation_close(OldHeap, AccessExclusiveLock);
+ goto out;
+ }
+
+ if (OidIsValid(indexOid))
+ {
+ /*
+ * Check that the index still exists
+ */
+ if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(indexOid)))
+ {
+ relation_close(OldHeap, AccessExclusiveLock);
+ goto out;
+ }
+
+ /*
+ * Check that the index is still the one with indisclustered set,
+ * if needed.
+ */
+ if ((params->options & CLUOPT_RECHECK_ISCLUSTERED) != 0 &&
+ !get_index_isclustered(indexOid))
+ {
+ relation_close(OldHeap, AccessExclusiveLock);
+ goto out;
+ }
+ }
+ }
+
+ /*
+ * We allow VACUUM FULL, but not CLUSTER, on shared catalogs. CLUSTER
+ * would work in most respects, but the index would only get marked as
+ * indisclustered in the current database, leading to unexpected behavior
+ * if CLUSTER were later invoked in another database.
+ */
+ if (OidIsValid(indexOid) && OldHeap->rd_rel->relisshared)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot cluster a shared catalog")));
+
+ /*
+ * Don't process temp tables of other backends ... their local buffer
+ * manager is not going to cope.
+ */
+ if (RELATION_IS_OTHER_TEMP(OldHeap))
+ {
+ if (OidIsValid(indexOid))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot cluster temporary tables of other sessions")));
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot vacuum temporary tables of other sessions")));
+ }
+
+ /*
+ * Also check for active uses of the relation in the current transaction,
+ * including open scans and pending AFTER trigger events.
+ */
+ CheckTableNotInUse(OldHeap, OidIsValid(indexOid) ? "CLUSTER" : "VACUUM");
+
+ /* Check heap and index are valid to cluster on */
+ if (OidIsValid(indexOid))
+ check_index_is_clusterable(OldHeap, indexOid, AccessExclusiveLock);
+
+ /*
+ * Quietly ignore the request if this is a materialized view which has not
+ * been populated from its query. No harm is done because there is no data
+ * to deal with, and we don't want to throw an error if this is part of a
+ * multi-relation request -- for example, CLUSTER was run on the entire
+ * database.
+ */
+ if (OldHeap->rd_rel->relkind == RELKIND_MATVIEW &&
+ !RelationIsPopulated(OldHeap))
+ {
+ relation_close(OldHeap, AccessExclusiveLock);
+ goto out;
+ }
+
+ Assert(OldHeap->rd_rel->relkind == RELKIND_RELATION ||
+ OldHeap->rd_rel->relkind == RELKIND_MATVIEW ||
+ OldHeap->rd_rel->relkind == RELKIND_TOASTVALUE);
+
+ /*
+ * All predicate locks on the tuples or pages are about to be made
+ * invalid, because we move tuples around. Promote them to relation
+ * locks. Predicate locks on indexes will be promoted when they are
+ * reindexed.
+ */
+ TransferPredicateLocksToHeapRelation(OldHeap);
+
+ /* rebuild_relation does all the dirty work */
+ rebuild_relation(OldHeap, indexOid, verbose);
+
+ /* NB: rebuild_relation does table_close() on OldHeap */
+
+out:
+ /* Roll back any GUC changes executed by index functions */
+ AtEOXact_GUC(false, save_nestlevel);
+
+ /* Restore userid and security context */
+ SetUserIdAndSecContext(save_userid, save_sec_context);
+
+ pgstat_progress_end_command();
+}
+
+/*
+ * Verify that the specified heap and index are valid to cluster on
+ *
+ * Side effect: obtains lock on the index. The caller may
+ * in some cases already have AccessExclusiveLock on the table, but
+ * not in all cases so we can't rely on the table-level lock for
+ * protection here.
+ */
+void
+check_index_is_clusterable(Relation OldHeap, Oid indexOid, LOCKMODE lockmode)
+{
+ Relation OldIndex;
+
+ OldIndex = index_open(indexOid, lockmode);
+
+ /*
+ * Check that index is in fact an index on the given relation
+ */
+ if (OldIndex->rd_index == NULL ||
+ OldIndex->rd_index->indrelid != RelationGetRelid(OldHeap))
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("\"%s\" is not an index for table \"%s\"",
+ RelationGetRelationName(OldIndex),
+ RelationGetRelationName(OldHeap))));
+
+ /* Index AM must allow clustering */
+ if (!OldIndex->rd_indam->amclusterable)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot cluster on index \"%s\" because access method does not support clustering",
+ RelationGetRelationName(OldIndex))));
+
+ /*
+ * Disallow clustering on incomplete indexes (those that might not index
+ * every row of the relation). We could relax this by making a separate
+ * seqscan pass over the table to copy the missing rows, but that seems
+ * expensive and tedious.
+ */
+ if (!heap_attisnull(OldIndex->rd_indextuple, Anum_pg_index_indpred, NULL))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot cluster on partial index \"%s\"",
+ RelationGetRelationName(OldIndex))));
+
+ /*
+ * Disallow if index is left over from a failed CREATE INDEX CONCURRENTLY;
+ * it might well not contain entries for every heap row, or might not even
+ * be internally consistent. (But note that we don't check indcheckxmin;
+ * the worst consequence of following broken HOT chains would be that we
+ * might put recently-dead tuples out-of-order in the new table, and there
+ * is little harm in that.)
+ */
+ if (!OldIndex->rd_index->indisvalid)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot cluster on invalid index \"%s\"",
+ RelationGetRelationName(OldIndex))));
+
+ /* Drop relcache refcnt on OldIndex, but keep lock */
+ index_close(OldIndex, NoLock);
+}
+
+/*
+ * mark_index_clustered: mark the specified index as the one clustered on
+ *
+ * With indexOid == InvalidOid, will mark all indexes of rel not-clustered.
+ */
+void
+mark_index_clustered(Relation rel, Oid indexOid, bool is_internal)
+{
+ HeapTuple indexTuple;
+ Form_pg_index indexForm;
+ Relation pg_index;
+ ListCell *index;
+
+ /* Disallow applying to a partitioned table */
+ if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot mark index clustered in partitioned table")));
+
+ /*
+ * If the index is already marked clustered, no need to do anything.
+ */
+ if (OidIsValid(indexOid))
+ {
+ if (get_index_isclustered(indexOid))
+ return;
+ }
+
+ /*
+ * Check each index of the relation and set/clear the bit as needed.
+ */
+ pg_index = table_open(IndexRelationId, RowExclusiveLock);
+
+ foreach(index, RelationGetIndexList(rel))
+ {
+ Oid thisIndexOid = lfirst_oid(index);
+
+ indexTuple = SearchSysCacheCopy1(INDEXRELID,
+ ObjectIdGetDatum(thisIndexOid));
+ if (!HeapTupleIsValid(indexTuple))
+ elog(ERROR, "cache lookup failed for index %u", thisIndexOid);
+ indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
+
+ /*
+ * Unset the bit if set. We know it's wrong because we checked this
+ * earlier.
+ */
+ if (indexForm->indisclustered)
+ {
+ indexForm->indisclustered = false;
+ CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
+ }
+ else if (thisIndexOid == indexOid)
+ {
+ /* this was checked earlier, but let's be real sure */
+ if (!indexForm->indisvalid)
+ elog(ERROR, "cannot cluster on invalid index %u", indexOid);
+ indexForm->indisclustered = true;
+ CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
+ }
+
+ InvokeObjectPostAlterHookArg(IndexRelationId, thisIndexOid, 0,
+ InvalidOid, is_internal);
+
+ heap_freetuple(indexTuple);
+ }
+
+ table_close(pg_index, RowExclusiveLock);
+}
+
+/*
+ * rebuild_relation: rebuild an existing relation in index or physical order
+ *
+ * OldHeap: table to rebuild --- must be opened and exclusive-locked!
+ * indexOid: index to cluster by, or InvalidOid to rewrite in physical order.
+ *
+ * NB: this routine closes OldHeap at the right time; caller should not.
+ */
+static void
+rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose)
+{
+ Oid tableOid = RelationGetRelid(OldHeap);
+ Oid accessMethod = OldHeap->rd_rel->relam;
+ Oid tableSpace = OldHeap->rd_rel->reltablespace;
+ Oid OIDNewHeap;
+ char relpersistence;
+ bool is_system_catalog;
+ bool swap_toast_by_content;
+ TransactionId frozenXid;
+ MultiXactId cutoffMulti;
+
+ if (OidIsValid(indexOid))
+ /* Mark the correct index as clustered */
+ mark_index_clustered(OldHeap, indexOid, true);
+
+ /* Remember info about rel before closing OldHeap */
+ relpersistence = OldHeap->rd_rel->relpersistence;
+ is_system_catalog = IsSystemRelation(OldHeap);
+
+ /* Close relcache entry, but keep lock until transaction commit */
+ table_close(OldHeap, NoLock);
+
+ /* Create the transient table that will receive the re-ordered data */
+ OIDNewHeap = make_new_heap(tableOid, tableSpace,
+ accessMethod,
+ relpersistence,
+ AccessExclusiveLock);
+
+ /*
+ * Copy the heap data into the new table in the desired order. This also
+ * computes the three values we pass on to finish_heap_swap below:
+ * swap_toast_by_content, frozenXid, and cutoffMulti.
+ */
+ copy_table_data(OIDNewHeap, tableOid, indexOid, verbose,
+ &swap_toast_by_content, &frozenXid, &cutoffMulti);
+
+ /*
+ * Swap the physical files of the target and transient tables, then
+ * rebuild the target's indexes and throw away the transient table.
+ */
+ finish_heap_swap(tableOid, OIDNewHeap, is_system_catalog,
+ swap_toast_by_content, false, true,
+ frozenXid, cutoffMulti,
+ relpersistence);
+}
+
+
+/*
+ * Create the transient table that will be filled with new data during
+ * CLUSTER, ALTER TABLE, and similar operations. The transient table
+ * duplicates the logical structure of the OldHeap; but will have the
+ * specified physical storage properties NewTableSpace, NewAccessMethod, and
+ * relpersistence.
+ *
+ * After this, the caller should load the new heap with transferred/modified
+ * data, then call finish_heap_swap to complete the operation.
+ *
+ * lockmode is used both to open the old heap and, if a toast table must be
+ * created for the new heap, to create it (see NewHeapCreateToastTable call).
+ */
+Oid
+make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, Oid NewAccessMethod,
+ char relpersistence, LOCKMODE lockmode)
+{
+ TupleDesc OldHeapDesc;
+ char NewHeapName[NAMEDATALEN];
+ Oid OIDNewHeap;
+ Oid toastid;
+ Relation OldHeap;
+ HeapTuple tuple;
+ Datum reloptions;
+ bool isNull;
+ Oid namespaceid;
+
+ OldHeap = table_open(OIDOldHeap, lockmode);
+ OldHeapDesc = RelationGetDescr(OldHeap);
+
+ /*
+ * Note that the NewHeap will not receive any of the defaults or
+ * constraints associated with the OldHeap; we don't need 'em, and there's
+ * no reason to spend cycles inserting them into the catalogs only to
+ * delete them.
+ */
+
+ /*
+ * But we do want to use reloptions of the old heap for new heap.
+ */
+ tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(OIDOldHeap));
+ if (!HeapTupleIsValid(tuple))
+ elog(ERROR, "cache lookup failed for relation %u", OIDOldHeap);
+ reloptions = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
+ &isNull);
+ if (isNull)
+ reloptions = (Datum) 0;
+
+ /*
+ * A temp table must be created in the session's pg_temp namespace;
+ * otherwise keep the old heap's namespace.
+ */
+ if (relpersistence == RELPERSISTENCE_TEMP)
+ namespaceid = LookupCreationNamespace("pg_temp");
+ else
+ namespaceid = RelationGetNamespace(OldHeap);
+
+ /*
+ * Create the new heap, using a temporary name in the same namespace as
+ * the existing table. NOTE: there is some risk of collision with user
+ * relnames. Working around this seems more trouble than it's worth; in
+ * particular, we can't create the new heap in a different namespace from
+ * the old, or we will have problems with the TEMP status of temp tables.
+ *
+ * Note: the new heap is not a shared relation, even if we are rebuilding
+ * a shared rel. However, we do make the new heap mapped if the source is
+ * mapped. This simplifies swap_relation_files, and is absolutely
+ * necessary for rebuilding pg_class, for reasons explained there.
+ */
+ snprintf(NewHeapName, sizeof(NewHeapName), "pg_temp_%u", OIDOldHeap);
+
+ OIDNewHeap = heap_create_with_catalog(NewHeapName,
+ namespaceid,
+ NewTableSpace,
+ InvalidOid,
+ InvalidOid,
+ InvalidOid,
+ OldHeap->rd_rel->relowner,
+ NewAccessMethod,
+ OldHeapDesc,
+ NIL,
+ RELKIND_RELATION,
+ relpersistence,
+ false,
+ RelationIsMapped(OldHeap),
+ ONCOMMIT_NOOP,
+ reloptions,
+ false,
+ true,
+ true,
+ OIDOldHeap,
+ NULL);
+ Assert(OIDNewHeap != InvalidOid);
+
+ ReleaseSysCache(tuple);
+
+ /*
+ * Advance command counter so that the newly-created relation's catalog
+ * tuples will be visible to table_open.
+ */
+ CommandCounterIncrement();
+
+ /*
+ * If necessary, create a TOAST table for the new relation.
+ *
+ * If the relation doesn't have a TOAST table already, we can't need one
+ * for the new relation. The other way around is possible though: if some
+ * wide columns have been dropped, NewHeapCreateToastTable can decide that
+ * no TOAST table is needed for the new table.
+ *
+ * Note that NewHeapCreateToastTable ends with CommandCounterIncrement, so
+ * that the TOAST table will be visible for insertion.
+ */
+ toastid = OldHeap->rd_rel->reltoastrelid;
+ if (OidIsValid(toastid))
+ {
+ /* keep the existing toast table's reloptions, if any */
+ tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(toastid));
+ if (!HeapTupleIsValid(tuple))
+ elog(ERROR, "cache lookup failed for relation %u", toastid);
+ reloptions = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
+ &isNull);
+ if (isNull)
+ reloptions = (Datum) 0;
+
+ NewHeapCreateToastTable(OIDNewHeap, reloptions, lockmode, toastid);
+
+ ReleaseSysCache(tuple);
+ }
+
+ table_close(OldHeap, NoLock);
+
+ return OIDNewHeap;
+}
+
+/*
+ * Do the physical copying of table data.
+ *
+ * There are three output parameters:
+ * *pSwapToastByContent is set true if toast tables must be swapped by content.
+ * *pFreezeXid receives the TransactionId used as freeze cutoff point.
+ * *pCutoffMulti receives the MultiXactId used as a cutoff point.
+ *
+ * All relations touched here (new heap, old heap, old index if any, and the
+ * old heap's toast table if any) are locked with AccessExclusiveLock.
+ */
+static void
+copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
+ bool *pSwapToastByContent, TransactionId *pFreezeXid,
+ MultiXactId *pCutoffMulti)
+{
+ Relation NewHeap,
+ OldHeap,
+ OldIndex;
+ Relation relRelation;
+ HeapTuple reltup;
+ Form_pg_class relform;
+ TupleDesc oldTupDesc PG_USED_FOR_ASSERTS_ONLY;
+ TupleDesc newTupDesc PG_USED_FOR_ASSERTS_ONLY;
+ TransactionId OldestXmin,
+ FreezeXid;
+ MultiXactId OldestMxact,
+ MultiXactCutoff;
+ bool use_sort;
+ double num_tuples = 0,
+ tups_vacuumed = 0,
+ tups_recently_dead = 0;
+ BlockNumber num_pages;
+ int elevel = verbose ? INFO : DEBUG2;
+ PGRUsage ru0;
+ char *nspname;
+
+ pg_rusage_init(&ru0);
+
+ /*
+ * Open the relations we need.
+ */
+ NewHeap = table_open(OIDNewHeap, AccessExclusiveLock);
+ OldHeap = table_open(OIDOldHeap, AccessExclusiveLock);
+ if (OidIsValid(OIDOldIndex))
+ OldIndex = index_open(OIDOldIndex, AccessExclusiveLock);
+ else
+ OldIndex = NULL;
+
+ /* Store a copy of the namespace name for logging purposes */
+ nspname = get_namespace_name(RelationGetNamespace(OldHeap));
+
+ /*
+ * Their tuple descriptors should be exactly alike, but here we only need
+ * assume that they have the same number of columns.
+ */
+ oldTupDesc = RelationGetDescr(OldHeap);
+ newTupDesc = RelationGetDescr(NewHeap);
+ Assert(newTupDesc->natts == oldTupDesc->natts);
+
+ /*
+ * If the OldHeap has a toast table, get lock on the toast table to keep
+ * it from being vacuumed. This is needed because autovacuum processes
+ * toast tables independently of their main tables, with no lock on the
+ * latter. If an autovacuum were to start on the toast table after we
+ * compute our OldestXmin below, it would use a later OldestXmin, and then
+ * possibly remove as DEAD toast tuples belonging to main tuples we think
+ * are only RECENTLY_DEAD. Then we'd fail while trying to copy those
+ * tuples.
+ *
+ * We don't need to open the toast relation here, just lock it. The lock
+ * will be held till end of transaction.
+ */
+ if (OldHeap->rd_rel->reltoastrelid)
+ LockRelationOid(OldHeap->rd_rel->reltoastrelid, AccessExclusiveLock);
+
+ /*
+ * If both tables have TOAST tables, perform toast swap by content. It is
+ * possible that the old table has a toast table but the new one doesn't,
+ * if toastable columns have been dropped. In that case we have to do
+ * swap by links. This is okay because swap by content is only essential
+ * for system catalogs, and we don't support schema changes for them.
+ */
+ if (OldHeap->rd_rel->reltoastrelid && NewHeap->rd_rel->reltoastrelid)
+ {
+ *pSwapToastByContent = true;
+
+ /*
+ * When doing swap by content, any toast pointers written into NewHeap
+ * must use the old toast table's OID, because that's where the toast
+ * data will eventually be found. Set this up by setting rd_toastoid.
+ * This also tells toast_save_datum() to preserve the toast value
+ * OIDs, which we want so as not to invalidate toast pointers in
+ * system catalog caches, and to avoid making multiple copies of a
+ * single toast value.
+ *
+ * Note that we must hold NewHeap open until we are done writing data,
+ * since the relcache will not guarantee to remember this setting once
+ * the relation is closed. Also, this technique depends on the fact
+ * that no one will try to read from the NewHeap until after we've
+ * finished writing it and swapping the rels --- otherwise they could
+ * follow the toast pointers to the wrong place. (It would actually
+ * work for values copied over from the old toast table, but not for
+ * any values that we toast which were previously not toasted.)
+ */
+ NewHeap->rd_toastoid = OldHeap->rd_rel->reltoastrelid;
+ }
+ else
+ *pSwapToastByContent = false;
+
+ /*
+ * Compute xids used to freeze and weed out dead tuples and multixacts.
+ * Since we're going to rewrite the whole table anyway, there's no reason
+ * not to be aggressive about this.
+ */
+ vacuum_set_xid_limits(OldHeap, 0, 0, 0, 0, &OldestXmin, &OldestMxact,
+ &FreezeXid, &MultiXactCutoff);
+
+ /*
+ * FreezeXid will become the table's new relfrozenxid, and that mustn't go
+ * backwards, so take the max.
+ */
+ if (TransactionIdIsValid(OldHeap->rd_rel->relfrozenxid) &&
+ TransactionIdPrecedes(FreezeXid, OldHeap->rd_rel->relfrozenxid))
+ FreezeXid = OldHeap->rd_rel->relfrozenxid;
+
+ /*
+ * MultiXactCutoff, similarly, shouldn't go backwards either.
+ */
+ if (MultiXactIdIsValid(OldHeap->rd_rel->relminmxid) &&
+ MultiXactIdPrecedes(MultiXactCutoff, OldHeap->rd_rel->relminmxid))
+ MultiXactCutoff = OldHeap->rd_rel->relminmxid;
+
+ /*
+ * Decide whether to use an indexscan or seqscan-and-optional-sort to scan
+ * the OldHeap. We know how to use a sort to duplicate the ordering of a
+ * btree index, and will use seqscan-and-sort for that case if the planner
+ * tells us it's cheaper. Otherwise, always indexscan if an index is
+ * provided, else plain seqscan.
+ */
+ if (OldIndex != NULL && OldIndex->rd_rel->relam == BTREE_AM_OID)
+ use_sort = plan_cluster_use_sort(OIDOldHeap, OIDOldIndex);
+ else
+ use_sort = false;
+
+ /* Log what we're doing */
+ if (OldIndex != NULL && !use_sort)
+ ereport(elevel,
+ (errmsg("clustering \"%s.%s\" using index scan on \"%s\"",
+ nspname,
+ RelationGetRelationName(OldHeap),
+ RelationGetRelationName(OldIndex))));
+ else if (use_sort)
+ ereport(elevel,
+ (errmsg("clustering \"%s.%s\" using sequential scan and sort",
+ nspname,
+ RelationGetRelationName(OldHeap))));
+ else
+ ereport(elevel,
+ (errmsg("vacuuming \"%s.%s\"",
+ nspname,
+ RelationGetRelationName(OldHeap))));
+
+ /*
+ * Hand off the actual copying to AM specific function, the generic code
+ * cannot know how to deal with visibility across AMs. Note that this
+ * routine is allowed to set FreezeXid / MultiXactCutoff to different
+ * values (e.g. because the AM doesn't use freezing).
+ */
+ table_relation_copy_for_cluster(OldHeap, NewHeap, OldIndex, use_sort,
+ OldestXmin, &FreezeXid, &MultiXactCutoff,
+ &num_tuples, &tups_vacuumed,
+ &tups_recently_dead);
+
+ /* return selected values to caller, get set as relfrozenxid/minmxid */
+ *pFreezeXid = FreezeXid;
+ *pCutoffMulti = MultiXactCutoff;
+
+ /* Reset rd_toastoid just to be tidy --- it shouldn't be looked at again */
+ NewHeap->rd_toastoid = InvalidOid;
+
+ num_pages = RelationGetNumberOfBlocks(NewHeap);
+
+ /* Log what we did */
+ ereport(elevel,
+ (errmsg("\"%s.%s\": found %.0f removable, %.0f nonremovable row versions in %u pages",
+ nspname,
+ RelationGetRelationName(OldHeap),
+ tups_vacuumed, num_tuples,
+ RelationGetNumberOfBlocks(OldHeap)),
+ errdetail("%.0f dead row versions cannot be removed yet.\n"
+ "%s.",
+ tups_recently_dead,
+ pg_rusage_show(&ru0))));
+
+ if (OldIndex != NULL)
+ index_close(OldIndex, NoLock);
+ table_close(OldHeap, NoLock);
+ table_close(NewHeap, NoLock);
+
+ /* Update pg_class to reflect the correct values of pages and tuples. */
+ relRelation = table_open(RelationRelationId, RowExclusiveLock);
+
+ reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(OIDNewHeap));
+ if (!HeapTupleIsValid(reltup))
+ elog(ERROR, "cache lookup failed for relation %u", OIDNewHeap);
+ relform = (Form_pg_class) GETSTRUCT(reltup);
+
+ relform->relpages = num_pages;
+ relform->reltuples = num_tuples;
+
+ /* Don't update the stats for pg_class. See swap_relation_files. */
+ if (OIDOldHeap != RelationRelationId)
+ CatalogTupleUpdate(relRelation, &reltup->t_self, reltup);
+ else
+ CacheInvalidateRelcacheByTuple(reltup);
+
+ /* Clean up. */
+ heap_freetuple(reltup);
+ table_close(relRelation, RowExclusiveLock);
+
+ /* Make the update visible */
+ CommandCounterIncrement();
+}
+
+/*
+ * Swap the physical files of two given relations.
+ *
+ * We swap the physical identity (reltablespace, relfilenode) while keeping the
+ * same logical identities of the two relations. relpersistence is also
+ * swapped, which is critical since it determines where buffers live for each
+ * relation.
+ *
+ * We can swap associated TOAST data in either of two ways: recursively swap
+ * the physical content of the toast tables (and their indexes), or swap the
+ * TOAST links in the given relations' pg_class entries. The former is needed
+ * to manage rewrites of shared catalogs (where we cannot change the pg_class
+ * links) while the latter is the only way to handle cases in which a toast
+ * table is added or removed altogether.
+ *
+ * Additionally, the first relation is marked with relfrozenxid set to
+ * frozenXid. It seems a bit ugly to have this here, but the caller would
+ * have to do it anyway, so having it here saves a heap_update. Note: in
+ * the swap-toast-links case, we assume we don't need to change the toast
+ * table's relfrozenxid: the new version of the toast table should already
+ * have relfrozenxid set to RecentXmin, which is good enough.
+ *
+ * Lastly, if r2 and its toast table and toast index (if any) are mapped,
+ * their OIDs are emitted into mapped_tables[]. This is hacky but beats
+ * having to look the information up again later in finish_heap_swap.
+ */
+static void
+swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
+ bool swap_toast_by_content,
+ bool is_internal,
+ TransactionId frozenXid,
+ MultiXactId cutoffMulti,
+ Oid *mapped_tables)
+{
+ Relation relRelation;
+ HeapTuple reltup1,
+ reltup2;
+ Form_pg_class relform1,
+ relform2;
+ Oid relfilenode1,
+ relfilenode2;
+ Oid swaptemp;
+ char swptmpchr;
+ Oid relam1,
+ relam2;
+
+ /* We need writable copies of both pg_class tuples. */
+ relRelation = table_open(RelationRelationId, RowExclusiveLock);
+
+ reltup1 = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(r1));
+ if (!HeapTupleIsValid(reltup1))
+ elog(ERROR, "cache lookup failed for relation %u", r1);
+ relform1 = (Form_pg_class) GETSTRUCT(reltup1);
+
+ reltup2 = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(r2));
+ if (!HeapTupleIsValid(reltup2))
+ elog(ERROR, "cache lookup failed for relation %u", r2);
+ relform2 = (Form_pg_class) GETSTRUCT(reltup2);
+
+ relfilenode1 = relform1->relfilenode;
+ relfilenode2 = relform2->relfilenode;
+ /* Remember the pre-swap access methods; used below to fix dependencies */
+ relam1 = relform1->relam;
+ relam2 = relform2->relam;
+
+ if (OidIsValid(relfilenode1) && OidIsValid(relfilenode2))
+ {
+ /*
+ * Normal non-mapped relations: swap relfilenodes, reltablespaces,
+ * relpersistence
+ */
+ Assert(!target_is_pg_class);
+
+ swaptemp = relform1->relfilenode;
+ relform1->relfilenode = relform2->relfilenode;
+ relform2->relfilenode = swaptemp;
+
+ swaptemp = relform1->reltablespace;
+ relform1->reltablespace = relform2->reltablespace;
+ relform2->reltablespace = swaptemp;
+
+ swaptemp = relform1->relam;
+ relform1->relam = relform2->relam;
+ relform2->relam = swaptemp;
+
+ swptmpchr = relform1->relpersistence;
+ relform1->relpersistence = relform2->relpersistence;
+ relform2->relpersistence = swptmpchr;
+
+ /* Also swap toast links, if we're swapping by links */
+ if (!swap_toast_by_content)
+ {
+ swaptemp = relform1->reltoastrelid;
+ relform1->reltoastrelid = relform2->reltoastrelid;
+ relform2->reltoastrelid = swaptemp;
+ }
+ }
+ else
+ {
+ /*
+ * Mapped-relation case. Here we have to swap the relation mappings
+ * instead of modifying the pg_class columns. Both must be mapped.
+ */
+ if (OidIsValid(relfilenode1) || OidIsValid(relfilenode2))
+ elog(ERROR, "cannot swap mapped relation \"%s\" with non-mapped relation",
+ NameStr(relform1->relname));
+
+ /*
+ * We can't change the tablespace nor persistence of a mapped rel, and
+ * we can't handle toast link swapping for one either, because we must
+ * not apply any critical changes to its pg_class row. These cases
+ * should be prevented by upstream permissions tests, so these checks
+ * are non-user-facing emergency backstop.
+ */
+ if (relform1->reltablespace != relform2->reltablespace)
+ elog(ERROR, "cannot change tablespace of mapped relation \"%s\"",
+ NameStr(relform1->relname));
+ if (relform1->relpersistence != relform2->relpersistence)
+ elog(ERROR, "cannot change persistence of mapped relation \"%s\"",
+ NameStr(relform1->relname));
+ if (relform1->relam != relform2->relam)
+ elog(ERROR, "cannot change access method of mapped relation \"%s\"",
+ NameStr(relform1->relname));
+ if (!swap_toast_by_content &&
+ (relform1->reltoastrelid || relform2->reltoastrelid))
+ elog(ERROR, "cannot swap toast by links for mapped relation \"%s\"",
+ NameStr(relform1->relname));
+
+ /*
+ * Fetch the mappings --- shouldn't fail, but be paranoid
+ */
+ relfilenode1 = RelationMapOidToFilenode(r1, relform1->relisshared);
+ if (!OidIsValid(relfilenode1))
+ elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
+ NameStr(relform1->relname), r1);
+ relfilenode2 = RelationMapOidToFilenode(r2, relform2->relisshared);
+ if (!OidIsValid(relfilenode2))
+ elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
+ NameStr(relform2->relname), r2);
+
+ /*
+ * Send replacement mappings to relmapper. Note these won't actually
+ * take effect until CommandCounterIncrement.
+ */
+ RelationMapUpdateMap(r1, relfilenode2, relform1->relisshared, false);
+ RelationMapUpdateMap(r2, relfilenode1, relform2->relisshared, false);
+
+ /*
+ * Pass OIDs of mapped r2 tables back to caller. Advancing the
+ * pointer lets the recursive calls below append further entries.
+ */
+ *mapped_tables++ = r2;
+ }
+
+ /*
+ * Recognize that rel1's relfilenode (swapped from rel2) is new in this
+ * subtransaction. The rel2 storage (swapped from rel1) may or may not be
+ * new.
+ */
+ {
+ Relation rel1,
+ rel2;
+
+ rel1 = relation_open(r1, NoLock);
+ rel2 = relation_open(r2, NoLock);
+ rel2->rd_createSubid = rel1->rd_createSubid;
+ rel2->rd_newRelfilenodeSubid = rel1->rd_newRelfilenodeSubid;
+ rel2->rd_firstRelfilenodeSubid = rel1->rd_firstRelfilenodeSubid;
+ RelationAssumeNewRelfilenode(rel1);
+ relation_close(rel1, NoLock);
+ relation_close(rel2, NoLock);
+ }
+
+ /*
+ * In the case of a shared catalog, these next few steps will only affect
+ * our own database's pg_class row; but that's okay, because they are all
+ * noncritical updates. That's also an important fact for the case of a
+ * mapped catalog, because it's possible that we'll commit the map change
+ * and then fail to commit the pg_class update.
+ */
+
+ /* set rel1's frozen Xid and minimum MultiXid */
+ if (relform1->relkind != RELKIND_INDEX)
+ {
+ Assert(!TransactionIdIsValid(frozenXid) ||
+ TransactionIdIsNormal(frozenXid));
+ relform1->relfrozenxid = frozenXid;
+ relform1->relminmxid = cutoffMulti;
+ }
+
+ /* swap size statistics too, since new rel has freshly-updated stats */
+ {
+ int32 swap_pages;
+ float4 swap_tuples;
+ int32 swap_allvisible;
+
+ swap_pages = relform1->relpages;
+ relform1->relpages = relform2->relpages;
+ relform2->relpages = swap_pages;
+
+ swap_tuples = relform1->reltuples;
+ relform1->reltuples = relform2->reltuples;
+ relform2->reltuples = swap_tuples;
+
+ swap_allvisible = relform1->relallvisible;
+ relform1->relallvisible = relform2->relallvisible;
+ relform2->relallvisible = swap_allvisible;
+ }
+
+ /*
+ * Update the tuples in pg_class --- unless the target relation of the
+ * swap is pg_class itself. In that case, there is zero point in making
+ * changes because we'd be updating the old data that we're about to throw
+ * away. Because the real work being done here for a mapped relation is
+ * just to change the relation map settings, it's all right to not update
+ * the pg_class rows in this case. The most important changes will instead
+ * performed later, in finish_heap_swap() itself.
+ */
+ if (!target_is_pg_class)
+ {
+ CatalogIndexState indstate;
+
+ indstate = CatalogOpenIndexes(relRelation);
+ CatalogTupleUpdateWithInfo(relRelation, &reltup1->t_self, reltup1,
+ indstate);
+ CatalogTupleUpdateWithInfo(relRelation, &reltup2->t_self, reltup2,
+ indstate);
+ CatalogCloseIndexes(indstate);
+ }
+ else
+ {
+ /* no update ... but we do still need relcache inval */
+ CacheInvalidateRelcacheByTuple(reltup1);
+ CacheInvalidateRelcacheByTuple(reltup2);
+ }
+
+ /*
+ * Now that pg_class has been updated with its relevant information for
+ * the swap, update the dependency of the relations to point to their new
+ * table AM, if it has changed. (relam1/relam2 hold the pre-swap values
+ * captured above.)
+ */
+ if (relam1 != relam2)
+ {
+ if (changeDependencyFor(RelationRelationId,
+ r1,
+ AccessMethodRelationId,
+ relam1,
+ relam2) != 1)
+ elog(ERROR, "failed to change access method dependency for relation \"%s.%s\"",
+ get_namespace_name(get_rel_namespace(r1)),
+ get_rel_name(r1));
+ if (changeDependencyFor(RelationRelationId,
+ r2,
+ AccessMethodRelationId,
+ relam2,
+ relam1) != 1)
+ elog(ERROR, "failed to change access method dependency for relation \"%s.%s\"",
+ get_namespace_name(get_rel_namespace(r2)),
+ get_rel_name(r2));
+ }
+
+ /*
+ * Post alter hook for modified relations. The change to r2 is always
+ * internal, but r1 depends on the invocation context.
+ */
+ InvokeObjectPostAlterHookArg(RelationRelationId, r1, 0,
+ InvalidOid, is_internal);
+ InvokeObjectPostAlterHookArg(RelationRelationId, r2, 0,
+ InvalidOid, true);
+
+ /*
+ * If we have toast tables associated with the relations being swapped,
+ * deal with them too.
+ */
+ if (relform1->reltoastrelid || relform2->reltoastrelid)
+ {
+ if (swap_toast_by_content)
+ {
+ if (relform1->reltoastrelid && relform2->reltoastrelid)
+ {
+ /* Recursively swap the contents of the toast tables */
+ swap_relation_files(relform1->reltoastrelid,
+ relform2->reltoastrelid,
+ target_is_pg_class,
+ swap_toast_by_content,
+ is_internal,
+ frozenXid,
+ cutoffMulti,
+ mapped_tables);
+ }
+ else
+ {
+ /* caller messed up */
+ elog(ERROR, "cannot swap toast files by content when there's only one");
+ }
+ }
+ else
+ {
+ /*
+ * We swapped the ownership links, so we need to change dependency
+ * data to match.
+ *
+ * NOTE: it is possible that only one table has a toast table.
+ *
+ * NOTE: at present, a TOAST table's only dependency is the one on
+ * its owning table. If more are ever created, we'd need to use
+ * something more selective than deleteDependencyRecordsFor() to
+ * get rid of just the link we want.
+ */
+ ObjectAddress baseobject,
+ toastobject;
+ long count;
+
+ /*
+ * We disallow this case for system catalogs, to avoid the
+ * possibility that the catalog we're rebuilding is one of the
+ * ones the dependency changes would change. It's too late to be
+ * making any data changes to the target catalog.
+ */
+ if (IsSystemClass(r1, relform1))
+ elog(ERROR, "cannot swap toast files by links for system catalogs");
+
+ /* Delete old dependencies */
+ if (relform1->reltoastrelid)
+ {
+ count = deleteDependencyRecordsFor(RelationRelationId,
+ relform1->reltoastrelid,
+ false);
+ if (count != 1)
+ elog(ERROR, "expected one dependency record for TOAST table, found %ld",
+ count);
+ }
+ if (relform2->reltoastrelid)
+ {
+ count = deleteDependencyRecordsFor(RelationRelationId,
+ relform2->reltoastrelid,
+ false);
+ if (count != 1)
+ elog(ERROR, "expected one dependency record for TOAST table, found %ld",
+ count);
+ }
+
+ /* Register new dependencies */
+ baseobject.classId = RelationRelationId;
+ baseobject.objectSubId = 0;
+ toastobject.classId = RelationRelationId;
+ toastobject.objectSubId = 0;
+
+ if (relform1->reltoastrelid)
+ {
+ baseobject.objectId = r1;
+ toastobject.objectId = relform1->reltoastrelid;
+ recordDependencyOn(&toastobject, &baseobject,
+ DEPENDENCY_INTERNAL);
+ }
+
+ if (relform2->reltoastrelid)
+ {
+ baseobject.objectId = r2;
+ toastobject.objectId = relform2->reltoastrelid;
+ recordDependencyOn(&toastobject, &baseobject,
+ DEPENDENCY_INTERNAL);
+ }
+ }
+ }
+
+ /*
+ * If we're swapping two toast tables by content, do the same for their
+ * valid index. The swap can actually be safely done only if the relations
+ * have indexes.
+ */
+ if (swap_toast_by_content &&
+ relform1->relkind == RELKIND_TOASTVALUE &&
+ relform2->relkind == RELKIND_TOASTVALUE)
+ {
+ Oid toastIndex1,
+ toastIndex2;
+
+ /* Get valid index for each relation */
+ toastIndex1 = toast_get_valid_index(r1,
+ AccessExclusiveLock);
+ toastIndex2 = toast_get_valid_index(r2,
+ AccessExclusiveLock);
+
+ swap_relation_files(toastIndex1,
+ toastIndex2,
+ target_is_pg_class,
+ swap_toast_by_content,
+ is_internal,
+ InvalidTransactionId,
+ InvalidMultiXactId,
+ mapped_tables);
+ }
+
+ /* Clean up. */
+ heap_freetuple(reltup1);
+ heap_freetuple(reltup2);
+
+ table_close(relRelation, RowExclusiveLock);
+
+ /*
+ * Close both relcache entries' smgr links. We need this kluge because
+ * both links will be invalidated during upcoming CommandCounterIncrement.
+ * Whichever of the rels is the second to be cleared will have a dangling
+ * reference to the other's smgr entry. Rather than trying to avoid this
+ * by ordering operations just so, it's easiest to close the links first.
+ * (Fortunately, since one of the entries is local in our transaction,
+ * it's sufficient to clear out our own relcache this way; the problem
+ * cannot arise for other backends when they see our update on the
+ * non-transient relation.)
+ *
+ * Caution: the placement of this step interacts with the decision to
+ * handle toast rels by recursion. When we are trying to rebuild pg_class
+ * itself, the smgr close on pg_class must happen after all accesses in
+ * this function.
+ */
+ RelationCloseSmgrByOid(r1);
+ RelationCloseSmgrByOid(r2);
+}
+
+/*
+ * Remove the transient table that was built by make_new_heap, and finish
+ * cleaning up (including rebuilding all indexes on the old heap).
+ */
+void
+finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap,
+ bool is_system_catalog,
+ bool swap_toast_by_content,
+ bool check_constraints,
+ bool is_internal,
+ TransactionId frozenXid,
+ MultiXactId cutoffMulti,
+ char newrelpersistence)
+{
+ ObjectAddress object;
+ Oid mapped_tables[4];
+ int reindex_flags;
+ ReindexParams reindex_params = {0};
+ int i;
+
+ /* Report that we are now swapping relation files */
+ pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
+ PROGRESS_CLUSTER_PHASE_SWAP_REL_FILES);
+
+ /* Zero out possible results from swapped_relation_files */
+ memset(mapped_tables, 0, sizeof(mapped_tables));
+
+ /*
+ * Swap the contents of the heap relations (including any toast tables).
+ * Also set old heap's relfrozenxid to frozenXid.
+ */
+ swap_relation_files(OIDOldHeap, OIDNewHeap,
+ (OIDOldHeap == RelationRelationId),
+ swap_toast_by_content, is_internal,
+ frozenXid, cutoffMulti, mapped_tables);
+
+ /*
+ * If it's a system catalog, queue a sinval message to flush all catcaches
+ * on the catalog when we reach CommandCounterIncrement.
+ */
+ if (is_system_catalog)
+ CacheInvalidateCatalog(OIDOldHeap);
+
+ /*
+ * Rebuild each index on the relation (but not the toast table, which is
+ * all-new at this point). It is important to do this before the DROP
+ * step because if we are processing a system catalog that will be used
+ * during DROP, we want to have its indexes available. There is no
+ * advantage to the other order anyway because this is all transactional,
+ * so no chance to reclaim disk space before commit. We do not need a
+ * final CommandCounterIncrement() because reindex_relation does it.
+ *
+ * Note: because index_build is called via reindex_relation, it will never
+ * set indcheckxmin true for the indexes. This is OK even though in some
+ * sense we are building new indexes rather than rebuilding existing ones,
+ * because the new heap won't contain any HOT chains at all, let alone
+ * broken ones, so it can't be necessary to set indcheckxmin.
+ */
+ reindex_flags = REINDEX_REL_SUPPRESS_INDEX_USE;
+ if (check_constraints)
+ reindex_flags |= REINDEX_REL_CHECK_CONSTRAINTS;
+
+ /*
+ * Ensure that the indexes have the same persistence as the parent
+ * relation.
+ */
+ if (newrelpersistence == RELPERSISTENCE_UNLOGGED)
+ reindex_flags |= REINDEX_REL_FORCE_INDEXES_UNLOGGED;
+ else if (newrelpersistence == RELPERSISTENCE_PERMANENT)
+ reindex_flags |= REINDEX_REL_FORCE_INDEXES_PERMANENT;
+
+ /* Report that we are now reindexing relations */
+ pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
+ PROGRESS_CLUSTER_PHASE_REBUILD_INDEX);
+
+ reindex_relation(OIDOldHeap, reindex_flags, &reindex_params);
+
+ /* Report that we are now doing clean up */
+ pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
+ PROGRESS_CLUSTER_PHASE_FINAL_CLEANUP);
+
+ /*
+ * If the relation being rebuilt is pg_class, swap_relation_files()
+ * couldn't update pg_class's own pg_class entry (check comments in
+ * swap_relation_files()), thus relfrozenxid was not updated. That's
+ * annoying because a potential reason for doing a VACUUM FULL is a
+ * imminent or actual anti-wraparound shutdown. So, now that we can
+ * access the new relation using its indices, update relfrozenxid.
+ * pg_class doesn't have a toast relation, so we don't need to update the
+ * corresponding toast relation. Not that there's little point moving all
+ * relfrozenxid updates here since swap_relation_files() needs to write to
+ * pg_class for non-mapped relations anyway.
+ */
+ if (OIDOldHeap == RelationRelationId)
+ {
+ Relation relRelation;
+ HeapTuple reltup;
+ Form_pg_class relform;
+
+ relRelation = table_open(RelationRelationId, RowExclusiveLock);
+
+ reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(OIDOldHeap));
+ if (!HeapTupleIsValid(reltup))
+ elog(ERROR, "cache lookup failed for relation %u", OIDOldHeap);
+ relform = (Form_pg_class) GETSTRUCT(reltup);
+
+ relform->relfrozenxid = frozenXid;
+ relform->relminmxid = cutoffMulti;
+
+ CatalogTupleUpdate(relRelation, &reltup->t_self, reltup);
+
+ table_close(relRelation, RowExclusiveLock);
+ }
+
+ /* Destroy new heap with old filenode */
+ object.classId = RelationRelationId;
+ object.objectId = OIDNewHeap;
+ object.objectSubId = 0;
+
+ /*
+ * The new relation is local to our transaction and we know nothing
+ * depends on it, so DROP_RESTRICT should be OK.
+ */
+ performDeletion(&object, DROP_RESTRICT, PERFORM_DELETION_INTERNAL);
+
+ /* performDeletion does CommandCounterIncrement at end */
+
+ /*
+ * Now we must remove any relation mapping entries that we set up for the
+ * transient table, as well as its toast table and toast index if any. If
+ * we fail to do this before commit, the relmapper will complain about new
+ * permanent map entries being added post-bootstrap.
+ */
+ for (i = 0; OidIsValid(mapped_tables[i]); i++)
+ RelationMapRemoveMapping(mapped_tables[i]);
+
+ /*
+ * At this point, everything is kosher except that, if we did toast swap
+ * by links, the toast table's name corresponds to the transient table.
+ * The name is irrelevant to the backend because it's referenced by OID,
+ * but users looking at the catalogs could be confused. Rename it to
+ * prevent this problem.
+ *
+ * Note no lock required on the relation, because we already hold an
+ * exclusive lock on it.
+ */
+ if (!swap_toast_by_content)
+ {
+ Relation newrel;
+
+ newrel = table_open(OIDOldHeap, NoLock);
+ if (OidIsValid(newrel->rd_rel->reltoastrelid))
+ {
+ Oid toastidx;
+ char NewToastName[NAMEDATALEN];
+
+ /* Get the associated valid index to be renamed */
+ toastidx = toast_get_valid_index(newrel->rd_rel->reltoastrelid,
+ NoLock);
+
+ /* rename the toast table ... */
+ snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u",
+ OIDOldHeap);
+ RenameRelationInternal(newrel->rd_rel->reltoastrelid,
+ NewToastName, true, false);
+
+ /* ... and its valid index too. */
+ snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u_index",
+ OIDOldHeap);
+
+ RenameRelationInternal(toastidx,
+ NewToastName, true, true);
+
+ /*
+ * Reset the relrewrite for the toast. The command-counter
+ * increment is required here as we are about to update the tuple
+ * that is updated as part of RenameRelationInternal.
+ */
+ CommandCounterIncrement();
+ ResetRelRewrite(newrel->rd_rel->reltoastrelid);
+ }
+ relation_close(newrel, NoLock);
+ }
+
+ /* if it's not a catalog table, clear any missing attribute settings */
+ if (!is_system_catalog)
+ {
+ Relation newrel;
+
+ newrel = table_open(OIDOldHeap, NoLock);
+ RelationClearMissing(newrel);
+ relation_close(newrel, NoLock);
+ }
+}
+
+
+/*
+ * Build a list of RelToCluster entries, allocated in cluster_context, for
+ * every table that the current user owns and that has an index with
+ * indisclustered set.  Each entry records the table's OID and the OID of
+ * the index on which the table is already clustered.
+ */
+static List *
+get_tables_to_cluster(MemoryContext cluster_context)
+{
+	Relation	pg_index_rel;
+	TableScanDesc catscan;
+	ScanKeyData skey;
+	HeapTuple	tuple;
+	List	   *result = NIL;
+
+	/*
+	 * Scan pg_index for rows with indisclustered = true; ownership of each
+	 * underlying table is checked inside the loop.
+	 */
+	pg_index_rel = table_open(IndexRelationId, AccessShareLock);
+	ScanKeyInit(&skey,
+				Anum_pg_index_indisclustered,
+				BTEqualStrategyNumber, F_BOOLEQ,
+				BoolGetDatum(true));
+	catscan = table_beginscan_catalog(pg_index_rel, 1, &skey);
+
+	while ((tuple = heap_getnext(catscan, ForwardScanDirection)) != NULL)
+	{
+		Form_pg_index indexForm = (Form_pg_index) GETSTRUCT(tuple);
+		RelToCluster *rtc;
+		MemoryContext oldcxt;
+
+		/* Silently skip tables the current user does not own */
+		if (!pg_class_ownercheck(indexForm->indrelid, GetUserId()))
+			continue;
+
+		/* The result list must live in the caller-supplied context */
+		oldcxt = MemoryContextSwitchTo(cluster_context);
+
+		rtc = (RelToCluster *) palloc(sizeof(RelToCluster));
+		rtc->tableOid = indexForm->indrelid;
+		rtc->indexOid = indexForm->indexrelid;
+		result = lappend(result, rtc);
+
+		MemoryContextSwitchTo(oldcxt);
+	}
+
+	table_endscan(catscan);
+	relation_close(pg_index_rel, AccessShareLock);
+
+	return result;
+}
+
+/*
+ * Given an index on a partitioned table, return a list of RelToCluster for
+ * all the leaf tables/indexes under it.
+ *
+ * Like expand_vacuum_rel, but here caller must hold AccessExclusiveLock
+ * on the table containing the index.
+ */
+static List *
+get_tables_to_cluster_partitioned(MemoryContext cluster_context, Oid indexOid)
+{
+	List	   *inhoids;
+	ListCell   *lc;
+	List	   *rtcs = NIL;
+	MemoryContext old_context;
+
+	/* Do not lock the children until they're processed */
+	inhoids = find_all_inheritors(indexOid, NoLock, NULL);
+
+	foreach(lc, inhoids)
+	{
+		Oid			indexrelid = lfirst_oid(lc);
+		Oid			relid = IndexGetRelation(indexrelid, false);
+		RelToCluster *rtc;
+
+		/* consider only leaf indexes (partitioned indexes have no storage) */
+		if (get_rel_relkind(indexrelid) != RELKIND_INDEX)
+			continue;
+
+		/*
+		 * Silently skip partitions which the user has no access to: a
+		 * partition is processed only if the user owns it, or owns the
+		 * database and the partition is not a shared relation.
+		 */
+		if (!pg_class_ownercheck(relid, GetUserId()) &&
+			(!pg_database_ownercheck(MyDatabaseId, GetUserId()) ||
+			 IsSharedRelation(relid)))
+			continue;
+
+		/* Use a permanent memory context for the result list */
+		old_context = MemoryContextSwitchTo(cluster_context);
+
+		rtc = (RelToCluster *) palloc(sizeof(RelToCluster));
+		rtc->tableOid = relid;
+		rtc->indexOid = indexrelid;
+		rtcs = lappend(rtcs, rtc);
+
+		MemoryContextSwitchTo(old_context);
+	}
+
+	return rtcs;
+}
diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c
new file mode 100644
index 0000000..fcfc02d
--- /dev/null
+++ b/src/backend/commands/collationcmds.c
@@ -0,0 +1,820 @@
+/*-------------------------------------------------------------------------
+ *
+ * collationcmds.c
+ * collation-related commands support code
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/commands/collationcmds.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/htup_details.h"
+#include "access/table.h"
+#include "access/xact.h"
+#include "catalog/dependency.h"
+#include "catalog/indexing.h"
+#include "catalog/namespace.h"
+#include "catalog/objectaccess.h"
+#include "catalog/pg_collation.h"
+#include "commands/alter.h"
+#include "commands/collationcmds.h"
+#include "commands/comment.h"
+#include "commands/dbcommands.h"
+#include "commands/defrem.h"
+#include "common/string.h"
+#include "mb/pg_wchar.h"
+#include "miscadmin.h"
+#include "utils/acl.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/pg_locale.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
+
+
+/*
+ * One candidate locale alias (e.g. "en_US" for "en_US.utf8") collected
+ * while reading "locale -a" output in pg_import_system_collations().
+ */
+typedef struct
+{
+	char	   *localename;		/* name of locale, as per "locale -a" */
+	char	   *alias;			/* shortened alias for same */
+	int			enc;			/* encoding */
+} CollAliasData;
+
+
+/*
+ * CREATE COLLATION
+ *
+ * names: qualified name for the new collation.
+ * parameters: list of DefElem options (FROM, LOCALE, LC_COLLATE, LC_CTYPE,
+ * PROVIDER, DETERMINISTIC, VERSION).
+ * if_not_exists: if true, silently do nothing when a matching collation
+ * already exists (returning InvalidObjectAddress).
+ *
+ * Returns the ObjectAddress of the newly created collation.
+ */
+ObjectAddress
+DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_exists)
+{
+	char	   *collName;
+	Oid			collNamespace;
+	AclResult	aclresult;
+	ListCell   *pl;
+	DefElem    *fromEl = NULL;
+	DefElem    *localeEl = NULL;
+	DefElem    *lccollateEl = NULL;
+	DefElem    *lcctypeEl = NULL;
+	DefElem    *providerEl = NULL;
+	DefElem    *deterministicEl = NULL;
+	DefElem    *versionEl = NULL;
+	char	   *collcollate;
+	char	   *collctype;
+	char	   *colliculocale;
+	bool		collisdeterministic;
+	int			collencoding;
+	char		collprovider;
+	char	   *collversion = NULL;
+	Oid			newoid;
+	ObjectAddress address;
+
+	collNamespace = QualifiedNameGetCreationNamespace(names, &collName);
+
+	/* must have CREATE privilege on the target namespace */
+	aclresult = pg_namespace_aclcheck(collNamespace, GetUserId(), ACL_CREATE);
+	if (aclresult != ACLCHECK_OK)
+		aclcheck_error(aclresult, OBJECT_SCHEMA,
+					   get_namespace_name(collNamespace));
+
+	/* Deconstruct the option list, rejecting unknown and duplicate options */
+	foreach(pl, parameters)
+	{
+		DefElem    *defel = lfirst_node(DefElem, pl);
+		DefElem   **defelp;
+
+		if (strcmp(defel->defname, "from") == 0)
+			defelp = &fromEl;
+		else if (strcmp(defel->defname, "locale") == 0)
+			defelp = &localeEl;
+		else if (strcmp(defel->defname, "lc_collate") == 0)
+			defelp = &lccollateEl;
+		else if (strcmp(defel->defname, "lc_ctype") == 0)
+			defelp = &lcctypeEl;
+		else if (strcmp(defel->defname, "provider") == 0)
+			defelp = &providerEl;
+		else if (strcmp(defel->defname, "deterministic") == 0)
+			defelp = &deterministicEl;
+		else if (strcmp(defel->defname, "version") == 0)
+			defelp = &versionEl;
+		else
+		{
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("collation attribute \"%s\" not recognized",
+							defel->defname),
+					 parser_errposition(pstate, defel->location)));
+			break;
+		}
+		if (*defelp != NULL)
+			errorConflictingDefElem(defel, pstate);
+		*defelp = defel;
+	}
+
+	if (localeEl && (lccollateEl || lcctypeEl))
+		ereport(ERROR,
+				errcode(ERRCODE_SYNTAX_ERROR),
+				errmsg("conflicting or redundant options"),
+				errdetail("LOCALE cannot be specified together with LC_COLLATE or LC_CTYPE."));
+
+	if (fromEl && list_length(parameters) != 1)
+		ereport(ERROR,
+				errcode(ERRCODE_SYNTAX_ERROR),
+				errmsg("conflicting or redundant options"),
+				errdetail("FROM cannot be specified together with any other options."));
+
+	if (fromEl)
+	{
+		/* CREATE COLLATION ... FROM: copy an existing collation's properties */
+		Oid			collid;
+		HeapTuple	tp;
+		Datum		datum;
+		bool		isnull;
+
+		collid = get_collation_oid(defGetQualifiedName(fromEl), false);
+		tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
+		if (!HeapTupleIsValid(tp))
+			elog(ERROR, "cache lookup failed for collation %u", collid);
+
+		collprovider = ((Form_pg_collation) GETSTRUCT(tp))->collprovider;
+		collisdeterministic = ((Form_pg_collation) GETSTRUCT(tp))->collisdeterministic;
+		collencoding = ((Form_pg_collation) GETSTRUCT(tp))->collencoding;
+
+		/* these three columns are nullable, so fetch via SysCacheGetAttr */
+		datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collcollate, &isnull);
+		if (!isnull)
+			collcollate = TextDatumGetCString(datum);
+		else
+			collcollate = NULL;
+
+		datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collctype, &isnull);
+		if (!isnull)
+			collctype = TextDatumGetCString(datum);
+		else
+			collctype = NULL;
+
+		datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_colliculocale, &isnull);
+		if (!isnull)
+			colliculocale = TextDatumGetCString(datum);
+		else
+			colliculocale = NULL;
+
+		ReleaseSysCache(tp);
+
+		/*
+		 * Copying the "default" collation is not allowed because most code
+		 * checks for DEFAULT_COLLATION_OID instead of COLLPROVIDER_DEFAULT,
+		 * and so having a second collation with COLLPROVIDER_DEFAULT would
+		 * not work and potentially confuse or crash some code.  This could be
+		 * fixed with some legwork.
+		 */
+		if (collprovider == COLLPROVIDER_DEFAULT)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+					 errmsg("collation \"default\" cannot be copied")));
+	}
+	else
+	{
+		/* Ordinary CREATE COLLATION: derive properties from the options */
+		char	   *collproviderstr = NULL;
+
+		collcollate = NULL;
+		collctype = NULL;
+		colliculocale = NULL;
+
+		if (providerEl)
+			collproviderstr = defGetString(providerEl);
+
+		if (deterministicEl)
+			collisdeterministic = defGetBoolean(deterministicEl);
+		else
+			collisdeterministic = true;
+
+		if (versionEl)
+			collversion = defGetString(versionEl);
+
+		if (collproviderstr)
+		{
+			if (pg_strcasecmp(collproviderstr, "icu") == 0)
+				collprovider = COLLPROVIDER_ICU;
+			else if (pg_strcasecmp(collproviderstr, "libc") == 0)
+				collprovider = COLLPROVIDER_LIBC;
+			else
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+						 errmsg("unrecognized collation provider: %s",
+								collproviderstr)));
+		}
+		else
+			collprovider = COLLPROVIDER_LIBC;
+
+		/* LOCALE feeds the libc categories or the ICU locale, per provider */
+		if (localeEl)
+		{
+			if (collprovider == COLLPROVIDER_LIBC)
+			{
+				collcollate = defGetString(localeEl);
+				collctype = defGetString(localeEl);
+			}
+			else
+				colliculocale = defGetString(localeEl);
+		}
+
+		if (lccollateEl)
+			collcollate = defGetString(lccollateEl);
+
+		if (lcctypeEl)
+			collctype = defGetString(lcctypeEl);
+
+		/* each provider has its own set of required locale parameters */
+		if (collprovider == COLLPROVIDER_LIBC)
+		{
+			if (!collcollate)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+						 errmsg("parameter \"lc_collate\" must be specified")));
+
+			if (!collctype)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+						 errmsg("parameter \"lc_ctype\" must be specified")));
+		}
+		else if (collprovider == COLLPROVIDER_ICU)
+		{
+			if (!colliculocale)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+						 errmsg("parameter \"locale\" must be specified")));
+		}
+
+		/*
+		 * Nondeterministic collations are currently only supported with ICU
+		 * because that's the only case where it can actually make a
+		 * difference. So we can save writing the code for the other
+		 * providers.
+		 */
+		if (!collisdeterministic && collprovider != COLLPROVIDER_ICU)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("nondeterministic collations not supported with this provider")));
+
+		if (collprovider == COLLPROVIDER_ICU)
+		{
+#ifdef USE_ICU
+			/*
+			 * We could create ICU collations with collencoding == database
+			 * encoding, but it seems better to use -1 so that it matches the
+			 * way initdb would create ICU collations. However, only allow
+			 * one to be created when the current database's encoding is
+			 * supported. Otherwise the collation is useless, plus we get
+			 * surprising behaviors like not being able to drop the collation.
+			 *
+			 * Skip this test when !USE_ICU, because the error we want to
+			 * throw for that isn't thrown till later.
+			 */
+			if (!is_encoding_supported_by_icu(GetDatabaseEncoding()))
+				ereport(ERROR,
+						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+						 errmsg("current database's encoding is not supported with this provider")));
+#endif
+			collencoding = -1;
+		}
+		else
+		{
+			collencoding = GetDatabaseEncoding();
+			check_encoding_locale_matches(collencoding, collcollate, collctype);
+		}
+	}
+
+	/* If no VERSION was given, ask the provider for the current one */
+	if (!collversion)
+		collversion = get_collation_actual_version(collprovider, collprovider == COLLPROVIDER_ICU ? colliculocale : collcollate);
+
+	newoid = CollationCreate(collName,
+							 collNamespace,
+							 GetUserId(),
+							 collprovider,
+							 collisdeterministic,
+							 collencoding,
+							 collcollate,
+							 collctype,
+							 colliculocale,
+							 collversion,
+							 if_not_exists,
+							 false);	/* not quiet */
+
+	/* InvalidOid here means the collation existed and IF NOT EXISTS applied */
+	if (!OidIsValid(newoid))
+		return InvalidObjectAddress;
+
+	/*
+	 * Check that the locales can be loaded. NB: pg_newlocale_from_collation
+	 * is only supposed to be called on non-C-equivalent locales.
+	 */
+	CommandCounterIncrement();
+	if (!lc_collate_is_c(newoid) || !lc_ctype_is_c(newoid))
+		(void) pg_newlocale_from_collation(newoid);
+
+	ObjectAddressSet(address, CollationRelationId, newoid);
+
+	return address;
+}
+
+/*
+ * Subroutine for ALTER COLLATION SET SCHEMA and RENAME
+ *
+ * Is there a collation with the same name as the given collation already in
+ * the given namespace?  If so, raise an appropriate error message.  The
+ * exact-encoding match is probed first, then the any-encoding (-1) match,
+ * so the more specific error takes precedence.
+ */
+void
+IsThereCollationInNamespace(const char *collname, Oid nspOid)
+{
+	/* make sure the name doesn't already exist in new schema */
+	if (SearchSysCacheExists3(COLLNAMEENCNSP,
+							  CStringGetDatum(collname),
+							  Int32GetDatum(GetDatabaseEncoding()),
+							  ObjectIdGetDatum(nspOid)))
+		ereport(ERROR,
+				(errcode(ERRCODE_DUPLICATE_OBJECT),
+				 errmsg("collation \"%s\" for encoding \"%s\" already exists in schema \"%s\"",
+						collname, GetDatabaseEncodingName(),
+						get_namespace_name(nspOid))));
+
+	/* mustn't match an any-encoding entry, either */
+	if (SearchSysCacheExists3(COLLNAMEENCNSP,
+							  CStringGetDatum(collname),
+							  Int32GetDatum(-1),
+							  ObjectIdGetDatum(nspOid)))
+		ereport(ERROR,
+				(errcode(ERRCODE_DUPLICATE_OBJECT),
+				 errmsg("collation \"%s\" already exists in schema \"%s\"",
+						collname, get_namespace_name(nspOid))));
+}
+
+/*
+ * ALTER COLLATION
+ *
+ * As implemented here, this handles ALTER COLLATION ... REFRESH VERSION:
+ * recompute the version currently reported by the collation's provider and
+ * store it in pg_collation.collversion.
+ */
+ObjectAddress
+AlterCollation(AlterCollationStmt *stmt)
+{
+	Relation	rel;
+	Oid			collOid;
+	HeapTuple	tup;
+	Form_pg_collation collForm;
+	Datum		datum;
+	bool		isnull;
+	char	   *oldversion;
+	char	   *newversion;
+	ObjectAddress address;
+
+	rel = table_open(CollationRelationId, RowExclusiveLock);
+	collOid = get_collation_oid(stmt->collname, false);
+
+	/* must own the collation to alter it */
+	if (!pg_collation_ownercheck(collOid, GetUserId()))
+		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_COLLATION,
+					   NameListToString(stmt->collname));
+
+	tup = SearchSysCacheCopy1(COLLOID, ObjectIdGetDatum(collOid));
+	if (!HeapTupleIsValid(tup))
+		elog(ERROR, "cache lookup failed for collation %u", collOid);
+
+	collForm = (Form_pg_collation) GETSTRUCT(tup);
+	datum = SysCacheGetAttr(COLLOID, tup, Anum_pg_collation_collversion, &isnull);
+	oldversion = isnull ? NULL : TextDatumGetCString(datum);
+
+	/* the locale to ask the provider about is in a provider-specific column */
+	datum = SysCacheGetAttr(COLLOID, tup, collForm->collprovider == COLLPROVIDER_ICU ? Anum_pg_collation_colliculocale : Anum_pg_collation_collcollate, &isnull);
+	if (isnull)
+		elog(ERROR, "unexpected null in pg_collation");
+	newversion = get_collation_actual_version(collForm->collprovider, TextDatumGetCString(datum));
+
+	/* cannot change from NULL to non-NULL or vice versa */
+	if ((!oldversion && newversion) || (oldversion && !newversion))
+		elog(ERROR, "invalid collation version change");
+	else if (oldversion && newversion && strcmp(newversion, oldversion) != 0)
+	{
+		bool		nulls[Natts_pg_collation];
+		bool		replaces[Natts_pg_collation];
+		Datum		values[Natts_pg_collation];
+
+		ereport(NOTICE,
+				(errmsg("changing version from %s to %s",
+						oldversion, newversion)));
+
+		memset(values, 0, sizeof(values));
+		memset(nulls, false, sizeof(nulls));
+		memset(replaces, false, sizeof(replaces));
+
+		values[Anum_pg_collation_collversion - 1] = CStringGetTextDatum(newversion);
+		replaces[Anum_pg_collation_collversion - 1] = true;
+
+		tup = heap_modify_tuple(tup, RelationGetDescr(rel),
+								values, nulls, replaces);
+	}
+	else
+		ereport(NOTICE,
+				(errmsg("version has not changed")));
+
+	/* write the tuple back (unmodified, if the version did not change) */
+	CatalogTupleUpdate(rel, &tup->t_self, tup);
+
+	InvokeObjectPostAlterHook(CollationRelationId, collOid, 0);
+
+	ObjectAddressSet(address, CollationRelationId, collOid);
+
+	heap_freetuple(tup);
+	table_close(rel, NoLock);
+
+	return address;
+}
+
+
+/*
+ * pg_collation_actual_version() -- SQL-callable function: return the
+ * version string the collation's provider currently reports for its
+ * locale, or NULL for the database default collation.
+ */
+Datum
+pg_collation_actual_version(PG_FUNCTION_ARGS)
+{
+	Oid			collid = PG_GETARG_OID(0);
+	HeapTuple	tp;
+	char		collprovider;
+	Datum		datum;
+	bool		isnull;
+	char	   *version;
+
+	tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
+	if (!HeapTupleIsValid(tp))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("collation with OID %u does not exist", collid)));
+
+	collprovider = ((Form_pg_collation) GETSTRUCT(tp))->collprovider;
+
+	if (collprovider != COLLPROVIDER_DEFAULT)
+	{
+		/* ICU keeps its locale in colliculocale, libc in collcollate */
+		datum = SysCacheGetAttr(COLLOID, tp, collprovider == COLLPROVIDER_ICU ? Anum_pg_collation_colliculocale : Anum_pg_collation_collcollate, &isnull);
+		if (isnull)
+			elog(ERROR, "unexpected null in pg_collation");
+		version = get_collation_actual_version(collprovider, TextDatumGetCString(datum));
+	}
+	else
+		version = NULL;
+
+	ReleaseSysCache(tp);
+
+	if (version)
+		PG_RETURN_TEXT_P(cstring_to_text(version));
+	else
+		PG_RETURN_NULL();
+}
+
+
+/* will we use "locale -a" in pg_import_system_collations? */
+#if defined(HAVE_LOCALE_T) && !defined(WIN32)
+#define READ_LOCALE_A_OUTPUT
+#endif
+
+#ifdef READ_LOCALE_A_OUTPUT
+/*
+ * "Normalize" a libc locale name by removing any encoding tag introduced
+ * by a '.' (e.g., "en_US.utf8" -> "en_US", but "br_FR.iso885915@euro"
+ * -> "br_FR@euro").  The result is written into "new", which the caller
+ * must size at least as large as "old".  Returns true iff the output
+ * differs from the input.
+ */
+static bool
+normalize_libc_locale_name(char *new, const char *old)
+{
+	char	   *dst = new;
+	const char *src = old;
+	bool		stripped = false;
+
+	while (*src != '\0')
+	{
+		if (*src != '.')
+		{
+			*dst++ = *src++;
+			continue;
+		}
+
+		/* skip the '.' plus the following letters/digits/dashes */
+		src++;
+		while ((*src >= 'A' && *src <= 'Z') ||
+			   (*src >= 'a' && *src <= 'z') ||
+			   (*src >= '0' && *src <= '9') ||
+			   *src == '-')
+			src++;
+		stripped = true;
+	}
+	*dst = '\0';
+
+	return stripped;
+}
+
+/*
+ * qsort comparator for CollAliasData items.
+ *
+ * Ordering by localename alone suffices, since the other fields are
+ * derived from it.
+ */
+static int
+cmpaliases(const void *a, const void *b)
+{
+	const char *name_a = ((const CollAliasData *) a)->localename;
+	const char *name_b = ((const CollAliasData *) b)->localename;
+
+	return strcmp(name_a, name_b);
+}
+#endif /* READ_LOCALE_A_OUTPUT */
+
+
+#ifdef USE_ICU
+/*
+ * Get the ICU language tag for a locale name.
+ * The result is a palloc'd string.  Errors out if ICU cannot convert the
+ * given locale name.
+ */
+static char *
+get_icu_language_tag(const char *localename)
+{
+	char		buf[ULOC_FULLNAME_CAPACITY];
+	UErrorCode	status;
+
+	status = U_ZERO_ERROR;
+	/* last argument requests strict conversion (per ICU uloc_toLanguageTag) */
+	uloc_toLanguageTag(localename, buf, sizeof(buf), true, &status);
+	if (U_FAILURE(status))
+		ereport(ERROR,
+				(errmsg("could not convert locale name \"%s\" to language tag: %s",
+						localename, u_errorName(status))));
+
+	return pstrdup(buf);
+}
+
+/*
+ * Get a comment (specifically, the display name) for an ICU locale.
+ * The result is a palloc'd string, or NULL if we can't get a comment
+ * or find that it's not all ASCII.  (We can *not* accept non-ASCII
+ * comments, because the contents of template0 must be encoding-agnostic.)
+ */
+static char *
+get_icu_locale_comment(const char *localename)
+{
+	UErrorCode	status;
+	UChar		displayname[128];
+	int32		len_uchar;
+	int32		i;
+	char	   *result;
+
+	status = U_ZERO_ERROR;
+	/* ask ICU for the locale's display name, rendered in English */
+	len_uchar = uloc_getDisplayName(localename, "en",
+									displayname, lengthof(displayname),
+									&status);
+	if (U_FAILURE(status))
+		return NULL;			/* no good reason to raise an error */
+
+	/* Check for non-ASCII comment (can't use pg_is_ascii for this) */
+	for (i = 0; i < len_uchar; i++)
+	{
+		if (displayname[i] > 127)
+			return NULL;
+	}
+
+	/* OK, transcribe the UChars into a plain C string */
+	result = palloc(len_uchar + 1);
+	for (i = 0; i < len_uchar; i++)
+		result[i] = displayname[i];
+	result[len_uchar] = '\0';
+
+	return result;
+}
+#endif /* USE_ICU */
+
+
+/*
+ * pg_import_system_collations: add known system collations to pg_collation
+ *
+ * Enumerates the locales known to libc (via "locale -a", where available)
+ * and to ICU (when built with it), creating a collation in the given schema
+ * for each one not already present.  Returns the number of collations
+ * created.  Superuser only.
+ */
+Datum
+pg_import_system_collations(PG_FUNCTION_ARGS)
+{
+	Oid			nspid = PG_GETARG_OID(0);
+	int			ncreated = 0;
+
+	if (!superuser())
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 errmsg("must be superuser to import system collations")));
+
+	if (!SearchSysCacheExists1(NAMESPACEOID, ObjectIdGetDatum(nspid)))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_SCHEMA),
+				 errmsg("schema with OID %u does not exist", nspid)));
+
+	/* Load collations known to libc, using "locale -a" to enumerate them */
+#ifdef READ_LOCALE_A_OUTPUT
+	{
+		FILE	   *locale_a_handle;
+		char		localebuf[LOCALE_NAME_BUFLEN];
+		int			nvalid = 0;
+		Oid			collid;
+		CollAliasData *aliases;
+		int			naliases,
+					maxaliases,
+					i;
+
+		/* expansible array of aliases */
+		maxaliases = 100;
+		aliases = (CollAliasData *) palloc(maxaliases * sizeof(CollAliasData));
+		naliases = 0;
+
+		locale_a_handle = OpenPipeStream("locale -a", "r");
+		if (locale_a_handle == NULL)
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not execute command \"%s\": %m",
+							"locale -a")));
+
+		while (fgets(localebuf, sizeof(localebuf), locale_a_handle))
+		{
+			size_t		len;
+			int			enc;
+			char		alias[LOCALE_NAME_BUFLEN];
+
+			len = strlen(localebuf);
+
+			/* a missing trailing newline means fgets truncated the name */
+			if (len == 0 || localebuf[len - 1] != '\n')
+			{
+				elog(DEBUG1, "skipping locale with too-long name: \"%s\"", localebuf);
+				continue;
+			}
+			localebuf[len - 1] = '\0';	/* strip the newline */
+
+			/*
+			 * Some systems have locale names that don't consist entirely of
+			 * ASCII letters (such as "bokm&aring;l" or "fran&ccedil;ais").
+			 * This is pretty silly, since we need the locale itself to
+			 * interpret the non-ASCII characters. We can't do much with
+			 * those, so we filter them out.
+			 */
+			if (!pg_is_ascii(localebuf))
+			{
+				elog(DEBUG1, "skipping locale with non-ASCII name: \"%s\"", localebuf);
+				continue;
+			}
+
+			enc = pg_get_encoding_from_locale(localebuf, false);
+			if (enc < 0)
+			{
+				elog(DEBUG1, "skipping locale with unrecognized encoding: \"%s\"",
+					 localebuf);
+				continue;
+			}
+			if (!PG_VALID_BE_ENCODING(enc))
+			{
+				elog(DEBUG1, "skipping locale with client-only encoding: \"%s\"", localebuf);
+				continue;
+			}
+			if (enc == PG_SQL_ASCII)
+				continue;		/* C/POSIX are already in the catalog */
+
+			/* count valid locales found in operating system */
+			nvalid++;
+
+			/*
+			 * Create a collation named the same as the locale, but quietly
+			 * doing nothing if it already exists. This is the behavior we
+			 * need even at initdb time, because some versions of "locale -a"
+			 * can report the same locale name more than once. And it's
+			 * convenient for later import runs, too, since you just about
+			 * always want to add on new locales without a lot of chatter
+			 * about existing ones.
+			 */
+			collid = CollationCreate(localebuf, nspid, GetUserId(),
+									 COLLPROVIDER_LIBC, true, enc,
+									 localebuf, localebuf, NULL,
+									 get_collation_actual_version(COLLPROVIDER_LIBC, localebuf),
+									 true, true);
+			if (OidIsValid(collid))
+			{
+				ncreated++;
+
+				/* Must do CCI between inserts to handle duplicates correctly */
+				CommandCounterIncrement();
+			}
+
+			/*
+			 * Generate aliases such as "en_US" in addition to "en_US.utf8"
+			 * for ease of use. Note that collation names are unique per
+			 * encoding only, so this doesn't clash with "en_US" for LATIN1,
+			 * say.
+			 *
+			 * However, it might conflict with a name we'll see later in the
+			 * "locale -a" output. So save up the aliases and try to add them
+			 * after we've read all the output.
+			 */
+			if (normalize_libc_locale_name(alias, localebuf))
+			{
+				/* grow the alias array as needed */
+				if (naliases >= maxaliases)
+				{
+					maxaliases *= 2;
+					aliases = (CollAliasData *)
+						repalloc(aliases, maxaliases * sizeof(CollAliasData));
+				}
+				aliases[naliases].localename = pstrdup(localebuf);
+				aliases[naliases].alias = pstrdup(alias);
+				aliases[naliases].enc = enc;
+				naliases++;
+			}
+		}
+
+		ClosePipeStream(locale_a_handle);
+
+		/*
+		 * Before processing the aliases, sort them by locale name. The point
+		 * here is that if "locale -a" gives us multiple locale names with the
+		 * same encoding and base name, say "en_US.utf8" and "en_US.utf-8", we
+		 * want to pick a deterministic one of them. First in ASCII sort
+		 * order is a good enough rule. (Before PG 10, the code corresponding
+		 * to this logic in initdb.c had an additional ordering rule, to
+		 * prefer the locale name exactly matching the alias, if any. We
+		 * don't need to consider that here, because we would have already
+		 * created such a pg_collation entry above, and that one will win.)
+		 */
+		if (naliases > 1)
+			qsort((void *) aliases, naliases, sizeof(CollAliasData), cmpaliases);
+
+		/* Now add aliases, ignoring any that match pre-existing entries */
+		for (i = 0; i < naliases; i++)
+		{
+			char	   *locale = aliases[i].localename;
+			char	   *alias = aliases[i].alias;
+			int			enc = aliases[i].enc;
+
+			collid = CollationCreate(alias, nspid, GetUserId(),
+									 COLLPROVIDER_LIBC, true, enc,
+									 locale, locale, NULL,
+									 get_collation_actual_version(COLLPROVIDER_LIBC, locale),
+									 true, true);
+			if (OidIsValid(collid))
+			{
+				ncreated++;
+
+				CommandCounterIncrement();
+			}
+		}
+
+		/* Give a warning if "locale -a" seems to be malfunctioning */
+		if (nvalid == 0)
+			ereport(WARNING,
+					(errmsg("no usable system locales were found")));
+	}
+#endif							/* READ_LOCALE_A_OUTPUT */
+
+	/*
+	 * Load collations known to ICU
+	 *
+	 * We use uloc_countAvailable()/uloc_getAvailable() rather than
+	 * ucol_countAvailable()/ucol_getAvailable(). The former returns a full
+	 * set of language+region combinations, whereas the latter only returns
+	 * language+region combinations if they are distinct from the language's
+	 * base collation. So there might not be a de-DE or en-GB, which would be
+	 * confusing.
+	 */
+#ifdef USE_ICU
+	{
+		int			i;
+
+		/*
+		 * Start the loop at -1 to sneak in the root locale without too much
+		 * code duplication.
+		 */
+		for (i = -1; i < uloc_countAvailable(); i++)
+		{
+			const char *name;
+			char	   *langtag;
+			char	   *icucomment;
+			const char *iculocstr;
+			Oid			collid;
+
+			if (i == -1)
+				name = "";		/* ICU root locale */
+			else
+				name = uloc_getAvailable(i);
+
+			langtag = get_icu_language_tag(name);
+			/* older ICU versions cannot open collators by language tag */
+			iculocstr = U_ICU_VERSION_MAJOR_NUM >= 54 ? langtag : name;
+
+			/*
+			 * Be paranoid about not allowing any non-ASCII strings into
+			 * pg_collation
+			 */
+			if (!pg_is_ascii(langtag) || !pg_is_ascii(iculocstr))
+				continue;
+
+			collid = CollationCreate(psprintf("%s-x-icu", langtag),
+									 nspid, GetUserId(),
+									 COLLPROVIDER_ICU, true, -1,
+									 NULL, NULL, iculocstr,
+									 get_collation_actual_version(COLLPROVIDER_ICU, iculocstr),
+									 true, true);
+			if (OidIsValid(collid))
+			{
+				ncreated++;
+
+				CommandCounterIncrement();
+
+				/* attach the ICU display name as a comment, if we got one */
+				icucomment = get_icu_locale_comment(name);
+				if (icucomment)
+					CreateComments(collid, CollationRelationId, 0,
+								   icucomment);
+			}
+		}
+	}
+#endif							/* USE_ICU */
+
+	PG_RETURN_INT32(ncreated);
+}
diff --git a/src/backend/commands/comment.c b/src/backend/commands/comment.c
new file mode 100644
index 0000000..86985a9
--- /dev/null
+++ b/src/backend/commands/comment.c
@@ -0,0 +1,459 @@
+/*-------------------------------------------------------------------------
+ *
+ * comment.c
+ *
+ * PostgreSQL object comments utility code.
+ *
+ * Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/commands/comment.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/htup_details.h"
+#include "access/relation.h"
+#include "access/table.h"
+#include "catalog/indexing.h"
+#include "catalog/objectaddress.h"
+#include "catalog/pg_description.h"
+#include "catalog/pg_shdescription.h"
+#include "commands/comment.h"
+#include "commands/dbcommands.h"
+#include "miscadmin.h"
+#include "utils/builtins.h"
+#include "utils/fmgroids.h"
+#include "utils/rel.h"
+
+
+/*
+ * CommentObject --
+ *
+ * This routine is used to add the associated comment into
+ * pg_description for the object specified by the given SQL command.
+ *
+ * Returns the address of the commented-on object.  The result is
+ * InvalidObjectAddress only in the special case of COMMENT ON DATABASE
+ * naming a nonexistent database, which is reported as a WARNING rather
+ * than an error (see below).
+ */
+ObjectAddress
+CommentObject(CommentStmt *stmt)
+{
+	Relation	relation;
+	ObjectAddress address = InvalidObjectAddress;
+
+	/*
+	 * When loading a dump, we may see a COMMENT ON DATABASE for the old name
+	 * of the database.  Erroring out would prevent pg_restore from completing
+	 * (which is really pg_restore's fault, but for now we will work around
+	 * the problem here).  Consensus is that the best fix is to treat wrong
+	 * database name as a WARNING not an ERROR; hence, the following special
+	 * case.
+	 */
+	if (stmt->objtype == OBJECT_DATABASE)
+	{
+		char	   *database = strVal(stmt->object);
+
+		if (!OidIsValid(get_database_oid(database, true)))
+		{
+			ereport(WARNING,
+					(errcode(ERRCODE_UNDEFINED_DATABASE),
+					 errmsg("database \"%s\" does not exist", database)));
+			return address;
+		}
+	}
+
+	/*
+	 * Translate the parser representation that identifies this object into an
+	 * ObjectAddress.  get_object_address() will throw an error if the object
+	 * does not exist, and will also acquire a lock on the target to guard
+	 * against concurrent DROP operations.
+	 */
+	address = get_object_address(stmt->objtype, stmt->object,
+								 &relation, ShareUpdateExclusiveLock, false);
+
+	/* Require ownership of the target object. */
+	check_object_ownership(GetUserId(), stmt->objtype, address,
+						   stmt->object, relation);
+
+	/* Perform other integrity checks as needed. */
+	switch (stmt->objtype)
+	{
+		case OBJECT_COLUMN:
+
+			/*
+			 * Allow comments only on columns of tables, views, materialized
+			 * views, composite types, and foreign tables (which are the only
+			 * relkinds for which pg_dump will dump per-column comments).  In
+			 * particular we wish to disallow comments on index columns,
+			 * because the naming of an index's columns may change across PG
+			 * versions, so dumping per-column comments could create reload
+			 * failures.
+			 */
+			if (relation->rd_rel->relkind != RELKIND_RELATION &&
+				relation->rd_rel->relkind != RELKIND_VIEW &&
+				relation->rd_rel->relkind != RELKIND_MATVIEW &&
+				relation->rd_rel->relkind != RELKIND_COMPOSITE_TYPE &&
+				relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE &&
+				relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+				ereport(ERROR,
+						(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+						 errmsg("cannot set comment on relation \"%s\"",
+								RelationGetRelationName(relation)),
+						 errdetail_relkind_not_supported(relation->rd_rel->relkind)));
+			break;
+		default:
+			/* no extra validation needed for other object types */
+			break;
+	}
+
+	/*
+	 * Databases, tablespaces, and roles are cluster-wide objects, so any
+	 * comments on those objects are recorded in the shared pg_shdescription
+	 * catalog.  Comments on all other objects are recorded in pg_description.
+	 */
+	if (stmt->objtype == OBJECT_DATABASE || stmt->objtype == OBJECT_TABLESPACE
+		|| stmt->objtype == OBJECT_ROLE)
+		CreateSharedComments(address.objectId, address.classId, stmt->comment);
+	else
+		CreateComments(address.objectId, address.classId, address.objectSubId,
+					   stmt->comment);
+
+	/*
+	 * If get_object_address() opened the relation for us, we close it to keep
+	 * the reference count correct - but we retain any locks acquired by
+	 * get_object_address() until commit time, to guard against concurrent
+	 * activity.
+	 */
+	if (relation != NULL)
+		relation_close(relation, NoLock);
+
+	return address;
+}
+
+/*
+ * CreateComments --
+ *
+ * Create a comment for the specified object descriptor.  Inserts a new
+ * pg_description tuple, or replaces an existing one with the same key
+ * (objoid, classoid, objsubid).
+ *
+ * If the comment given is null or an empty string, instead delete any
+ * existing comment for the specified key.
+ */
+void
+CreateComments(Oid oid, Oid classoid, int32 subid, const char *comment)
+{
+	Relation	description;
+	ScanKeyData skey[3];
+	SysScanDesc sd;
+	HeapTuple	oldtuple;
+	HeapTuple	newtuple = NULL;	/* non-NULL later means we updated in place */
+	Datum		values[Natts_pg_description];
+	bool		nulls[Natts_pg_description];
+	bool		replaces[Natts_pg_description];
+	int			i;
+
+	/* Reduce empty-string to NULL case */
+	if (comment != NULL && strlen(comment) == 0)
+		comment = NULL;
+
+	/* Prepare to form or update a tuple, if necessary */
+	if (comment != NULL)
+	{
+		/* replace every column; no column of the new tuple is NULL */
+		for (i = 0; i < Natts_pg_description; i++)
+		{
+			nulls[i] = false;
+			replaces[i] = true;
+		}
+		values[Anum_pg_description_objoid - 1] = ObjectIdGetDatum(oid);
+		values[Anum_pg_description_classoid - 1] = ObjectIdGetDatum(classoid);
+		values[Anum_pg_description_objsubid - 1] = Int32GetDatum(subid);
+		values[Anum_pg_description_description - 1] = CStringGetTextDatum(comment);
+	}
+
+	/* Use the index to search for a matching old tuple */
+
+	ScanKeyInit(&skey[0],
+				Anum_pg_description_objoid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(oid));
+	ScanKeyInit(&skey[1],
+				Anum_pg_description_classoid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(classoid));
+	ScanKeyInit(&skey[2],
+				Anum_pg_description_objsubid,
+				BTEqualStrategyNumber, F_INT4EQ,
+				Int32GetDatum(subid));
+
+	description = table_open(DescriptionRelationId, RowExclusiveLock);
+
+	sd = systable_beginscan(description, DescriptionObjIndexId, true,
+							NULL, 3, skey);
+
+	while ((oldtuple = systable_getnext(sd)) != NULL)
+	{
+		/* Found the old tuple, so delete or update it */
+
+		if (comment == NULL)
+			CatalogTupleDelete(description, &oldtuple->t_self);
+		else
+		{
+			newtuple = heap_modify_tuple(oldtuple, RelationGetDescr(description), values,
+										 nulls, replaces);
+			CatalogTupleUpdate(description, &oldtuple->t_self, newtuple);
+		}
+
+		break;					/* Assume there can be only one match */
+	}
+
+	systable_endscan(sd);
+
+	/* If we didn't find an old tuple, insert a new one */
+
+	if (newtuple == NULL && comment != NULL)
+	{
+		newtuple = heap_form_tuple(RelationGetDescr(description),
+								   values, nulls);
+		CatalogTupleInsert(description, newtuple);
+	}
+
+	if (newtuple != NULL)
+		heap_freetuple(newtuple);
+
+	/* Done -- NoLock keeps the RowExclusiveLock until end of transaction */
+
+	table_close(description, NoLock);
+}
+
+/*
+ * CreateSharedComments --
+ *
+ * Store a comment for the specified shared (cluster-wide) object.  A new
+ * pg_shdescription tuple is inserted, or an existing one with the same
+ * key (objoid, classoid) is replaced.
+ *
+ * Passing a NULL or empty-string comment instead deletes any existing
+ * comment for the key.
+ */
+void
+CreateSharedComments(Oid oid, Oid classoid, const char *comment)
+{
+	Relation	shdesc;
+	ScanKeyData keys[2];
+	SysScanDesc scan;
+	HeapTuple	oldtup;
+	HeapTuple	newtup = NULL;
+	Datum		values[Natts_pg_shdescription];
+	bool		nulls[Natts_pg_shdescription];
+	bool		replaces[Natts_pg_shdescription];
+
+	/* An empty comment means "delete", same as NULL */
+	if (comment && *comment == '\0')
+		comment = NULL;
+
+	/* Unless we're deleting, build the column values for the new tuple */
+	if (comment)
+	{
+		int			i;
+
+		for (i = 0; i < Natts_pg_shdescription; i++)
+		{
+			nulls[i] = false;
+			replaces[i] = true;
+		}
+		values[Anum_pg_shdescription_objoid - 1] = ObjectIdGetDatum(oid);
+		values[Anum_pg_shdescription_classoid - 1] = ObjectIdGetDatum(classoid);
+		values[Anum_pg_shdescription_description - 1] = CStringGetTextDatum(comment);
+	}
+
+	/* Look up any existing comment via the catalog's (objoid, classoid) index */
+	ScanKeyInit(&keys[0],
+				Anum_pg_shdescription_objoid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(oid));
+	ScanKeyInit(&keys[1],
+				Anum_pg_shdescription_classoid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(classoid));
+
+	shdesc = table_open(SharedDescriptionRelationId, RowExclusiveLock);
+
+	scan = systable_beginscan(shdesc, SharedDescriptionObjIndexId, true,
+							  NULL, 2, keys);
+
+	/* At most one tuple is expected to match the key */
+	oldtup = systable_getnext(scan);
+	if (HeapTupleIsValid(oldtup))
+	{
+		/* Found an existing comment: delete or replace it */
+		if (comment == NULL)
+			CatalogTupleDelete(shdesc, &oldtup->t_self);
+		else
+		{
+			newtup = heap_modify_tuple(oldtup, RelationGetDescr(shdesc),
+									   values, nulls, replaces);
+			CatalogTupleUpdate(shdesc, &oldtup->t_self, newtup);
+		}
+	}
+
+	systable_endscan(scan);
+
+	/* No existing tuple was updated, so insert a fresh one if needed */
+	if (newtup == NULL && comment != NULL)
+	{
+		newtup = heap_form_tuple(RelationGetDescr(shdesc), values, nulls);
+		CatalogTupleInsert(shdesc, newtup);
+	}
+
+	if (newtup != NULL)
+		heap_freetuple(newtup);
+
+	/* NoLock: hold the RowExclusiveLock until end of transaction */
+	table_close(shdesc, NoLock);
+}
+
+/*
+ * DeleteComments -- remove comments for an object
+ *
+ * If subid is nonzero then only comments matching it will be removed.
+ * If subid is zero, all comments matching the oid/classoid will be removed
+ * (this corresponds to deleting a whole object).
+ */
+void
+DeleteComments(Oid oid, Oid classoid, int32 subid)
+{
+	Relation	pg_desc;
+	ScanKeyData keys[3];
+	int			nkeys = 2;
+	SysScanDesc scan;
+	HeapTuple	tup;
+
+	/* Always match on the object OID and the owning catalog's OID... */
+	ScanKeyInit(&keys[0],
+				Anum_pg_description_objoid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(oid));
+	ScanKeyInit(&keys[1],
+				Anum_pg_description_classoid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(classoid));
+
+	/* ... and, when requested, on the sub-object id as well */
+	if (subid != 0)
+		ScanKeyInit(&keys[nkeys++],
+					Anum_pg_description_objsubid,
+					BTEqualStrategyNumber, F_INT4EQ,
+					Int32GetDatum(subid));
+
+	pg_desc = table_open(DescriptionRelationId, RowExclusiveLock);
+
+	scan = systable_beginscan(pg_desc, DescriptionObjIndexId, true,
+							  NULL, nkeys, keys);
+
+	/* Delete every matching tuple */
+	while (HeapTupleIsValid(tup = systable_getnext(scan)))
+		CatalogTupleDelete(pg_desc, &tup->t_self);
+
+	systable_endscan(scan);
+	table_close(pg_desc, RowExclusiveLock);
+}
+
+/*
+ * DeleteSharedComments -- remove comments for a shared object
+ *
+ * All pg_shdescription entries matching (oid, classoid) are deleted.
+ */
+void
+DeleteSharedComments(Oid oid, Oid classoid)
+{
+	Relation	shdesc;
+	ScanKeyData keys[2];
+	SysScanDesc scan;
+	HeapTuple	tup;
+
+	/* Scan by (objoid, classoid) using the catalog's index */
+	ScanKeyInit(&keys[0],
+				Anum_pg_shdescription_objoid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(oid));
+	ScanKeyInit(&keys[1],
+				Anum_pg_shdescription_classoid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(classoid));
+
+	shdesc = table_open(SharedDescriptionRelationId, RowExclusiveLock);
+
+	scan = systable_beginscan(shdesc, SharedDescriptionObjIndexId, true,
+							  NULL, 2, keys);
+
+	/* Delete every matching tuple */
+	while (HeapTupleIsValid(tup = systable_getnext(scan)))
+		CatalogTupleDelete(shdesc, &tup->t_self);
+
+	systable_endscan(scan);
+	table_close(shdesc, RowExclusiveLock);
+}
+
+/*
+ * GetComment -- get the comment for an object, or NULL if none exists.
+ *
+ * The key is (oid, classoid, subid), matching pg_description's index.
+ */
+char *
+GetComment(Oid oid, Oid classoid, int32 subid)
+{
+	Relation	pg_desc;
+	ScanKeyData keys[3];
+	SysScanDesc scan;
+	HeapTuple	tup;
+	char	   *result = NULL;
+
+	/* Search for the (objoid, classoid, objsubid) entry via the index */
+	ScanKeyInit(&keys[0],
+				Anum_pg_description_objoid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(oid));
+	ScanKeyInit(&keys[1],
+				Anum_pg_description_classoid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(classoid));
+	ScanKeyInit(&keys[2],
+				Anum_pg_description_objsubid,
+				BTEqualStrategyNumber, F_INT4EQ,
+				Int32GetDatum(subid));
+
+	pg_desc = table_open(DescriptionRelationId, AccessShareLock);
+
+	scan = systable_beginscan(pg_desc, DescriptionObjIndexId, true,
+							  NULL, 3, keys);
+
+	/* At most one tuple is expected to match all three key columns */
+	tup = systable_getnext(scan);
+	if (HeapTupleIsValid(tup))
+	{
+		Datum		value;
+		bool		isnull;
+
+		/* Extract the description field, which may be NULL */
+		value = heap_getattr(tup, Anum_pg_description_description,
+							 RelationGetDescr(pg_desc), &isnull);
+		if (!isnull)
+			result = TextDatumGetCString(value);
+	}
+
+	systable_endscan(scan);
+
+	table_close(pg_desc, AccessShareLock);
+
+	return result;
+}
diff --git a/src/backend/commands/constraint.c b/src/backend/commands/constraint.c
new file mode 100644
index 0000000..721de17
--- /dev/null
+++ b/src/backend/commands/constraint.c
@@ -0,0 +1,205 @@
+/*-------------------------------------------------------------------------
+ *
+ * constraint.c
+ * PostgreSQL CONSTRAINT support code.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/commands/constraint.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/heapam.h"
+#include "access/tableam.h"
+#include "catalog/index.h"
+#include "commands/trigger.h"
+#include "executor/executor.h"
+#include "utils/builtins.h"
+#include "utils/rel.h"
+#include "utils/snapmgr.h"
+
+
+/*
+ * unique_key_recheck - trigger function to do a deferred uniqueness check.
+ *
+ * This now also does deferred exclusion-constraint checks, so the name is
+ * somewhat historical.
+ *
+ * This is invoked as an AFTER ROW trigger for both INSERT and UPDATE,
+ * for any rows recorded as potentially violating a deferrable unique
+ * or exclusion constraint.
+ *
+ * This may be an end-of-statement check, a commit-time check, or a
+ * check triggered by a SET CONSTRAINTS command.
+ *
+ * On a violation the underlying index AM (or
+ * check_exclusion_constraint) raises the error; on success we simply
+ * return NULL, which is ignored for AFTER triggers.
+ */
+Datum
+unique_key_recheck(PG_FUNCTION_ARGS)
+{
+	TriggerData *trigdata = (TriggerData *) fcinfo->context;
+	const char *funcname = "unique_key_recheck";
+	ItemPointerData checktid;	/* TID of the row this trigger fired for */
+	ItemPointerData tmptid;		/* may be advanced to a live HOT child */
+	Relation	indexRel;
+	IndexInfo  *indexInfo;
+	EState	   *estate;
+	ExprContext *econtext;
+	TupleTableSlot *slot;
+	Datum		values[INDEX_MAX_KEYS];
+	bool		isnull[INDEX_MAX_KEYS];
+
+	/*
+	 * Make sure this is being called as an AFTER ROW trigger.  Note:
+	 * translatable error strings are shared with ri_triggers.c, so resist the
+	 * temptation to fold the function name into them.
+	 */
+	if (!CALLED_AS_TRIGGER(fcinfo))
+		ereport(ERROR,
+				(errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED),
+				 errmsg("function \"%s\" was not called by trigger manager",
+						funcname)));
+
+	if (!TRIGGER_FIRED_AFTER(trigdata->tg_event) ||
+		!TRIGGER_FIRED_FOR_ROW(trigdata->tg_event))
+		ereport(ERROR,
+				(errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED),
+				 errmsg("function \"%s\" must be fired AFTER ROW",
+						funcname)));
+
+	/*
+	 * Get the new data that was inserted/updated.
+	 */
+	if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
+		checktid = trigdata->tg_trigslot->tts_tid;
+	else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
+		checktid = trigdata->tg_newslot->tts_tid;
+	else
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED),
+				 errmsg("function \"%s\" must be fired for INSERT or UPDATE",
+						funcname)));
+		ItemPointerSetInvalid(&checktid);	/* keep compiler quiet */
+	}
+
+	/* Slot to hold the fetched row, compatible with the table's AM */
+	slot = table_slot_create(trigdata->tg_relation, NULL);
+
+	/*
+	 * If the row pointed at by checktid is now dead (ie, inserted and then
+	 * deleted within our transaction), we can skip the check.  However, we
+	 * have to be careful, because this trigger gets queued only in response
+	 * to index insertions; which means it does not get queued e.g. for HOT
+	 * updates.  The row we are called for might now be dead, but have a live
+	 * HOT child, in which case we still need to make the check ---
+	 * effectively, we're applying the check against the live child row,
+	 * although we can use the values from this row since by definition all
+	 * columns of interest to us are the same.
+	 *
+	 * This might look like just an optimization, because the index AM will
+	 * make this identical test before throwing an error.  But it's actually
+	 * needed for correctness, because the index AM will also throw an error
+	 * if it doesn't find the index entry for the row.  If the row's dead then
+	 * it's possible the index entry has also been marked dead, and even
+	 * removed.
+	 */
+	tmptid = checktid;
+	{
+		IndexFetchTableData *scan = table_index_fetch_begin(trigdata->tg_relation);
+		bool		call_again = false;
+
+		/* table_index_fetch_tuple may update tmptid to the live HOT member */
+		if (!table_index_fetch_tuple(scan, &tmptid, SnapshotSelf, slot,
+									 &call_again, NULL))
+		{
+			/*
+			 * All rows referenced by the index entry are dead, so skip the
+			 * check.
+			 */
+			ExecDropSingleTupleTableSlot(slot);
+			table_index_fetch_end(scan);
+			return PointerGetDatum(NULL);
+		}
+		table_index_fetch_end(scan);
+	}
+
+	/*
+	 * Open the index, acquiring a RowExclusiveLock, just as if we were going
+	 * to update it.  (This protects against possible changes of the index
+	 * schema, not against concurrent updates.)
+	 */
+	indexRel = index_open(trigdata->tg_trigger->tgconstrindid,
+						  RowExclusiveLock);
+	indexInfo = BuildIndexInfo(indexRel);
+
+	/*
+	 * Typically the index won't have expressions, but if it does we need an
+	 * EState to evaluate them.  We need it for exclusion constraints too,
+	 * even if they are just on simple columns.
+	 */
+	if (indexInfo->ii_Expressions != NIL ||
+		indexInfo->ii_ExclusionOps != NULL)
+	{
+		estate = CreateExecutorState();
+		econtext = GetPerTupleExprContext(estate);
+		econtext->ecxt_scantuple = slot;
+	}
+	else
+		estate = NULL;			/* FormIndexDatum accepts NULL when unneeded */
+
+	/*
+	 * Form the index values and isnull flags for the index entry that we need
+	 * to check.
+	 *
+	 * Note: if the index uses functions that are not as immutable as they are
+	 * supposed to be, this could produce an index tuple different from the
+	 * original.  The index AM can catch such errors by verifying that it
+	 * finds a matching index entry with the tuple's TID.  For exclusion
+	 * constraints we check this in check_exclusion_constraint().
+	 */
+	FormIndexDatum(indexInfo, slot, estate, values, isnull);
+
+	/*
+	 * Now do the appropriate check.
+	 */
+	if (indexInfo->ii_ExclusionOps == NULL)
+	{
+		/*
+		 * Note: this is not a real insert; it is a check that the index entry
+		 * that has already been inserted is unique.  Passing the tuple's tid
+		 * (i.e. unmodified by table_index_fetch_tuple()) is correct even if
+		 * the row is now dead, because that is the TID the index will know
+		 * about.
+		 */
+		index_insert(indexRel, values, isnull, &checktid,
+					 trigdata->tg_relation, UNIQUE_CHECK_EXISTING,
+					 false, indexInfo);
+	}
+	else
+	{
+		/*
+		 * For exclusion constraints we just do the normal check, but now it's
+		 * okay to throw error.  In the HOT-update case, we must use the live
+		 * HOT child's TID here, else check_exclusion_constraint will think
+		 * the child is a conflict.
+		 */
+		check_exclusion_constraint(trigdata->tg_relation, indexRel, indexInfo,
+								   &tmptid, values, isnull,
+								   estate, false);
+	}
+
+	/*
+	 * If that worked, then this index entry is unique or non-excluded, and we
+	 * are done.
+	 */
+	if (estate != NULL)
+		FreeExecutorState(estate);
+
+	ExecDropSingleTupleTableSlot(slot);
+
+	index_close(indexRel, RowExclusiveLock);
+
+	/* AFTER ROW trigger result is ignored */
+	return PointerGetDatum(NULL);
+}
diff --git a/src/backend/commands/conversioncmds.c b/src/backend/commands/conversioncmds.c
new file mode 100644
index 0000000..67feda3
--- /dev/null
+++ b/src/backend/commands/conversioncmds.c
@@ -0,0 +1,139 @@
+/*-------------------------------------------------------------------------
+ *
+ * conversioncmds.c
+ * conversion creation command support code
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/commands/conversioncmds.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/htup_details.h"
+#include "catalog/dependency.h"
+#include "catalog/indexing.h"
+#include "catalog/pg_conversion.h"
+#include "catalog/pg_type.h"
+#include "commands/alter.h"
+#include "commands/conversioncmds.h"
+#include "mb/pg_wchar.h"
+#include "miscadmin.h"
+#include "parser/parse_func.h"
+#include "utils/acl.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
+
+/*
+ * CREATE CONVERSION
+ *
+ * Validate the statement (encodings exist and are not SQL_ASCII, the
+ * conversion function has the right signature and actually works for
+ * the requested encoding pair), then create the pg_conversion entry.
+ * Returns the address of the new conversion.
+ */
+ObjectAddress
+CreateConversionCommand(CreateConversionStmt *stmt)
+{
+	Oid			namespaceId;
+	char	   *conversion_name;
+	AclResult	aclresult;
+	int			from_encoding;
+	int			to_encoding;
+	Oid			funcoid;
+	const char *from_encoding_name = stmt->for_encoding_name;
+	const char *to_encoding_name = stmt->to_encoding_name;
+	List	   *func_name = stmt->func_name;
+
+	/*
+	 * Required argument types of a conversion function: source encoding id,
+	 * destination encoding id, source string, destination buffer, source
+	 * length, noError flag.
+	 */
+	static const Oid funcargs[] = {INT4OID, INT4OID, CSTRINGOID, INTERNALOID, INT4OID, BOOLOID};
+	char		result[1];		/* destination buffer for the test call below */
+	Datum		funcresult;
+
+	/* Convert list of names to a name and namespace */
+	namespaceId = QualifiedNameGetCreationNamespace(stmt->conversion_name,
+													&conversion_name);
+
+	/* Check we have creation rights in target namespace */
+	aclresult = pg_namespace_aclcheck(namespaceId, GetUserId(), ACL_CREATE);
+	if (aclresult != ACLCHECK_OK)
+		aclcheck_error(aclresult, OBJECT_SCHEMA,
+					   get_namespace_name(namespaceId));
+
+	/* Check the encoding names */
+	from_encoding = pg_char_to_encoding(from_encoding_name);
+	if (from_encoding < 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("source encoding \"%s\" does not exist",
+						from_encoding_name)));
+
+	to_encoding = pg_char_to_encoding(to_encoding_name);
+	if (to_encoding < 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("destination encoding \"%s\" does not exist",
+						to_encoding_name)));
+
+	/*
+	 * We consider conversions to or from SQL_ASCII to be meaningless.  (If
+	 * you wish to change this, note that pg_do_encoding_conversion() and its
+	 * sister functions have hard-wired fast paths for any conversion in which
+	 * the source or target encoding is SQL_ASCII, so that an encoding
+	 * conversion function declared for such a case will never be used.)
+	 */
+	if (from_encoding == PG_SQL_ASCII || to_encoding == PG_SQL_ASCII)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+				 errmsg("encoding conversion to or from \"SQL_ASCII\" is not supported")));
+
+	/*
+	 * Check the existence of the conversion function.  Function name could be
+	 * a qualified name.
+	 */
+	funcoid = LookupFuncName(func_name, sizeof(funcargs) / sizeof(Oid),
+							 funcargs, false);
+
+	/* Check it returns int4, else it's probably the wrong function */
+	if (get_func_rettype(funcoid) != INT4OID)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+				 errmsg("encoding conversion function %s must return type %s",
+						NameListToString(func_name), "integer")));
+
+	/* Check we have EXECUTE rights for the function */
+	aclresult = pg_proc_aclcheck(funcoid, GetUserId(), ACL_EXECUTE);
+	if (aclresult != ACLCHECK_OK)
+		aclcheck_error(aclresult, OBJECT_FUNCTION,
+					   NameListToString(func_name));
+
+	/*
+	 * Check that the conversion function is suitable for the requested source
+	 * and target encodings.  We do that by calling the function with an empty
+	 * string; the conversion function should throw an error if it can't
+	 * perform the requested conversion.  (With zero input length, nothing
+	 * should be written to the 1-byte dummy output buffer.)
+	 */
+	funcresult = OidFunctionCall6(funcoid,
+								  Int32GetDatum(from_encoding),
+								  Int32GetDatum(to_encoding),
+								  CStringGetDatum(""),
+								  CStringGetDatum(result),
+								  Int32GetDatum(0),
+								  BoolGetDatum(false));
+
+	/*
+	 * The function should return 0 for empty input.  Might as well check that,
+	 * too.
+	 */
+	if (DatumGetInt32(funcresult) != 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+				 errmsg("encoding conversion function %s returned incorrect result for empty input",
+						NameListToString(func_name))));
+
+	/*
+	 * All seem ok, go ahead (possible failure would be a duplicate conversion
+	 * name).  stmt->def presumably marks a DEFAULT conversion -- see
+	 * CreateConversionStmt.
+	 */
+	return ConversionCreate(conversion_name, namespaceId, GetUserId(),
+							from_encoding, to_encoding, funcoid, stmt->def);
+}
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
new file mode 100644
index 0000000..cc1909e
--- /dev/null
+++ b/src/backend/commands/copy.c
@@ -0,0 +1,798 @@
+/*-------------------------------------------------------------------------
+ *
+ * copy.c
+ * Implements the COPY utility command
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/commands/copy.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <ctype.h>
+#include <unistd.h>
+#include <sys/stat.h>
+
+#include "access/sysattr.h"
+#include "access/table.h"
+#include "access/xact.h"
+#include "catalog/pg_authid.h"
+#include "commands/copy.h"
+#include "commands/defrem.h"
+#include "executor/executor.h"
+#include "mb/pg_wchar.h"
+#include "miscadmin.h"
+#include "nodes/makefuncs.h"
+#include "optimizer/optimizer.h"
+#include "parser/parse_coerce.h"
+#include "parser/parse_collate.h"
+#include "parser/parse_expr.h"
+#include "parser/parse_relation.h"
+#include "rewrite/rewriteHandler.h"
+#include "utils/acl.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+#include "utils/rls.h"
+
+/*
+ * DoCopy executes the SQL COPY statement
+ *
+ * Either unload or reload contents of table <relation>, depending on <from>.
+ * (<from> = true means we are inserting into the table.) In the "TO" case
+ * we also support copying the output of an arbitrary SELECT, INSERT, UPDATE
+ * or DELETE query.
+ *
+ * If <pipe> is false, transfer is between the table and the file named
+ * <filename>. Otherwise, transfer is between the table and our regular
+ * input/output stream. The latter could be either stdin/stdout or a
+ * socket, depending on whether we're running under Postmaster control.
+ *
+ * Do not allow a Postgres user without the 'pg_read_server_files' or
+ * 'pg_write_server_files' role to read from or write to a file.
+ *
+ * Do not allow the copy if user doesn't have proper permission to access
+ * the table or the specifically requested columns.
+ */
+void
+DoCopy(ParseState *pstate, const CopyStmt *stmt,
+ int stmt_location, int stmt_len,
+ uint64 *processed)
+{
+ bool is_from = stmt->is_from;
+ bool pipe = (stmt->filename == NULL);
+ Relation rel;
+ Oid relid;
+ RawStmt *query = NULL;
+ Node *whereClause = NULL;
+
+ /*
+ * Disallow COPY to/from file or program except to users with the
+ * appropriate role.
+ */
+ if (!pipe)
+ {
+ if (stmt->is_program)
+ {
+ if (!has_privs_of_role(GetUserId(), ROLE_PG_EXECUTE_SERVER_PROGRAM))
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("must be superuser or have privileges of the pg_execute_server_program role to COPY to or from an external program"),
+ errhint("Anyone can COPY to stdout or from stdin. "
+ "psql's \\copy command also works for anyone.")));
+ }
+ else
+ {
+ if (is_from && !has_privs_of_role(GetUserId(), ROLE_PG_READ_SERVER_FILES))
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("must be superuser or have privileges of the pg_read_server_files role to COPY from a file"),
+ errhint("Anyone can COPY to stdout or from stdin. "
+ "psql's \\copy command also works for anyone.")));
+
+ if (!is_from && !has_privs_of_role(GetUserId(), ROLE_PG_WRITE_SERVER_FILES))
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("must be superuser or have privileges of the pg_write_server_files role to COPY to a file"),
+ errhint("Anyone can COPY to stdout or from stdin. "
+ "psql's \\copy command also works for anyone.")));
+ }
+ }
+
+ if (stmt->relation)
+ {
+ LOCKMODE lockmode = is_from ? RowExclusiveLock : AccessShareLock;
+ ParseNamespaceItem *nsitem;
+ RangeTblEntry *rte;
+ TupleDesc tupDesc;
+ List *attnums;
+ ListCell *cur;
+
+ Assert(!stmt->query);
+
+ /* Open and lock the relation, using the appropriate lock type. */
+ rel = table_openrv(stmt->relation, lockmode);
+
+ relid = RelationGetRelid(rel);
+
+ nsitem = addRangeTableEntryForRelation(pstate, rel, lockmode,
+ NULL, false, false);
+ rte = nsitem->p_rte;
+ rte->requiredPerms = (is_from ? ACL_INSERT : ACL_SELECT);
+
+ if (stmt->whereClause)
+ {
+ /* add nsitem to query namespace */
+ addNSItemToQuery(pstate, nsitem, false, true, true);
+
+ /* Transform the raw expression tree */
+ whereClause = transformExpr(pstate, stmt->whereClause, EXPR_KIND_COPY_WHERE);
+
+ /* Make sure it yields a boolean result. */
+ whereClause = coerce_to_boolean(pstate, whereClause, "WHERE");
+
+ /* we have to fix its collations too */
+ assign_expr_collations(pstate, whereClause);
+
+ whereClause = eval_const_expressions(NULL, whereClause);
+
+ whereClause = (Node *) canonicalize_qual((Expr *) whereClause, false);
+ whereClause = (Node *) make_ands_implicit((Expr *) whereClause);
+ }
+
+ tupDesc = RelationGetDescr(rel);
+ attnums = CopyGetAttnums(tupDesc, rel, stmt->attlist);
+ foreach(cur, attnums)
+ {
+ int attno = lfirst_int(cur) -
+ FirstLowInvalidHeapAttributeNumber;
+
+ if (is_from)
+ rte->insertedCols = bms_add_member(rte->insertedCols, attno);
+ else
+ rte->selectedCols = bms_add_member(rte->selectedCols, attno);
+ }
+ ExecCheckRTPerms(pstate->p_rtable, true);
+
+ /*
+ * Permission check for row security policies.
+ *
+ * check_enable_rls will ereport(ERROR) if the user has requested
+ * something invalid and will otherwise indicate if we should enable
+ * RLS (returns RLS_ENABLED) or not for this COPY statement.
+ *
+ * If the relation has a row security policy and we are to apply it
+ * then perform a "query" copy and allow the normal query processing
+ * to handle the policies.
+ *
+ * If RLS is not enabled for this, then just fall through to the
+ * normal non-filtering relation handling.
+ */
+ if (check_enable_rls(rte->relid, InvalidOid, false) == RLS_ENABLED)
+ {
+ SelectStmt *select;
+ ColumnRef *cr;
+ ResTarget *target;
+ RangeVar *from;
+ List *targetList = NIL;
+
+ if (is_from)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("COPY FROM not supported with row-level security"),
+ errhint("Use INSERT statements instead.")));
+
+ /*
+ * Build target list
+ *
+ * If no columns are specified in the attribute list of the COPY
+ * command, then the target list is 'all' columns. Therefore, '*'
+ * should be used as the target list for the resulting SELECT
+ * statement.
+ *
+ * In the case that columns are specified in the attribute list,
+ * create a ColumnRef and ResTarget for each column and add them
+ * to the target list for the resulting SELECT statement.
+ */
+ if (!stmt->attlist)
+ {
+ cr = makeNode(ColumnRef);
+ cr->fields = list_make1(makeNode(A_Star));
+ cr->location = -1;
+
+ target = makeNode(ResTarget);
+ target->name = NULL;
+ target->indirection = NIL;
+ target->val = (Node *) cr;
+ target->location = -1;
+
+ targetList = list_make1(target);
+ }
+ else
+ {
+ ListCell *lc;
+
+ foreach(lc, stmt->attlist)
+ {
+ /*
+ * Build the ColumnRef for each column. The ColumnRef
+ * 'fields' property is a String node that corresponds to
+ * the column name respectively.
+ */
+ cr = makeNode(ColumnRef);
+ cr->fields = list_make1(lfirst(lc));
+ cr->location = -1;
+
+ /* Build the ResTarget and add the ColumnRef to it. */
+ target = makeNode(ResTarget);
+ target->name = NULL;
+ target->indirection = NIL;
+ target->val = (Node *) cr;
+ target->location = -1;
+
+ /* Add each column to the SELECT statement's target list */
+ targetList = lappend(targetList, target);
+ }
+ }
+
+ /*
+ * Build RangeVar for from clause, fully qualified based on the
+ * relation which we have opened and locked. Use "ONLY" so that
+ * COPY retrieves rows from only the target table not any
+ * inheritance children, the same as when RLS doesn't apply.
+ */
+ from = makeRangeVar(get_namespace_name(RelationGetNamespace(rel)),
+ pstrdup(RelationGetRelationName(rel)),
+ -1);
+ from->inh = false; /* apply ONLY */
+
+ /* Build query */
+ select = makeNode(SelectStmt);
+ select->targetList = targetList;
+ select->fromClause = list_make1(from);
+
+ query = makeNode(RawStmt);
+ query->stmt = (Node *) select;
+ query->stmt_location = stmt_location;
+ query->stmt_len = stmt_len;
+
+ /*
+ * Close the relation for now, but keep the lock on it to prevent
+ * changes between now and when we start the query-based COPY.
+ *
+ * We'll reopen it later as part of the query-based COPY.
+ */
+ table_close(rel, NoLock);
+ rel = NULL;
+ }
+ }
+ else
+ {
+ Assert(stmt->query);
+
+ /* MERGE is allowed by parser, but unimplemented. Reject for now */
+ if (IsA(stmt->query, MergeStmt))
+ ereport(ERROR,
+ errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("MERGE not supported in COPY"));
+
+ query = makeNode(RawStmt);
+ query->stmt = stmt->query;
+ query->stmt_location = stmt_location;
+ query->stmt_len = stmt_len;
+
+ relid = InvalidOid;
+ rel = NULL;
+ }
+
+ if (is_from)
+ {
+ CopyFromState cstate;
+
+ Assert(rel);
+
+ /* check read-only transaction and parallel mode */
+ if (XactReadOnly && !rel->rd_islocaltemp)
+ PreventCommandIfReadOnly("COPY FROM");
+
+ cstate = BeginCopyFrom(pstate, rel, whereClause,
+ stmt->filename, stmt->is_program,
+ NULL, stmt->attlist, stmt->options);
+ *processed = CopyFrom(cstate); /* copy from file to database */
+ EndCopyFrom(cstate);
+ }
+ else
+ {
+ CopyToState cstate;
+
+ cstate = BeginCopyTo(pstate, rel, query, relid,
+ stmt->filename, stmt->is_program,
+ stmt->attlist, stmt->options);
+ *processed = DoCopyTo(cstate); /* copy from database to file */
+ EndCopyTo(cstate);
+ }
+
+ if (rel != NULL)
+ table_close(rel, NoLock);
+}
+
+/*
+ * Extract a CopyHeaderChoice value from a DefElem. This is like
+ * defGetBoolean() but also accepts the special value "match".
+ *
+ * 'is_from' tells whether we are parsing options for COPY FROM; "match"
+ * is rejected for COPY TO (the error below), since there is no incoming
+ * header line to match against.  Errors out (does not return) on any
+ * value outside the accepted set.
+ */
+static CopyHeaderChoice
+defGetCopyHeaderChoice(DefElem *def, bool is_from)
+{
+	/*
+	 * If no parameter given, assume "true" is meant.
+	 */
+	if (def->arg == NULL)
+		return COPY_HEADER_TRUE;
+
+	/*
+	 * Allow 0, 1, "true", "false", "on", "off", or "match".
+	 */
+	switch (nodeTag(def->arg))
+	{
+		case T_Integer:
+			switch (intVal(def->arg))
+			{
+				case 0:
+					return COPY_HEADER_FALSE;
+				case 1:
+					return COPY_HEADER_TRUE;
+				default:
+					/* otherwise, error out below */
+					break;
+			}
+			break;
+		default:
+			{
+				char	   *sval = defGetString(def);
+
+				/*
+				 * The set of strings accepted here should match up with the
+				 * grammar's opt_boolean_or_string production.
+				 */
+				if (pg_strcasecmp(sval, "true") == 0)
+					return COPY_HEADER_TRUE;
+				if (pg_strcasecmp(sval, "false") == 0)
+					return COPY_HEADER_FALSE;
+				if (pg_strcasecmp(sval, "on") == 0)
+					return COPY_HEADER_TRUE;
+				if (pg_strcasecmp(sval, "off") == 0)
+					return COPY_HEADER_FALSE;
+				if (pg_strcasecmp(sval, "match") == 0)
+				{
+					/* "match" is only valid when reading (COPY FROM) */
+					if (!is_from)
+						ereport(ERROR,
+								(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+								 errmsg("cannot use \"%s\" with HEADER in COPY TO",
+										sval)));
+					return COPY_HEADER_MATCH;
+				}
+			}
+			break;
+	}
+	ereport(ERROR,
+			(errcode(ERRCODE_SYNTAX_ERROR),
+			 errmsg("%s requires a Boolean value or \"match\"",
+					def->defname)));
+	return COPY_HEADER_FALSE;	/* keep compiler quiet */
+}
+
+/*
+ * Process the statement option list for COPY.
+ *
+ * Scan the options list (a list of DefElem) and transpose the information
+ * into *opts_out, applying appropriate error checking.
+ *
+ * If 'opts_out' is not NULL, it is assumed to be filled with zeroes initially.
+ *
+ * This is exported so that external users of the COPY API can sanity-check
+ * a list of options. In that usage, 'opts_out' can be passed as NULL and
+ * the collected data is just leaked until CurrentMemoryContext is reset.
+ *
+ * Note that additional checking, such as whether column names listed in FORCE
+ * QUOTE actually exist, has to be applied later. This just checks for
+ * self-consistency of the options list.
+ */
+void
+ProcessCopyOptions(ParseState *pstate,
+				   CopyFormatOptions *opts_out,
+				   bool is_from,
+				   List *options)
+{
+	/* duplicate-option detection flags for options without a pointer field */
+	bool		format_specified = false;
+	bool		freeze_specified = false;
+	bool		header_specified = false;
+	ListCell   *option;
+
+	/* Support external use for option sanity checking */
+	if (opts_out == NULL)
+		opts_out = (CopyFormatOptions *) palloc0(sizeof(CopyFormatOptions));
+
+	/* -1 means "not specified"; checked below when handling ENCODING */
+	opts_out->file_encoding = -1;
+
+	/* Extract options from the statement node tree */
+	foreach(option, options)
+	{
+		DefElem    *defel = lfirst_node(DefElem, option);
+
+		if (strcmp(defel->defname, "format") == 0)
+		{
+			char	   *fmt = defGetString(defel);
+
+			if (format_specified)
+				errorConflictingDefElem(defel, pstate);
+			format_specified = true;
+			if (strcmp(fmt, "text") == 0)
+				 /* default format */ ;
+			else if (strcmp(fmt, "csv") == 0)
+				opts_out->csv_mode = true;
+			else if (strcmp(fmt, "binary") == 0)
+				opts_out->binary = true;
+			else
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("COPY format \"%s\" not recognized", fmt),
+						 parser_errposition(pstate, defel->location)));
+		}
+		else if (strcmp(defel->defname, "freeze") == 0)
+		{
+			if (freeze_specified)
+				errorConflictingDefElem(defel, pstate);
+			freeze_specified = true;
+			opts_out->freeze = defGetBoolean(defel);
+		}
+		else if (strcmp(defel->defname, "delimiter") == 0)
+		{
+			if (opts_out->delim)
+				errorConflictingDefElem(defel, pstate);
+			opts_out->delim = defGetString(defel);
+		}
+		else if (strcmp(defel->defname, "null") == 0)
+		{
+			if (opts_out->null_print)
+				errorConflictingDefElem(defel, pstate);
+			opts_out->null_print = defGetString(defel);
+		}
+		else if (strcmp(defel->defname, "header") == 0)
+		{
+			if (header_specified)
+				errorConflictingDefElem(defel, pstate);
+			header_specified = true;
+			opts_out->header_line = defGetCopyHeaderChoice(defel, is_from);
+		}
+		else if (strcmp(defel->defname, "quote") == 0)
+		{
+			if (opts_out->quote)
+				errorConflictingDefElem(defel, pstate);
+			opts_out->quote = defGetString(defel);
+		}
+		else if (strcmp(defel->defname, "escape") == 0)
+		{
+			if (opts_out->escape)
+				errorConflictingDefElem(defel, pstate);
+			opts_out->escape = defGetString(defel);
+		}
+		else if (strcmp(defel->defname, "force_quote") == 0)
+		{
+			if (opts_out->force_quote || opts_out->force_quote_all)
+				errorConflictingDefElem(defel, pstate);
+			/* FORCE_QUOTE accepts either '*' (all columns) or a name list */
+			if (defel->arg && IsA(defel->arg, A_Star))
+				opts_out->force_quote_all = true;
+			else if (defel->arg && IsA(defel->arg, List))
+				opts_out->force_quote = castNode(List, defel->arg);
+			else
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("argument to option \"%s\" must be a list of column names",
+								defel->defname),
+						 parser_errposition(pstate, defel->location)));
+		}
+		else if (strcmp(defel->defname, "force_not_null") == 0)
+		{
+			if (opts_out->force_notnull)
+				errorConflictingDefElem(defel, pstate);
+			if (defel->arg && IsA(defel->arg, List))
+				opts_out->force_notnull = castNode(List, defel->arg);
+			else
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("argument to option \"%s\" must be a list of column names",
+								defel->defname),
+						 parser_errposition(pstate, defel->location)));
+		}
+		else if (strcmp(defel->defname, "force_null") == 0)
+		{
+			if (opts_out->force_null)
+				errorConflictingDefElem(defel, pstate);
+			if (defel->arg && IsA(defel->arg, List))
+				opts_out->force_null = castNode(List, defel->arg);
+			else
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("argument to option \"%s\" must be a list of column names",
+								defel->defname),
+						 parser_errposition(pstate, defel->location)));
+		}
+		else if (strcmp(defel->defname, "convert_selectively") == 0)
+		{
+			/*
+			 * Undocumented, not-accessible-from-SQL option: convert only the
+			 * named columns to binary form, storing the rest as NULLs. It's
+			 * allowed for the column list to be NIL.
+			 */
+			if (opts_out->convert_selectively)
+				errorConflictingDefElem(defel, pstate);
+			opts_out->convert_selectively = true;
+			if (defel->arg == NULL || IsA(defel->arg, List))
+				opts_out->convert_select = castNode(List, defel->arg);
+			else
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("argument to option \"%s\" must be a list of column names",
+								defel->defname),
+						 parser_errposition(pstate, defel->location)));
+		}
+		else if (strcmp(defel->defname, "encoding") == 0)
+		{
+			if (opts_out->file_encoding >= 0)
+				errorConflictingDefElem(defel, pstate);
+			opts_out->file_encoding = pg_char_to_encoding(defGetString(defel));
+			if (opts_out->file_encoding < 0)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("argument to option \"%s\" must be a valid encoding name",
+								defel->defname),
+						 parser_errposition(pstate, defel->location)));
+		}
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("option \"%s\" not recognized",
+							defel->defname),
+					 parser_errposition(pstate, defel->location)));
+	}
+
+	/*
+	 * Check for incompatible options (must do these two before inserting
+	 * defaults)
+	 */
+	if (opts_out->binary && opts_out->delim)
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("cannot specify DELIMITER in BINARY mode")));
+
+	if (opts_out->binary && opts_out->null_print)
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("cannot specify NULL in BINARY mode")));
+
+	/* Set defaults for omitted options */
+	if (!opts_out->delim)
+		opts_out->delim = opts_out->csv_mode ? "," : "\t";
+
+	if (!opts_out->null_print)
+		opts_out->null_print = opts_out->csv_mode ? "" : "\\N";
+	opts_out->null_print_len = strlen(opts_out->null_print);
+
+	if (opts_out->csv_mode)
+	{
+		if (!opts_out->quote)
+			opts_out->quote = "\"";
+		/* default escape equals the quote character, per CSV convention */
+		if (!opts_out->escape)
+			opts_out->escape = opts_out->quote;
+	}
+
+	/* Only single-byte delimiter strings are supported. */
+	if (strlen(opts_out->delim) != 1)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("COPY delimiter must be a single one-byte character")));
+
+	/* Disallow end-of-line characters */
+	if (strchr(opts_out->delim, '\r') != NULL ||
+		strchr(opts_out->delim, '\n') != NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("COPY delimiter cannot be newline or carriage return")));
+
+	if (strchr(opts_out->null_print, '\r') != NULL ||
+		strchr(opts_out->null_print, '\n') != NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("COPY null representation cannot use newline or carriage return")));
+
+	/*
+	 * Disallow unsafe delimiter characters in non-CSV mode. We can't allow
+	 * backslash because it would be ambiguous. We can't allow the other
+	 * cases because data characters matching the delimiter must be
+	 * backslashed, and certain backslash combinations are interpreted
+	 * non-literally by COPY IN. Disallowing all lower case ASCII letters is
+	 * more than strictly necessary, but seems best for consistency and
+	 * future-proofing. Likewise we disallow all digits though only octal
+	 * digits are actually dangerous.
+	 */
+	if (!opts_out->csv_mode &&
+		strchr("\\.abcdefghijklmnopqrstuvwxyz0123456789",
+			   opts_out->delim[0]) != NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("COPY delimiter cannot be \"%s\"", opts_out->delim)));
+
+	/* Check header */
+	if (opts_out->binary && opts_out->header_line)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot specify HEADER in BINARY mode")));
+
+	/* Check quote */
+	if (!opts_out->csv_mode && opts_out->quote != NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("COPY quote available only in CSV mode")));
+
+	if (opts_out->csv_mode && strlen(opts_out->quote) != 1)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("COPY quote must be a single one-byte character")));
+
+	if (opts_out->csv_mode && opts_out->delim[0] == opts_out->quote[0])
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("COPY delimiter and quote must be different")));
+
+	/* Check escape */
+	if (!opts_out->csv_mode && opts_out->escape != NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("COPY escape available only in CSV mode")));
+
+	if (opts_out->csv_mode && strlen(opts_out->escape) != 1)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("COPY escape must be a single one-byte character")));
+
+	/* Check force_quote */
+	if (!opts_out->csv_mode && (opts_out->force_quote || opts_out->force_quote_all))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("COPY force quote available only in CSV mode")));
+	if ((opts_out->force_quote || opts_out->force_quote_all) && is_from)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("COPY force quote only available using COPY TO")));
+
+	/* Check force_notnull */
+	if (!opts_out->csv_mode && opts_out->force_notnull != NIL)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("COPY force not null available only in CSV mode")));
+	if (opts_out->force_notnull != NIL && !is_from)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("COPY force not null only available using COPY FROM")));
+
+	/* Check force_null */
+	if (!opts_out->csv_mode && opts_out->force_null != NIL)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("COPY force null available only in CSV mode")));
+
+	if (opts_out->force_null != NIL && !is_from)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("COPY force null only available using COPY FROM")));
+
+	/* Don't allow the delimiter to appear in the null string. */
+	if (strchr(opts_out->null_print, opts_out->delim[0]) != NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("COPY delimiter must not appear in the NULL specification")));
+
+	/* Don't allow the CSV quote char to appear in the null string. */
+	if (opts_out->csv_mode &&
+		strchr(opts_out->null_print, opts_out->quote[0]) != NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("CSV quote character must not appear in the NULL specification")));
+}
+
+/*
+ * CopyGetAttnums - build an integer list of attnums to be copied
+ *
+ * The input attnamelist is either the user-specified column list,
+ * or NIL if there was none (in which case we want all the non-dropped
+ * columns).
+ *
+ * We don't include generated columns in the generated full list and we don't
+ * allow them to be specified explicitly. They don't make sense for COPY
+ * FROM, but we could possibly allow them for COPY TO. But this way it's at
+ * least ensured that whatever we copy out can be copied back in.
+ *
+ * rel can be NULL ... it's only used for error reports.
+ *
+ * Returns a freshly built list of 1-based attribute numbers; errors out on
+ * unknown, generated, or duplicated column names.
+ */
+List *
+CopyGetAttnums(TupleDesc tupDesc, Relation rel, List *attnamelist)
+{
+	List	   *attnums = NIL;
+
+	if (attnamelist == NIL)
+	{
+		/* Generate default column list */
+		int			attr_count = tupDesc->natts;
+		int			i;
+
+		for (i = 0; i < attr_count; i++)
+		{
+			if (TupleDescAttr(tupDesc, i)->attisdropped)
+				continue;
+			if (TupleDescAttr(tupDesc, i)->attgenerated)
+				continue;
+			/* attnums are 1-based while the descriptor index is 0-based */
+			attnums = lappend_int(attnums, i + 1);
+		}
+	}
+	else
+	{
+		/* Validate the user-supplied list and extract attnums */
+		ListCell   *l;
+
+		foreach(l, attnamelist)
+		{
+			char	   *name = strVal(lfirst(l));
+			int			attnum;
+			int			i;
+
+			/* Lookup column name */
+			attnum = InvalidAttrNumber;
+			for (i = 0; i < tupDesc->natts; i++)
+			{
+				Form_pg_attribute att = TupleDescAttr(tupDesc, i);
+
+				if (att->attisdropped)
+					continue;
+				if (namestrcmp(&(att->attname), name) == 0)
+				{
+					if (att->attgenerated)
+						ereport(ERROR,
+								(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+								 errmsg("column \"%s\" is a generated column",
+										name),
+								 errdetail("Generated columns cannot be used in COPY.")));
+					attnum = att->attnum;
+					break;
+				}
+			}
+			if (attnum == InvalidAttrNumber)
+			{
+				/* mention the relation name when we have one */
+				if (rel != NULL)
+					ereport(ERROR,
+							(errcode(ERRCODE_UNDEFINED_COLUMN),
+							 errmsg("column \"%s\" of relation \"%s\" does not exist",
+									name, RelationGetRelationName(rel))));
+				else
+					ereport(ERROR,
+							(errcode(ERRCODE_UNDEFINED_COLUMN),
+							 errmsg("column \"%s\" does not exist",
+									name)));
+			}
+			/* Check for duplicates */
+			if (list_member_int(attnums, attnum))
+				ereport(ERROR,
+						(errcode(ERRCODE_DUPLICATE_COLUMN),
+						 errmsg("column \"%s\" specified more than once",
+								name)));
+			attnums = lappend_int(attnums, attnum);
+		}
+	}
+
+	return attnums;
+}
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
new file mode 100644
index 0000000..c6dbd97
--- /dev/null
+++ b/src/backend/commands/copyfrom.c
@@ -0,0 +1,1624 @@
+/*-------------------------------------------------------------------------
+ *
+ * copyfrom.c
+ * COPY <table> FROM file/program/client
+ *
+ * This file contains routines needed to efficiently load tuples into a
+ * table. That includes looking up the correct partition, firing triggers,
+ * calling the table AM function to insert the data, and updating indexes.
+ * Reading data from the input file or client and parsing it into Datums
+ * is handled in copyfromparse.c.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/commands/copyfrom.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <ctype.h>
+#include <unistd.h>
+#include <sys/stat.h>
+
+#include "access/heapam.h"
+#include "access/htup_details.h"
+#include "access/tableam.h"
+#include "access/xact.h"
+#include "access/xlog.h"
+#include "catalog/namespace.h"
+#include "commands/copy.h"
+#include "commands/copyfrom_internal.h"
+#include "commands/progress.h"
+#include "commands/trigger.h"
+#include "executor/execPartition.h"
+#include "executor/executor.h"
+#include "executor/nodeModifyTable.h"
+#include "executor/tuptable.h"
+#include "foreign/fdwapi.h"
+#include "libpq/libpq.h"
+#include "libpq/pqformat.h"
+#include "miscadmin.h"
+#include "optimizer/optimizer.h"
+#include "pgstat.h"
+#include "rewrite/rewriteHandler.h"
+#include "storage/fd.h"
+#include "tcop/tcopprot.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/portal.h"
+#include "utils/rel.h"
+#include "utils/snapmgr.h"
+
+/*
+ * No more than this many tuples per CopyMultiInsertBuffer
+ *
+ * Caution: Don't make this too big, as we could end up with this many
+ * CopyMultiInsertBuffer items stored in CopyMultiInsertInfo's
+ * multiInsertBuffers list. Increasing this can cause quadratic growth in
+ * memory requirements during copies into partitioned tables with a large
+ * number of partitions.
+ */
+#define MAX_BUFFERED_TUPLES 1000
+
+/*
+ * Flush buffers if there are >= this many bytes, as counted by the input
+ * size, of tuples stored.
+ */
+#define MAX_BUFFERED_BYTES 65535
+
+/* Trim the list of buffers back down to this number after flushing */
+#define MAX_PARTITION_BUFFERS 32
+
+/* Stores multi-insert data related to a single relation in CopyFrom. */
+typedef struct CopyMultiInsertBuffer
+{
+	/* NB: slots[] entries are created lazily, on first use */
+	TupleTableSlot *slots[MAX_BUFFERED_TUPLES]; /* Array to store tuples */
+	ResultRelInfo *resultRelInfo;	/* ResultRelInfo for 'relid' */
+	BulkInsertState bistate;	/* BulkInsertState for this rel */
+	int			nused;			/* number of 'slots' containing tuples */
+	uint64		linenos[MAX_BUFFERED_TUPLES];	/* Line # of tuple in copy
+												 * stream */
+} CopyMultiInsertBuffer;
+
+/*
+ * Stores one or many CopyMultiInsertBuffers and details about the size and
+ * number of tuples which are stored in them. This allows multiple buffers to
+ * exist at once when COPYing into a partitioned table.
+ */
+typedef struct CopyMultiInsertInfo
+{
+	List	   *multiInsertBuffers; /* List of tracked CopyMultiInsertBuffers */
+	int			bufferedTuples; /* number of tuples buffered over all buffers */
+	int			bufferedBytes;	/* number of bytes from all buffered tuples */
+	CopyFromState cstate;		/* Copy state for this CopyMultiInsertInfo */
+	EState	   *estate;			/* Executor state used for COPY */
+	CommandId	mycid;			/* Command Id used for COPY */
+	int			ti_options;		/* table insert options */
+} CopyMultiInsertInfo;
+
+
+/* non-export function prototypes */
+static char *limit_printout_length(const char *str);
+
+static void ClosePipeFromProgram(CopyFromState cstate);
+
+/*
+ * error context callback for COPY FROM
+ *
+ * The argument for the error context must be CopyFromState.
+ *
+ * Adds a "COPY <rel>, line <n>[, column <c>[: <value>]]" context line to any
+ * error raised while this callback is installed.  Column values and raw
+ * lines are truncated via limit_printout_length() before display.
+ */
+void
+CopyFromErrorCallback(void *arg)
+{
+	CopyFromState cstate = (CopyFromState) arg;
+
+	if (cstate->opts.binary)
+	{
+		/* can't usefully display the data */
+		if (cstate->cur_attname)
+			errcontext("COPY %s, line %llu, column %s",
+					   cstate->cur_relname,
+					   (unsigned long long) cstate->cur_lineno,
+					   cstate->cur_attname);
+		else
+			errcontext("COPY %s, line %llu",
+					   cstate->cur_relname,
+					   (unsigned long long) cstate->cur_lineno);
+	}
+	else
+	{
+		if (cstate->cur_attname && cstate->cur_attval)
+		{
+			/* error is relevant to a particular column */
+			char	   *attval;
+
+			attval = limit_printout_length(cstate->cur_attval);
+			errcontext("COPY %s, line %llu, column %s: \"%s\"",
+					   cstate->cur_relname,
+					   (unsigned long long) cstate->cur_lineno,
+					   cstate->cur_attname,
+					   attval);
+			pfree(attval);
+		}
+		else if (cstate->cur_attname)
+		{
+			/* error is relevant to a particular column, value is NULL */
+			errcontext("COPY %s, line %llu, column %s: null input",
+					   cstate->cur_relname,
+					   (unsigned long long) cstate->cur_lineno,
+					   cstate->cur_attname);
+		}
+		else
+		{
+			/*
+			 * Error is relevant to a particular line.
+			 *
+			 * If line_buf still contains the correct line, print it.
+			 */
+			if (cstate->line_buf_valid)
+			{
+				char	   *lineval;
+
+				lineval = limit_printout_length(cstate->line_buf.data);
+				errcontext("COPY %s, line %llu: \"%s\"",
+						   cstate->cur_relname,
+						   (unsigned long long) cstate->cur_lineno, lineval);
+				pfree(lineval);
+			}
+			else
+			{
+				errcontext("COPY %s, line %llu",
+						   cstate->cur_relname,
+						   (unsigned long long) cstate->cur_lineno);
+			}
+		}
+	}
+}
+
+/*
+ * Make sure we don't print an unreasonable amount of COPY data in a message.
+ *
+ * Returns a pstrdup'd copy of the input.  When the input exceeds
+ * MAX_COPY_DATA_DISPLAY bytes it is truncated on a multibyte-character
+ * boundary and "..." is appended; the caller is responsible for pfree'ing
+ * the result.
+ */
+static char *
+limit_printout_length(const char *str)
+{
+#define MAX_COPY_DATA_DISPLAY 100
+
+	int			slen = strlen(str);
+	int			len;
+	char	   *res;
+
+	/* Fast path if definitely okay */
+	if (slen <= MAX_COPY_DATA_DISPLAY)
+		return pstrdup(str);
+
+	/* Apply encoding-dependent truncation */
+	len = pg_mbcliplen(str, slen, MAX_COPY_DATA_DISPLAY);
+
+	/*
+	 * Truncate, and add "..." to show we truncated the input.
+	 */
+	res = (char *) palloc(len + 4);
+	memcpy(res, str, len);
+	strcpy(res + len, "...");
+
+	return res;
+}
+
+/*
+ * Allocate memory and initialize a new CopyMultiInsertBuffer for this
+ * ResultRelInfo.
+ *
+ * The slots array is zeroed here; individual slots are created on demand
+ * by CopyMultiInsertInfoNextFreeSlot().
+ */
+static CopyMultiInsertBuffer *
+CopyMultiInsertBufferInit(ResultRelInfo *rri)
+{
+	CopyMultiInsertBuffer *buffer;
+
+	buffer = (CopyMultiInsertBuffer *) palloc(sizeof(CopyMultiInsertBuffer));
+	memset(buffer->slots, 0, sizeof(TupleTableSlot *) * MAX_BUFFERED_TUPLES);
+	buffer->resultRelInfo = rri;
+	buffer->bistate = GetBulkInsertState();
+	buffer->nused = 0;
+
+	return buffer;
+}
+
+/*
+ * Make a new buffer for this ResultRelInfo.
+ *
+ * The buffer is linked both from the ResultRelInfo (for O(1) lookup) and
+ * from miinfo's tracking list (for flushing/cleanup).
+ */
+static inline void
+CopyMultiInsertInfoSetupBuffer(CopyMultiInsertInfo *miinfo,
+							   ResultRelInfo *rri)
+{
+	CopyMultiInsertBuffer *buffer;
+
+	buffer = CopyMultiInsertBufferInit(rri);
+
+	/* Setup back-link so we can easily find this buffer again */
+	rri->ri_CopyMultiInsertBuffer = buffer;
+	/* Record that we're tracking this buffer */
+	miinfo->multiInsertBuffers = lappend(miinfo->multiInsertBuffers, buffer);
+}
+
+/*
+ * Initialize an already allocated CopyMultiInsertInfo.
+ *
+ * If rri is a non-partitioned table then a CopyMultiInsertBuffer is set up
+ * for that table.
+ */
+static void
+CopyMultiInsertInfoInit(CopyMultiInsertInfo *miinfo, ResultRelInfo *rri,
+						CopyFromState cstate, EState *estate, CommandId mycid,
+						int ti_options)
+{
+	miinfo->multiInsertBuffers = NIL;
+	miinfo->bufferedTuples = 0;
+	miinfo->bufferedBytes = 0;
+	miinfo->cstate = cstate;
+	miinfo->estate = estate;
+	miinfo->mycid = mycid;
+	miinfo->ti_options = ti_options;
+
+	/*
+	 * Only setup the buffer when not dealing with a partitioned table.
+	 * Buffers for partitioned tables will just be setup when we need to send
+	 * tuples their way for the first time.
+	 */
+	if (rri->ri_RelationDesc->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+		CopyMultiInsertInfoSetupBuffer(miinfo, rri);
+}
+
+/*
+ * Returns true if the buffers are full
+ *
+ * "Full" means either the tuple-count limit (MAX_BUFFERED_TUPLES) or the
+ * byte limit (MAX_BUFFERED_BYTES, measured by input size) has been reached
+ * across all tracked buffers.
+ */
+static inline bool
+CopyMultiInsertInfoIsFull(CopyMultiInsertInfo *miinfo)
+{
+	if (miinfo->bufferedTuples >= MAX_BUFFERED_TUPLES ||
+		miinfo->bufferedBytes >= MAX_BUFFERED_BYTES)
+		return true;
+	return false;
+}
+
+/*
+ * Returns true if we have no buffered tuples
+ */
+static inline bool
+CopyMultiInsertInfoIsEmpty(CopyMultiInsertInfo *miinfo)
+{
+	return miinfo->bufferedTuples == 0;
+}
+
+/*
+ * Write the tuples stored in 'buffer' out to the table.
+ *
+ * After the bulk insert this also updates indexes and fires AFTER ROW
+ * INSERT triggers for each tuple, restoring the per-tuple line number so
+ * that any error is reported against the right input line.  On return the
+ * buffer is empty (nused == 0) but its slots remain allocated for reuse.
+ */
+static inline void
+CopyMultiInsertBufferFlush(CopyMultiInsertInfo *miinfo,
+						   CopyMultiInsertBuffer *buffer)
+{
+	MemoryContext oldcontext;
+	int			i;
+	uint64		save_cur_lineno;
+	CopyFromState cstate = miinfo->cstate;
+	EState	   *estate = miinfo->estate;
+	CommandId	mycid = miinfo->mycid;
+	int			ti_options = miinfo->ti_options;
+	bool		line_buf_valid = cstate->line_buf_valid;
+	int			nused = buffer->nused;
+	ResultRelInfo *resultRelInfo = buffer->resultRelInfo;
+	TupleTableSlot **slots = buffer->slots;
+
+	/*
+	 * Print error context information correctly, if one of the operations
+	 * below fails.
+	 */
+	cstate->line_buf_valid = false;
+	save_cur_lineno = cstate->cur_lineno;
+
+	/*
+	 * table_multi_insert may leak memory, so switch to short-lived memory
+	 * context before calling it.
+	 */
+	oldcontext = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
+	table_multi_insert(resultRelInfo->ri_RelationDesc,
+					   slots,
+					   nused,
+					   mycid,
+					   ti_options,
+					   buffer->bistate);
+	MemoryContextSwitchTo(oldcontext);
+
+	for (i = 0; i < nused; i++)
+	{
+		/*
+		 * If there are any indexes, update them for all the inserted tuples,
+		 * and run AFTER ROW INSERT triggers.
+		 */
+		if (resultRelInfo->ri_NumIndices > 0)
+		{
+			List	   *recheckIndexes;
+
+			/* report errors against the tuple's original input line */
+			cstate->cur_lineno = buffer->linenos[i];
+			recheckIndexes =
+				ExecInsertIndexTuples(resultRelInfo,
+									  buffer->slots[i], estate, false, false,
+									  NULL, NIL);
+			ExecARInsertTriggers(estate, resultRelInfo,
+								 slots[i], recheckIndexes,
+								 cstate->transition_capture);
+			list_free(recheckIndexes);
+		}
+
+		/*
+		 * There's no indexes, but see if we need to run AFTER ROW INSERT
+		 * triggers anyway.
+		 */
+		else if (resultRelInfo->ri_TrigDesc != NULL &&
+				 (resultRelInfo->ri_TrigDesc->trig_insert_after_row ||
+				  resultRelInfo->ri_TrigDesc->trig_insert_new_table))
+		{
+			cstate->cur_lineno = buffer->linenos[i];
+			ExecARInsertTriggers(estate, resultRelInfo,
+								 slots[i], NIL, cstate->transition_capture);
+		}
+
+		ExecClearTuple(slots[i]);
+	}
+
+	/* Mark that all slots are free */
+	buffer->nused = 0;
+
+	/* reset cur_lineno and line_buf_valid to what they were */
+	cstate->line_buf_valid = line_buf_valid;
+	cstate->cur_lineno = save_cur_lineno;
+}
+
+/*
+ * Drop used slots and free member for this buffer.
+ *
+ * The buffer must be flushed before cleanup.
+ */
+static inline void
+CopyMultiInsertBufferCleanup(CopyMultiInsertInfo *miinfo,
+							 CopyMultiInsertBuffer *buffer)
+{
+	int			i;
+
+	/* Ensure buffer was flushed */
+	Assert(buffer->nused == 0);
+
+	/* Remove back-link to ourself */
+	buffer->resultRelInfo->ri_CopyMultiInsertBuffer = NULL;
+
+	FreeBulkInsertState(buffer->bistate);
+
+	/*
+	 * Since we only create slots on demand, just drop the non-null ones.
+	 * Slots are filled in order, so the first NULL entry ends the used range.
+	 */
+	for (i = 0; i < MAX_BUFFERED_TUPLES && buffer->slots[i] != NULL; i++)
+		ExecDropSingleTupleTableSlot(buffer->slots[i]);
+
+	table_finish_bulk_insert(buffer->resultRelInfo->ri_RelationDesc,
+							 miinfo->ti_options);
+
+	pfree(buffer);
+}
+
+/*
+ * Write out all stored tuples in all buffers out to the tables.
+ *
+ * Once flushed we also trim the tracked buffers list down to size by removing
+ * the buffers created earliest first.
+ *
+ * Callers should pass 'curr_rri' as the ResultRelInfo that's currently being
+ * used. When cleaning up old buffers we'll never remove the one for
+ * 'curr_rri'.
+ */
+static inline void
+CopyMultiInsertInfoFlush(CopyMultiInsertInfo *miinfo, ResultRelInfo *curr_rri)
+{
+	ListCell   *lc;
+
+	foreach(lc, miinfo->multiInsertBuffers)
+	{
+		CopyMultiInsertBuffer *buffer = (CopyMultiInsertBuffer *) lfirst(lc);
+
+		CopyMultiInsertBufferFlush(miinfo, buffer);
+	}
+
+	miinfo->bufferedTuples = 0;
+	miinfo->bufferedBytes = 0;
+
+	/*
+	 * Trim the list of tracked buffers down if it exceeds the limit. Here we
+	 * remove buffers starting with the ones we created first. It seems less
+	 * likely that these older ones will be needed than the ones that were
+	 * just created.
+	 */
+	while (list_length(miinfo->multiInsertBuffers) > MAX_PARTITION_BUFFERS)
+	{
+		CopyMultiInsertBuffer *buffer;
+
+		buffer = (CopyMultiInsertBuffer *) linitial(miinfo->multiInsertBuffers);
+
+		/*
+		 * We never want to remove the buffer that's currently being used, so
+		 * if we happen to find that then move it to the end of the list.
+		 */
+		if (buffer->resultRelInfo == curr_rri)
+		{
+			miinfo->multiInsertBuffers = list_delete_first(miinfo->multiInsertBuffers);
+			miinfo->multiInsertBuffers = lappend(miinfo->multiInsertBuffers, buffer);
+			buffer = (CopyMultiInsertBuffer *) linitial(miinfo->multiInsertBuffers);
+		}
+
+		CopyMultiInsertBufferCleanup(miinfo, buffer);
+		miinfo->multiInsertBuffers = list_delete_first(miinfo->multiInsertBuffers);
+	}
+}
+
+/*
+ * Cleanup allocated buffers and free memory
+ *
+ * All buffers must already be flushed (CopyMultiInsertBufferCleanup asserts
+ * nused == 0 for each).
+ */
+static inline void
+CopyMultiInsertInfoCleanup(CopyMultiInsertInfo *miinfo)
+{
+	ListCell   *lc;
+
+	foreach(lc, miinfo->multiInsertBuffers)
+		CopyMultiInsertBufferCleanup(miinfo, lfirst(lc));
+
+	list_free(miinfo->multiInsertBuffers);
+}
+
+/*
+ * Get the next TupleTableSlot that the next tuple should be stored in.
+ *
+ * Callers must ensure that the buffer is not full.
+ *
+ * Note: 'miinfo' is unused but has been included for consistency with the
+ * other functions in this area.
+ */
+static inline TupleTableSlot *
+CopyMultiInsertInfoNextFreeSlot(CopyMultiInsertInfo *miinfo,
+								ResultRelInfo *rri)
+{
+	CopyMultiInsertBuffer *buffer = rri->ri_CopyMultiInsertBuffer;
+	int			nused = buffer->nused;
+
+	Assert(buffer != NULL);
+	Assert(nused < MAX_BUFFERED_TUPLES);
+
+	/* create the slot lazily on first use of this array position */
+	if (buffer->slots[nused] == NULL)
+		buffer->slots[nused] = table_slot_create(rri->ri_RelationDesc, NULL);
+	return buffer->slots[nused];
+}
+
+/*
+ * Record the previously reserved TupleTableSlot that was reserved by
+ * CopyMultiInsertInfoNextFreeSlot as being consumed.
+ *
+ * 'tuplen' is the input-side size of the tuple, used only for the flush
+ * threshold accounting; 'lineno' is remembered for error reporting during
+ * the eventual flush.
+ */
+static inline void
+CopyMultiInsertInfoStore(CopyMultiInsertInfo *miinfo, ResultRelInfo *rri,
+						 TupleTableSlot *slot, int tuplen, uint64 lineno)
+{
+	CopyMultiInsertBuffer *buffer = rri->ri_CopyMultiInsertBuffer;
+
+	Assert(buffer != NULL);
+	Assert(slot == buffer->slots[buffer->nused]);
+
+	/* Store the line number so we can properly report any errors later */
+	buffer->linenos[buffer->nused] = lineno;
+
+	/* Record this slot as being used */
+	buffer->nused++;
+
+	/* Update how many tuples are stored and their size */
+	miinfo->bufferedTuples++;
+	miinfo->bufferedBytes += tuplen;
+}
+
+/*
+ * Copy FROM file to relation.
+ *
+ * Reads rows via NextCopyFrom() from the source set up in 'cstate' and
+ * inserts them into cstate->rel, firing triggers, routing tuples to
+ * partitions, and batching inserts via the multi-insert machinery where
+ * possible.  Returns the number of tuples inserted, i.e. not counting rows
+ * filtered by the WHERE clause or suppressed by BEFORE triggers / FDWs.
+ */
+uint64
+CopyFrom(CopyFromState cstate)
+{
+	ResultRelInfo *resultRelInfo;
+	ResultRelInfo *target_resultRelInfo;
+	ResultRelInfo *prevResultRelInfo = NULL;
+	EState	   *estate = CreateExecutorState(); /* for ExecConstraints() */
+	ModifyTableState *mtstate;
+	ExprContext *econtext;
+	TupleTableSlot *singleslot = NULL;
+	MemoryContext oldcontext = CurrentMemoryContext;
+
+	PartitionTupleRouting *proute = NULL;
+	ErrorContextCallback errcallback;
+	CommandId	mycid = GetCurrentCommandId(true);
+	int			ti_options = 0; /* start with default options for insert */
+	BulkInsertState bistate = NULL;
+	CopyInsertMethod insertMethod;
+	CopyMultiInsertInfo multiInsertInfo = {0};	/* pacify compiler */
+	int64		processed = 0;	/* rows actually inserted */
+	int64		excluded = 0;	/* rows rejected by the WHERE clause */
+	bool		has_before_insert_row_trig;
+	bool		has_instead_insert_row_trig;
+	bool		leafpart_use_multi_insert = false;
+
+	Assert(cstate->rel);
+	Assert(list_length(cstate->range_table) == 1);
+
+	/*
+	 * The target must be a plain, foreign, or partitioned relation, or have
+	 * an INSTEAD OF INSERT row trigger.  (Currently, such triggers are only
+	 * allowed on views, so we only hint about them in the view case.)
+	 */
+	if (cstate->rel->rd_rel->relkind != RELKIND_RELATION &&
+		cstate->rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE &&
+		cstate->rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE &&
+		!(cstate->rel->trigdesc &&
+		  cstate->rel->trigdesc->trig_insert_instead_row))
+	{
+		if (cstate->rel->rd_rel->relkind == RELKIND_VIEW)
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("cannot copy to view \"%s\"",
+							RelationGetRelationName(cstate->rel)),
+					 errhint("To enable copying to a view, provide an INSTEAD OF INSERT trigger.")));
+		else if (cstate->rel->rd_rel->relkind == RELKIND_MATVIEW)
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("cannot copy to materialized view \"%s\"",
+							RelationGetRelationName(cstate->rel))));
+		else if (cstate->rel->rd_rel->relkind == RELKIND_SEQUENCE)
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("cannot copy to sequence \"%s\"",
+							RelationGetRelationName(cstate->rel))));
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("cannot copy to non-table relation \"%s\"",
+							RelationGetRelationName(cstate->rel))));
+	}
+
+	/*
+	 * If the target file is new-in-transaction, we assume that checking FSM
+	 * for free space is a waste of time.  This could possibly be wrong, but
+	 * it's unlikely.
+	 */
+	if (RELKIND_HAS_STORAGE(cstate->rel->rd_rel->relkind) &&
+		(cstate->rel->rd_createSubid != InvalidSubTransactionId ||
+		 cstate->rel->rd_firstRelfilenodeSubid != InvalidSubTransactionId))
+		ti_options |= TABLE_INSERT_SKIP_FSM;
+
+	/*
+	 * Optimize if new relfilenode was created in this subxact or one of its
+	 * committed children and we won't see those rows later as part of an
+	 * earlier scan or command. The subxact test ensures that if this subxact
+	 * aborts then the frozen rows won't be visible after xact cleanup.  Note
+	 * that the stronger test of exactly which subtransaction created it is
+	 * crucial for correctness of this optimization. The test for an earlier
+	 * scan or command tolerates false negatives. FREEZE causes other sessions
+	 * to see rows they would not see under MVCC, and a false negative merely
+	 * spreads that anomaly to the current session.
+	 */
+	if (cstate->opts.freeze)
+	{
+		/*
+		 * We currently disallow COPY FREEZE on partitioned tables.  The
+		 * reason for this is that we've simply not yet opened the partitions
+		 * to determine if the optimization can be applied to them.  We could
+		 * go and open them all here, but doing so may be quite a costly
+		 * overhead for small copies.  In any case, we may just end up routing
+		 * tuples to a small number of partitions.  It seems better just to
+		 * raise an ERROR for partitioned tables.
+		 */
+		if (cstate->rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+		{
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("cannot perform COPY FREEZE on a partitioned table")));
+		}
+
+		/*
+		 * Tolerate one registration for the benefit of FirstXactSnapshot.
+		 * Scan-bearing queries generally create at least two registrations,
+		 * though relying on that is fragile, as is ignoring ActiveSnapshot.
+		 * Clear CatalogSnapshot to avoid counting its registration.  We'll
+		 * still detect ongoing catalog scans, each of which separately
+		 * registers the snapshot it uses.
+		 */
+		InvalidateCatalogSnapshot();
+		if (!ThereAreNoPriorRegisteredSnapshots() || !ThereAreNoReadyPortals())
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_TRANSACTION_STATE),
+					 errmsg("cannot perform COPY FREEZE because of prior transaction activity")));
+
+		if (cstate->rel->rd_createSubid != GetCurrentSubTransactionId() &&
+			cstate->rel->rd_newRelfilenodeSubid != GetCurrentSubTransactionId())
+			ereport(ERROR,
+					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+					 errmsg("cannot perform COPY FREEZE because the table was not created or truncated in the current subtransaction")));
+
+		ti_options |= TABLE_INSERT_FROZEN;
+	}
+
+	/*
+	 * We need a ResultRelInfo so we can use the regular executor's
+	 * index-entry-making machinery.  (There used to be a huge amount of code
+	 * here that basically duplicated execUtils.c ...)
+	 */
+	ExecInitRangeTable(estate, cstate->range_table);
+	resultRelInfo = target_resultRelInfo = makeNode(ResultRelInfo);
+	ExecInitResultRelation(estate, resultRelInfo, 1);
+
+	/* Verify the named relation is a valid target for INSERT */
+	CheckValidResultRel(resultRelInfo, CMD_INSERT);
+
+	ExecOpenIndices(resultRelInfo, false);
+
+	/*
+	 * Set up a ModifyTableState so we can let FDW(s) init themselves for
+	 * foreign-table result relation(s).
+	 */
+	mtstate = makeNode(ModifyTableState);
+	mtstate->ps.plan = NULL;
+	mtstate->ps.state = estate;
+	mtstate->operation = CMD_INSERT;
+	mtstate->mt_nrels = 1;
+	mtstate->resultRelInfo = resultRelInfo;
+	mtstate->rootResultRelInfo = resultRelInfo;
+
+	if (resultRelInfo->ri_FdwRoutine != NULL &&
+		resultRelInfo->ri_FdwRoutine->BeginForeignInsert != NULL)
+		resultRelInfo->ri_FdwRoutine->BeginForeignInsert(mtstate,
+														 resultRelInfo);
+
+	/* Prepare to catch AFTER triggers. */
+	AfterTriggerBeginQuery();
+
+	/*
+	 * If there are any triggers with transition tables on the named relation,
+	 * we need to be prepared to capture transition tuples.
+	 *
+	 * Because partition tuple routing would like to know about whether
+	 * transition capture is active, we also set it in mtstate, which is
+	 * passed to ExecFindPartition() below.
+	 */
+	cstate->transition_capture = mtstate->mt_transition_capture =
+		MakeTransitionCaptureState(cstate->rel->trigdesc,
+								   RelationGetRelid(cstate->rel),
+								   CMD_INSERT);
+
+	/*
+	 * If the named relation is a partitioned table, initialize state for
+	 * CopyFrom tuple routing.
+	 */
+	if (cstate->rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+		proute = ExecSetupPartitionTupleRouting(estate, cstate->rel);
+
+	if (cstate->whereClause)
+		cstate->qualexpr = ExecInitQual(castNode(List, cstate->whereClause),
+										&mtstate->ps);
+
+	/*
+	 * It's generally more efficient to prepare a bunch of tuples for
+	 * insertion, and insert them in one table_multi_insert() call, than call
+	 * table_tuple_insert() separately for every tuple. However, there are a
+	 * number of reasons why we might not be able to do this.  These are
+	 * explained below.
+	 */
+	if (resultRelInfo->ri_TrigDesc != NULL &&
+		(resultRelInfo->ri_TrigDesc->trig_insert_before_row ||
+		 resultRelInfo->ri_TrigDesc->trig_insert_instead_row))
+	{
+		/*
+		 * Can't support multi-inserts when there are any BEFORE/INSTEAD OF
+		 * triggers on the table.  Such triggers might query the table we're
+		 * inserting into and act differently if the tuples that have already
+		 * been processed and prepared for insertion are not there.
+		 */
+		insertMethod = CIM_SINGLE;
+	}
+	else if (proute != NULL && resultRelInfo->ri_TrigDesc != NULL &&
+			 resultRelInfo->ri_TrigDesc->trig_insert_new_table)
+	{
+		/*
+		 * For partitioned tables we can't support multi-inserts when there
+		 * are any statement level insert triggers.  It might be possible to
+		 * allow partitioned tables with such triggers in the future, but for
+		 * now, CopyMultiInsertInfoFlush expects that any after row insert and
+		 * statement level insert triggers are on the same relation.
+		 */
+		insertMethod = CIM_SINGLE;
+	}
+	else if (resultRelInfo->ri_FdwRoutine != NULL ||
+			 cstate->volatile_defexprs)
+	{
+		/*
+		 * Can't support multi-inserts to foreign tables or if there are any
+		 * volatile default expressions in the table.  Similarly to the
+		 * trigger case above, such expressions may query the table we're
+		 * inserting into.
+		 *
+		 * Note: It does not matter if any partitions have any volatile
+		 * default expressions as we use the defaults from the target of the
+		 * COPY command.
+		 */
+		insertMethod = CIM_SINGLE;
+	}
+	else if (contain_volatile_functions(cstate->whereClause))
+	{
+		/*
+		 * Can't support multi-inserts if there are any volatile function
+		 * expressions in WHERE clause.  Similarly to the trigger case above,
+		 * such expressions may query the table we're inserting into.
+		 */
+		insertMethod = CIM_SINGLE;
+	}
+	else
+	{
+		/*
+		 * For partitioned tables, we may still be able to perform bulk
+		 * inserts.  However, the possibility of this depends on which types
+		 * of triggers exist on the partition.  We must disable bulk inserts
+		 * if the partition is a foreign table or it has any before row insert
+		 * or insert instead triggers (same as we checked above for the parent
+		 * table).  Since the partition's resultRelInfos are initialized only
+		 * when we actually need to insert the first tuple into them, we must
+		 * have the intermediate insert method of CIM_MULTI_CONDITIONAL to
+		 * flag that we must later determine if we can use bulk-inserts for
+		 * the partition being inserted into.
+		 */
+		if (proute)
+			insertMethod = CIM_MULTI_CONDITIONAL;
+		else
+			insertMethod = CIM_MULTI;
+
+		CopyMultiInsertInfoInit(&multiInsertInfo, resultRelInfo, cstate,
+								estate, mycid, ti_options);
+	}
+
+	/*
+	 * If not using batch mode (which allocates slots as needed) set up a
+	 * tuple slot too. When inserting into a partitioned table, we also need
+	 * one, even if we might batch insert, to read the tuple in the root
+	 * partition's form.
+	 */
+	if (insertMethod == CIM_SINGLE || insertMethod == CIM_MULTI_CONDITIONAL)
+	{
+		singleslot = table_slot_create(resultRelInfo->ri_RelationDesc,
+									   &estate->es_tupleTable);
+		bistate = GetBulkInsertState();
+	}
+
+	has_before_insert_row_trig = (resultRelInfo->ri_TrigDesc &&
+								  resultRelInfo->ri_TrigDesc->trig_insert_before_row);
+
+	has_instead_insert_row_trig = (resultRelInfo->ri_TrigDesc &&
+								   resultRelInfo->ri_TrigDesc->trig_insert_instead_row);
+
+	/*
+	 * Check BEFORE STATEMENT insertion triggers. It's debatable whether we
+	 * should do this for COPY, since it's not really an "INSERT" statement as
+	 * such. However, executing these triggers maintains consistency with the
+	 * EACH ROW triggers that we already fire on COPY.
+	 */
+	ExecBSInsertTriggers(estate, resultRelInfo);
+
+	econtext = GetPerTupleExprContext(estate);
+
+	/* Set up callback to identify error line number */
+	errcallback.callback = CopyFromErrorCallback;
+	errcallback.arg = (void *) cstate;
+	errcallback.previous = error_context_stack;
+	error_context_stack = &errcallback;
+
+	/* Main loop: each iteration reads and inserts one input row. */
+	for (;;)
+	{
+		TupleTableSlot *myslot;
+		bool		skip_tuple;
+
+		CHECK_FOR_INTERRUPTS();
+
+		/*
+		 * Reset the per-tuple exprcontext. We do this after every tuple, to
+		 * clean-up after expression evaluations etc.
+		 */
+		ResetPerTupleExprContext(estate);
+
+		/* select slot to (initially) load row into */
+		if (insertMethod == CIM_SINGLE || proute)
+		{
+			myslot = singleslot;
+			Assert(myslot != NULL);
+		}
+		else
+		{
+			Assert(resultRelInfo == target_resultRelInfo);
+			Assert(insertMethod == CIM_MULTI);
+
+			myslot = CopyMultiInsertInfoNextFreeSlot(&multiInsertInfo,
+													 resultRelInfo);
+		}
+
+		/*
+		 * Switch to per-tuple context before calling NextCopyFrom, which does
+		 * evaluate default expressions etc. and requires per-tuple context.
+		 */
+		MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
+
+		ExecClearTuple(myslot);
+
+		/* Directly store the values/nulls array in the slot */
+		if (!NextCopyFrom(cstate, econtext, myslot->tts_values, myslot->tts_isnull))
+			break;
+
+		ExecStoreVirtualTuple(myslot);
+
+		/*
+		 * Constraints and where clause might reference the tableoid column,
+		 * so (re-)initialize tts_tableOid before evaluating them.
+		 */
+		myslot->tts_tableOid = RelationGetRelid(target_resultRelInfo->ri_RelationDesc);
+
+		/* Triggers and stuff need to be invoked in query context. */
+		MemoryContextSwitchTo(oldcontext);
+
+		if (cstate->whereClause)
+		{
+			econtext->ecxt_scantuple = myslot;
+			/* Skip items that don't match COPY's WHERE clause */
+			if (!ExecQual(cstate->qualexpr, econtext))
+			{
+				/*
+				 * Report that this tuple was filtered out by the WHERE
+				 * clause.
+				 */
+				pgstat_progress_update_param(PROGRESS_COPY_TUPLES_EXCLUDED,
+											 ++excluded);
+				continue;
+			}
+		}
+
+		/* Determine the partition to insert the tuple into */
+		if (proute)
+		{
+			TupleConversionMap *map;
+
+			/*
+			 * Attempt to find a partition suitable for this tuple.
+			 * ExecFindPartition() will raise an error if none can be found or
+			 * if the found partition is not suitable for INSERTs.
+			 */
+			resultRelInfo = ExecFindPartition(mtstate, target_resultRelInfo,
+											  proute, myslot, estate);
+
+			/* Per-partition setup is redone only when the partition changes. */
+			if (prevResultRelInfo != resultRelInfo)
+			{
+				/* Determine which triggers exist on this partition */
+				has_before_insert_row_trig = (resultRelInfo->ri_TrigDesc &&
+											  resultRelInfo->ri_TrigDesc->trig_insert_before_row);
+
+				has_instead_insert_row_trig = (resultRelInfo->ri_TrigDesc &&
+											   resultRelInfo->ri_TrigDesc->trig_insert_instead_row);
+
+				/*
+				 * Disable multi-inserts when the partition has BEFORE/INSTEAD
+				 * OF triggers, or if the partition is a foreign partition.
+				 */
+				leafpart_use_multi_insert = insertMethod == CIM_MULTI_CONDITIONAL &&
+					!has_before_insert_row_trig &&
+					!has_instead_insert_row_trig &&
+					resultRelInfo->ri_FdwRoutine == NULL;
+
+				/* Set the multi-insert buffer to use for this partition. */
+				if (leafpart_use_multi_insert)
+				{
+					if (resultRelInfo->ri_CopyMultiInsertBuffer == NULL)
+						CopyMultiInsertInfoSetupBuffer(&multiInsertInfo,
+													   resultRelInfo);
+				}
+				else if (insertMethod == CIM_MULTI_CONDITIONAL &&
+						 !CopyMultiInsertInfoIsEmpty(&multiInsertInfo))
+				{
+					/*
+					 * Flush pending inserts if this partition can't use
+					 * batching, so rows are visible to triggers etc.
+					 */
+					CopyMultiInsertInfoFlush(&multiInsertInfo, resultRelInfo);
+				}
+
+				if (bistate != NULL)
+					ReleaseBulkInsertStatePin(bistate);
+				prevResultRelInfo = resultRelInfo;
+			}
+
+			/*
+			 * If we're capturing transition tuples, we might need to convert
+			 * from the partition rowtype to root rowtype. But if there are no
+			 * BEFORE triggers on the partition that could change the tuple,
+			 * we can just remember the original unconverted tuple to avoid a
+			 * needless round trip conversion.
+			 */
+			if (cstate->transition_capture != NULL)
+				cstate->transition_capture->tcs_original_insert_tuple =
+					!has_before_insert_row_trig ? myslot : NULL;
+
+			/*
+			 * We might need to convert from the root rowtype to the partition
+			 * rowtype.
+			 */
+			map = resultRelInfo->ri_RootToPartitionMap;
+			if (insertMethod == CIM_SINGLE || !leafpart_use_multi_insert)
+			{
+				/* non batch insert */
+				if (map != NULL)
+				{
+					TupleTableSlot *new_slot;
+
+					new_slot = resultRelInfo->ri_PartitionTupleSlot;
+					myslot = execute_attr_map_slot(map->attrMap, myslot, new_slot);
+				}
+			}
+			else
+			{
+				/*
+				 * Prepare to queue up tuple for later batch insert into
+				 * current partition.
+				 */
+				TupleTableSlot *batchslot;
+
+				/* no other path available for partitioned table */
+				Assert(insertMethod == CIM_MULTI_CONDITIONAL);
+
+				batchslot = CopyMultiInsertInfoNextFreeSlot(&multiInsertInfo,
+															resultRelInfo);
+
+				if (map != NULL)
+					myslot = execute_attr_map_slot(map->attrMap, myslot,
+												   batchslot);
+				else
+				{
+					/*
+					 * This looks more expensive than it is (Believe me, I
+					 * optimized it away. Twice.). The input is in virtual
+					 * form, and we'll materialize the slot below - for most
+					 * slot types the copy performs the work materialization
+					 * would later require anyway.
+					 */
+					ExecCopySlot(batchslot, myslot);
+					myslot = batchslot;
+				}
+			}
+
+			/* ensure that triggers etc see the right relation */
+			myslot->tts_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
+		}
+
+		skip_tuple = false;
+
+		/* BEFORE ROW INSERT Triggers */
+		if (has_before_insert_row_trig)
+		{
+			if (!ExecBRInsertTriggers(estate, resultRelInfo, myslot))
+				skip_tuple = true;	/* "do nothing" */
+		}
+
+		if (!skip_tuple)
+		{
+			/*
+			 * If there is an INSTEAD OF INSERT ROW trigger, let it handle the
+			 * tuple.  Otherwise, proceed with inserting the tuple into the
+			 * table or foreign table.
+			 */
+			if (has_instead_insert_row_trig)
+			{
+				ExecIRInsertTriggers(estate, resultRelInfo, myslot);
+			}
+			else
+			{
+				/* Compute stored generated columns */
+				if (resultRelInfo->ri_RelationDesc->rd_att->constr &&
+					resultRelInfo->ri_RelationDesc->rd_att->constr->has_generated_stored)
+					ExecComputeStoredGenerated(resultRelInfo, estate, myslot,
+											   CMD_INSERT);
+
+				/*
+				 * If the target is a plain table, check the constraints of
+				 * the tuple.
+				 */
+				if (resultRelInfo->ri_FdwRoutine == NULL &&
+					resultRelInfo->ri_RelationDesc->rd_att->constr)
+					ExecConstraints(resultRelInfo, myslot, estate);
+
+				/*
+				 * Also check the tuple against the partition constraint, if
+				 * there is one; except that if we got here via tuple-routing,
+				 * we don't need to if there's no BR trigger defined on the
+				 * partition.
+				 */
+				if (resultRelInfo->ri_RelationDesc->rd_rel->relispartition &&
+					(proute == NULL || has_before_insert_row_trig))
+					ExecPartitionCheck(resultRelInfo, myslot, estate, true);
+
+				/* Store the slot in the multi-insert buffer, when enabled. */
+				if (insertMethod == CIM_MULTI || leafpart_use_multi_insert)
+				{
+					/*
+					 * The slot previously might point into the per-tuple
+					 * context. For batching it needs to be longer lived.
+					 */
+					ExecMaterializeSlot(myslot);
+
+					/* Add this tuple to the tuple buffer */
+					CopyMultiInsertInfoStore(&multiInsertInfo,
+											 resultRelInfo, myslot,
+											 cstate->line_buf.len,
+											 cstate->cur_lineno);
+
+					/*
+					 * If enough inserts have queued up, then flush all
+					 * buffers out to their tables.
+					 */
+					if (CopyMultiInsertInfoIsFull(&multiInsertInfo))
+						CopyMultiInsertInfoFlush(&multiInsertInfo, resultRelInfo);
+				}
+				else
+				{
+					List	   *recheckIndexes = NIL;
+
+					/* OK, store the tuple */
+					if (resultRelInfo->ri_FdwRoutine != NULL)
+					{
+						myslot = resultRelInfo->ri_FdwRoutine->ExecForeignInsert(estate,
+																				 resultRelInfo,
+																				 myslot,
+																				 NULL);
+
+						if (myslot == NULL) /* "do nothing" */
+							continue;	/* next tuple please */
+
+						/*
+						 * AFTER ROW Triggers might reference the tableoid
+						 * column, so (re-)initialize tts_tableOid before
+						 * evaluating them.
+						 */
+						myslot->tts_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
+					}
+					else
+					{
+						/* OK, store the tuple and create index entries for it */
+						table_tuple_insert(resultRelInfo->ri_RelationDesc,
+										   myslot, mycid, ti_options, bistate);
+
+						if (resultRelInfo->ri_NumIndices > 0)
+							recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
+																   myslot,
+																   estate,
+																   false,
+																   false,
+																   NULL,
+																   NIL);
+					}
+
+					/* AFTER ROW INSERT Triggers */
+					ExecARInsertTriggers(estate, resultRelInfo, myslot,
+										 recheckIndexes, cstate->transition_capture);
+
+					list_free(recheckIndexes);
+				}
+			}
+
+			/*
+			 * We count only tuples not suppressed by a BEFORE INSERT trigger
+			 * or FDW; this is the same definition used by nodeModifyTable.c
+			 * for counting tuples inserted by an INSERT command.  Update
+			 * progress of the COPY command as well.
+			 */
+			pgstat_progress_update_param(PROGRESS_COPY_TUPLES_PROCESSED,
+										 ++processed);
+		}
+	}
+
+	/* Flush any remaining buffered tuples */
+	if (insertMethod != CIM_SINGLE)
+	{
+		if (!CopyMultiInsertInfoIsEmpty(&multiInsertInfo))
+			CopyMultiInsertInfoFlush(&multiInsertInfo, NULL);
+	}
+
+	/* Done, clean up */
+	error_context_stack = errcallback.previous;
+
+	if (bistate != NULL)
+		FreeBulkInsertState(bistate);
+
+	MemoryContextSwitchTo(oldcontext);
+
+	/* Execute AFTER STATEMENT insertion triggers */
+	ExecASInsertTriggers(estate, target_resultRelInfo, cstate->transition_capture);
+
+	/* Handle queued AFTER triggers */
+	AfterTriggerEndQuery(estate);
+
+	ExecResetTupleTable(estate->es_tupleTable, false);
+
+	/* Allow the FDW to shut down */
+	if (target_resultRelInfo->ri_FdwRoutine != NULL &&
+		target_resultRelInfo->ri_FdwRoutine->EndForeignInsert != NULL)
+		target_resultRelInfo->ri_FdwRoutine->EndForeignInsert(estate,
+															  target_resultRelInfo);
+
+	/* Tear down the multi-insert buffer data */
+	if (insertMethod != CIM_SINGLE)
+		CopyMultiInsertInfoCleanup(&multiInsertInfo);
+
+	/* Close all the partitioned tables, leaf partitions, and their indices */
+	if (proute)
+		ExecCleanupTupleRouting(mtstate, proute);
+
+	/* Close the result relations, including any trigger target relations */
+	ExecCloseResultRelations(estate);
+	ExecCloseRangeTableRelations(estate);
+
+	FreeExecutorState(estate);
+
+	return processed;
+}
+
+/*
+ * Setup to read tuples from a file for COPY FROM.
+ *
+ * 'rel': Used as a template for the tuples
+ * 'whereClause': WHERE clause from the COPY FROM command
+ * 'filename': Name of server-local file to read, NULL for STDIN
+ * 'is_program': true if 'filename' is program to execute
+ * 'data_source_cb': callback that provides the input data
+ * 'attnamelist': List of char *, columns to include. NIL selects all cols.
+ * 'options': List of DefElem. See copy_opt_item in gram.y for selections.
+ *
+ * Returns a CopyFromState, to be passed to NextCopyFrom and related functions.
+ */
+CopyFromState
+BeginCopyFrom(ParseState *pstate,
+			  Relation rel,
+			  Node *whereClause,
+			  const char *filename,
+			  bool is_program,
+			  copy_data_source_cb data_source_cb,
+			  List *attnamelist,
+			  List *options)
+{
+	CopyFromState cstate;
+	bool		pipe = (filename == NULL);
+	TupleDesc	tupDesc;
+	AttrNumber	num_phys_attrs,
+				num_defaults;
+	FmgrInfo   *in_functions;
+	Oid		   *typioparams;
+	int			attnum;
+	Oid			in_func_oid;
+	int		   *defmap;
+	ExprState **defexprs;
+	MemoryContext oldcontext;
+	bool		volatile_defexprs;
+	const int	progress_cols[] = {
+		PROGRESS_COPY_COMMAND,
+		PROGRESS_COPY_TYPE,
+		PROGRESS_COPY_BYTES_TOTAL
+	};
+	int64		progress_vals[] = {
+		PROGRESS_COPY_COMMAND_FROM,
+		0,
+		0
+	};
+
+	/* Allocate workspace and zero all fields */
+	cstate = (CopyFromStateData *) palloc0(sizeof(CopyFromStateData));
+
+	/*
+	 * We allocate everything used by a cstate in a new memory context. This
+	 * avoids memory leaks during repeated use of COPY in a query.
+	 */
+	cstate->copycontext = AllocSetContextCreate(CurrentMemoryContext,
+												"COPY",
+												ALLOCSET_DEFAULT_SIZES);
+
+	oldcontext = MemoryContextSwitchTo(cstate->copycontext);
+
+	/* Extract options from the statement node tree */
+	ProcessCopyOptions(pstate, &cstate->opts, true /* is_from */ , options);
+
+	/* Process the target relation */
+	cstate->rel = rel;
+
+	tupDesc = RelationGetDescr(cstate->rel);
+
+	/* process common options or initialization */
+
+	/* Generate or convert list of attributes to process */
+	cstate->attnumlist = CopyGetAttnums(tupDesc, cstate->rel, attnamelist);
+
+	num_phys_attrs = tupDesc->natts;
+
+	/* Convert FORCE_NOT_NULL name list to per-column flags, check validity */
+	cstate->opts.force_notnull_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool));
+	if (cstate->opts.force_notnull)
+	{
+		List	   *attnums;
+		ListCell   *cur;
+
+		attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->opts.force_notnull);
+
+		foreach(cur, attnums)
+		{
+			int			attnum = lfirst_int(cur);
+			Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);
+
+			if (!list_member_int(cstate->attnumlist, attnum))
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+						 errmsg("FORCE_NOT_NULL column \"%s\" not referenced by COPY",
+								NameStr(attr->attname))));
+			cstate->opts.force_notnull_flags[attnum - 1] = true;
+		}
+	}
+
+	/* Convert FORCE_NULL name list to per-column flags, check validity */
+	cstate->opts.force_null_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool));
+	if (cstate->opts.force_null)
+	{
+		List	   *attnums;
+		ListCell   *cur;
+
+		attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->opts.force_null);
+
+		foreach(cur, attnums)
+		{
+			int			attnum = lfirst_int(cur);
+			Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);
+
+			if (!list_member_int(cstate->attnumlist, attnum))
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+						 errmsg("FORCE_NULL column \"%s\" not referenced by COPY",
+								NameStr(attr->attname))));
+			cstate->opts.force_null_flags[attnum - 1] = true;
+		}
+	}
+
+	/* Convert convert_selectively name list to per-column flags */
+	if (cstate->opts.convert_selectively)
+	{
+		List	   *attnums;
+		ListCell   *cur;
+
+		cstate->convert_select_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool));
+
+		attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->opts.convert_select);
+
+		foreach(cur, attnums)
+		{
+			int			attnum = lfirst_int(cur);
+			Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);
+
+			if (!list_member_int(cstate->attnumlist, attnum))
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+						 errmsg_internal("selected column \"%s\" not referenced by COPY",
+										 NameStr(attr->attname))));
+			cstate->convert_select_flags[attnum - 1] = true;
+		}
+	}
+
+	/* Use client encoding when ENCODING option is not specified. */
+	if (cstate->opts.file_encoding < 0)
+		cstate->file_encoding = pg_get_client_encoding();
+	else
+		cstate->file_encoding = cstate->opts.file_encoding;
+
+	/*
+	 * Look up encoding conversion function.
+	 */
+	if (cstate->file_encoding == GetDatabaseEncoding() ||
+		cstate->file_encoding == PG_SQL_ASCII ||
+		GetDatabaseEncoding() == PG_SQL_ASCII)
+	{
+		cstate->need_transcoding = false;
+	}
+	else
+	{
+		cstate->need_transcoding = true;
+		cstate->conversion_proc = FindDefaultConversionProc(cstate->file_encoding,
+															GetDatabaseEncoding());
+		if (!OidIsValid(cstate->conversion_proc))
+			ereport(ERROR,
+					(errcode(ERRCODE_UNDEFINED_FUNCTION),
+					 errmsg("default conversion function for encoding \"%s\" to \"%s\" does not exist",
+							pg_encoding_to_char(cstate->file_encoding),
+							pg_encoding_to_char(GetDatabaseEncoding()))));
+	}
+
+	cstate->copy_src = COPY_FILE;	/* default */
+
+	cstate->whereClause = whereClause;
+
+	/* Initialize state variables */
+	cstate->eol_type = EOL_UNKNOWN;
+	cstate->cur_relname = RelationGetRelationName(cstate->rel);
+	cstate->cur_lineno = 0;
+	cstate->cur_attname = NULL;
+	cstate->cur_attval = NULL;
+
+	/*
+	 * Allocate buffers for the input pipeline.
+	 *
+	 * attribute_buf and raw_buf are used in both text and binary modes, but
+	 * input_buf and line_buf only in text mode.
+	 */
+	cstate->raw_buf = palloc(RAW_BUF_SIZE + 1);
+	cstate->raw_buf_index = cstate->raw_buf_len = 0;
+	cstate->raw_reached_eof = false;
+
+	if (!cstate->opts.binary)
+	{
+		/*
+		 * If encoding conversion is needed, we need another buffer to hold
+		 * the converted input data.  Otherwise, we can just point input_buf
+		 * to the same buffer as raw_buf.
+		 */
+		if (cstate->need_transcoding)
+		{
+			cstate->input_buf = (char *) palloc(INPUT_BUF_SIZE + 1);
+			cstate->input_buf_index = cstate->input_buf_len = 0;
+		}
+		else
+			cstate->input_buf = cstate->raw_buf;
+		cstate->input_reached_eof = false;
+
+		initStringInfo(&cstate->line_buf);
+	}
+
+	initStringInfo(&cstate->attribute_buf);
+
+	/* Assign range table, we'll need it in CopyFrom. */
+	if (pstate)
+		cstate->range_table = pstate->p_rtable;
+
+	/*
+	 * Note: tupDesc and num_phys_attrs were already set up above; nothing
+	 * since then can have changed them, so there's no need to re-fetch them
+	 * here (a redundant refetch was removed).
+	 */
+	num_defaults = 0;
+	volatile_defexprs = false;
+
+	/*
+	 * Pick up the required catalog information for each attribute in the
+	 * relation, including the input function, the element type (to pass to
+	 * the input function), and info about defaults and constraints. (Which
+	 * input function we use depends on text/binary format choice.)
+	 */
+	in_functions = (FmgrInfo *) palloc(num_phys_attrs * sizeof(FmgrInfo));
+	typioparams = (Oid *) palloc(num_phys_attrs * sizeof(Oid));
+	defmap = (int *) palloc(num_phys_attrs * sizeof(int));
+	defexprs = (ExprState **) palloc(num_phys_attrs * sizeof(ExprState *));
+
+	for (attnum = 1; attnum <= num_phys_attrs; attnum++)
+	{
+		Form_pg_attribute att = TupleDescAttr(tupDesc, attnum - 1);
+
+		/* We don't need info for dropped attributes */
+		if (att->attisdropped)
+			continue;
+
+		/* Fetch the input function and typioparam info */
+		if (cstate->opts.binary)
+			getTypeBinaryInputInfo(att->atttypid,
+								   &in_func_oid, &typioparams[attnum - 1]);
+		else
+			getTypeInputInfo(att->atttypid,
+							 &in_func_oid, &typioparams[attnum - 1]);
+		fmgr_info(in_func_oid, &in_functions[attnum - 1]);
+
+		/* Get default info if needed */
+		if (!list_member_int(cstate->attnumlist, attnum) && !att->attgenerated)
+		{
+			/* attribute is NOT to be copied from input */
+			/* use default value if one exists */
+			Expr	   *defexpr = (Expr *) build_column_default(cstate->rel,
+																attnum);
+
+			if (defexpr != NULL)
+			{
+				/* Run the expression through planner */
+				defexpr = expression_planner(defexpr);
+
+				/* Initialize executable expression in copycontext */
+				defexprs[num_defaults] = ExecInitExpr(defexpr, NULL);
+				defmap[num_defaults] = attnum - 1;
+				num_defaults++;
+
+				/*
+				 * If a default expression looks at the table being loaded,
+				 * then it could give the wrong answer when using
+				 * multi-insert. Since database access can be dynamic this is
+				 * hard to test for exactly, so we use the much wider test of
+				 * whether the default expression is volatile. We allow for
+				 * the special case of when the default expression is the
+				 * nextval() of a sequence which in this specific case is
+				 * known to be safe for use with the multi-insert
+				 * optimization. Hence we use this special case function
+				 * checker rather than the standard check for
+				 * contain_volatile_functions().
+				 */
+				if (!volatile_defexprs)
+					volatile_defexprs = contain_volatile_functions_not_nextval((Node *) defexpr);
+			}
+		}
+	}
+
+
+	/* initialize progress */
+	pgstat_progress_start_command(PROGRESS_COMMAND_COPY,
+								  cstate->rel ? RelationGetRelid(cstate->rel) : InvalidOid);
+	cstate->bytes_processed = 0;
+
+	/* We keep those variables in cstate. */
+	cstate->in_functions = in_functions;
+	cstate->typioparams = typioparams;
+	cstate->defmap = defmap;
+	cstate->defexprs = defexprs;
+	cstate->volatile_defexprs = volatile_defexprs;
+	cstate->num_defaults = num_defaults;
+	cstate->is_program = is_program;
+
+	if (data_source_cb)
+	{
+		progress_vals[1] = PROGRESS_COPY_TYPE_CALLBACK;
+		cstate->copy_src = COPY_CALLBACK;
+		cstate->data_source_cb = data_source_cb;
+	}
+	else if (pipe)
+	{
+		progress_vals[1] = PROGRESS_COPY_TYPE_PIPE;
+		Assert(!is_program);	/* the grammar does not allow this */
+		if (whereToSendOutput == DestRemote)
+			ReceiveCopyBegin(cstate);
+		else
+			cstate->copy_file = stdin;
+	}
+	else
+	{
+		cstate->filename = pstrdup(filename);
+
+		if (cstate->is_program)
+		{
+			progress_vals[1] = PROGRESS_COPY_TYPE_PROGRAM;
+			cstate->copy_file = OpenPipeStream(cstate->filename, PG_BINARY_R);
+			if (cstate->copy_file == NULL)
+				ereport(ERROR,
+						(errcode_for_file_access(),
+						 errmsg("could not execute command \"%s\": %m",
+								cstate->filename)));
+		}
+		else
+		{
+			struct stat st;
+
+			progress_vals[1] = PROGRESS_COPY_TYPE_FILE;
+			cstate->copy_file = AllocateFile(cstate->filename, PG_BINARY_R);
+			if (cstate->copy_file == NULL)
+			{
+				/* copy errno because ereport subfunctions might change it */
+				int			save_errno = errno;
+
+				ereport(ERROR,
+						(errcode_for_file_access(),
+						 errmsg("could not open file \"%s\" for reading: %m",
+								cstate->filename),
+						 (save_errno == ENOENT || save_errno == EACCES) ?
+						 errhint("COPY FROM instructs the PostgreSQL server process to read a file. "
+								 "You may want a client-side facility such as psql's \\copy.") : 0));
+			}
+
+			if (fstat(fileno(cstate->copy_file), &st))
+				ereport(ERROR,
+						(errcode_for_file_access(),
+						 errmsg("could not stat file \"%s\": %m",
+								cstate->filename)));
+
+			if (S_ISDIR(st.st_mode))
+				ereport(ERROR,
+						(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+						 errmsg("\"%s\" is a directory", cstate->filename)));
+
+			progress_vals[2] = st.st_size;
+		}
+	}
+
+	pgstat_progress_update_multi_param(3, progress_cols, progress_vals);
+
+	if (cstate->opts.binary)
+	{
+		/* Read and verify binary header */
+		ReceiveCopyBinaryHeader(cstate);
+	}
+
+	/* create workspace for CopyReadAttributes results */
+	if (!cstate->opts.binary)
+	{
+		AttrNumber	attr_count = list_length(cstate->attnumlist);
+
+		cstate->max_fields = attr_count;
+		cstate->raw_fields = (char **) palloc(attr_count * sizeof(char *));
+	}
+
+	MemoryContextSwitchTo(oldcontext);
+
+	return cstate;
+}
+
+/*
+ * Clean up storage and release resources for COPY FROM.
+ */
+void
+EndCopyFrom(CopyFromState cstate)
+{
+	/*
+	 * Shut down the data source first: a program pipe needs its exit status
+	 * checked, while a server-side file merely needs to be closed.  Pipes
+	 * from the client (filename == NULL) need nothing here.
+	 */
+	if (!cstate->is_program)
+	{
+		if (cstate->filename != NULL && FreeFile(cstate->copy_file))
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not close file \"%s\": %m",
+							cstate->filename)));
+	}
+	else
+		ClosePipeFromProgram(cstate);
+
+	/* Mark the COPY command as finished in the progress view. */
+	pgstat_progress_end_command();
+
+	/* Everything else lives in copycontext; discard it wholesale. */
+	MemoryContextDelete(cstate->copycontext);
+	pfree(cstate);
+}
+
+/*
+ * Closes the pipe from an external program, checking the pclose() return code.
+ */
+static void
+ClosePipeFromProgram(CopyFromState cstate)
+{
+	/* wait status as returned by pclose(), or -1 on failure */
+	int			pclose_rc;
+
+	Assert(cstate->is_program);
+
+	pclose_rc = ClosePipeStream(cstate->copy_file);
+	if (pclose_rc == -1)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not close pipe to external command: %m")));
+	else if (pclose_rc != 0)
+	{
+		/*
+		 * If we ended a COPY FROM PROGRAM before reaching EOF, then it's
+		 * expected for the called program to fail with SIGPIPE, and we
+		 * should not report that as an error.  Otherwise, SIGPIPE indicates a
+		 * problem.
+		 */
+		if (!cstate->raw_reached_eof &&
+			wait_result_is_signal(pclose_rc, SIGPIPE))
+			return;
+
+		/* Any other nonzero exit status is reported as a hard error. */
+		ereport(ERROR,
+				(errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
+				 errmsg("program \"%s\" failed",
+						cstate->filename),
+				 errdetail_internal("%s", wait_result_to_str(pclose_rc))));
+	}
+}
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
new file mode 100644
index 0000000..097414e
--- /dev/null
+++ b/src/backend/commands/copyfromparse.c
@@ -0,0 +1,1921 @@
+/*-------------------------------------------------------------------------
+ *
+ * copyfromparse.c
+ * Parse CSV/text/binary format for COPY FROM.
+ *
+ * This file contains routines to parse the text, CSV and binary input
+ * formats. The main entry point is NextCopyFrom(), which parses the
+ * next input line and returns it as Datums.
+ *
+ * In text/CSV mode, the parsing happens in multiple stages:
+ *
+ * [data source] --> raw_buf --> input_buf --> line_buf --> attribute_buf
+ * 1. 2. 3. 4.
+ *
+ * 1. CopyLoadRawBuf() reads raw data from the input file or client, and
+ * places it into 'raw_buf'.
+ *
+ * 2. CopyConvertBuf() calls the encoding conversion function to convert
+ * the data in 'raw_buf' from client to server encoding, placing the
+ * converted result in 'input_buf'.
+ *
+ * 3. CopyReadLine() parses the data in 'input_buf', one line at a time.
+ * It is responsible for finding the next newline marker, taking quote and
+ * escape characters into account according to the COPY options. The line
+ * is copied into 'line_buf', with quotes and escape characters still
+ * intact.
+ *
+ * 4. CopyReadAttributesText/CSV() function takes the input line from
+ * 'line_buf', and splits it into fields, unescaping the data as required.
+ * The fields are stored in 'attribute_buf', and 'raw_fields' array holds
+ * pointers to each field.
+ *
+ * If encoding conversion is not required, a shortcut is taken in step 2 to
+ * avoid copying the data unnecessarily. The 'input_buf' pointer is set to
+ * point directly to 'raw_buf', so that CopyLoadRawBuf() loads the raw data
+ * directly into 'input_buf'. CopyConvertBuf() then merely validates that
+ * the data is valid in the current encoding.
+ *
+ * In binary mode, the pipeline is much simpler. Input is loaded into
+ * 'raw_buf', and encoding conversion is done in the datatype-specific
+ * receive functions, if required. 'input_buf' and 'line_buf' are not used,
+ * but 'attribute_buf' is used as a temporary buffer to hold one attribute's
+ * data when it's passed the receive function.
+ *
+ * 'raw_buf' is always 64 kB in size (RAW_BUF_SIZE). 'input_buf' is also
+ * 64 kB (INPUT_BUF_SIZE), if encoding conversion is required. 'line_buf'
+ * and 'attribute_buf' are expanded on demand, to hold the longest line
+ * encountered so far.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/commands/copyfromparse.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <ctype.h>
+#include <unistd.h>
+#include <sys/stat.h>
+
+#include "commands/copy.h"
+#include "commands/copyfrom_internal.h"
+#include "commands/progress.h"
+#include "executor/executor.h"
+#include "libpq/libpq.h"
+#include "libpq/pqformat.h"
+#include "mb/pg_wchar.h"
+#include "miscadmin.h"
+#include "pgstat.h"
+#include "port/pg_bswap.h"
+#include "utils/builtins.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+
+#define ISOCTAL(c) (((c) >= '0') && ((c) <= '7'))
+#define OCTVALUE(c) ((c) - '0')
+
+/*
+ * These macros centralize code used to process line_buf and input_buf buffers.
+ * They are macros because they often do continue/break control and to avoid
+ * function call overhead in tight COPY loops.
+ *
+ * We must use "if (1)" because the usual "do {...} while(0)" wrapper would
+ * prevent the continue/break processing from working. We end the "if (1)"
+ * with "else ((void) 0)" to ensure the "if" does not unintentionally match
+ * any "else" in the calling code, and to avoid any compiler warnings about
+ * empty statements. See http://www.cit.gu.edu.au/~anthony/info/C/C.macros.
+ */
+
+/*
+ * This keeps the character read at the top of the loop in the buffer
+ * even if there is more than one read-ahead.
+ */
+#define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen) \
+if (1) \
+{ \
+ if (input_buf_ptr + (extralen) >= copy_buf_len && !hit_eof) \
+ { \
+ input_buf_ptr = prev_raw_ptr; /* undo fetch */ \
+ need_data = true; \
+ continue; \
+ } \
+} else ((void) 0)
+
+/* This consumes the remainder of the buffer and breaks */
+#define IF_NEED_REFILL_AND_EOF_BREAK(extralen) \
+if (1) \
+{ \
+ if (input_buf_ptr + (extralen) >= copy_buf_len && hit_eof) \
+ { \
+ if (extralen) \
+ input_buf_ptr = copy_buf_len; /* consume the partial character */ \
+ /* backslash just before EOF, treat as data char */ \
+ result = true; \
+ break; \
+ } \
+} else ((void) 0)
+
+/*
+ * Transfer any approved data to line_buf; must do this to be sure
+ * there is some room in input_buf.
+ */
+#define REFILL_LINEBUF \
+if (1) \
+{ \
+ if (input_buf_ptr > cstate->input_buf_index) \
+ { \
+ appendBinaryStringInfo(&cstate->line_buf, \
+ cstate->input_buf + cstate->input_buf_index, \
+ input_buf_ptr - cstate->input_buf_index); \
+ cstate->input_buf_index = input_buf_ptr; \
+ } \
+} else ((void) 0)
+
+/* Undo any read-ahead and jump out of the block. */
+#define NO_END_OF_COPY_GOTO \
+if (1) \
+{ \
+ input_buf_ptr = prev_raw_ptr + 1; \
+ goto not_end_of_copy; \
+} else ((void) 0)
+
+/* NOTE: there's a copy of this in copyto.c */
+static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0";
+
+
+/* non-export function prototypes */
+static bool CopyReadLine(CopyFromState cstate);
+static bool CopyReadLineText(CopyFromState cstate);
+static int CopyReadAttributesText(CopyFromState cstate);
+static int CopyReadAttributesCSV(CopyFromState cstate);
+static Datum CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo,
+ Oid typioparam, int32 typmod,
+ bool *isnull);
+
+
+/* Low-level communications functions */
+static int CopyGetData(CopyFromState cstate, void *databuf,
+ int minread, int maxread);
+static inline bool CopyGetInt32(CopyFromState cstate, int32 *val);
+static inline bool CopyGetInt16(CopyFromState cstate, int16 *val);
+static void CopyLoadInputBuf(CopyFromState cstate);
+static int CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes);
+
+/*
+ * Send a CopyInResponse message to the frontend and prepare to receive
+ * COPY data over the wire protocol.
+ */
+void
+ReceiveCopyBegin(CopyFromState cstate)
+{
+	StringInfoData buf;
+	int			natts = list_length(cstate->attnumlist);
+	int16		format = (cstate->opts.binary ? 1 : 0);
+
+	/* Announce the overall format, then one format code per column */
+	pq_beginmessage(&buf, 'G');
+	pq_sendbyte(&buf, format);
+	pq_sendint16(&buf, natts);
+	for (int i = 0; i < natts; i++)
+		pq_sendint16(&buf, format);
+	pq_endmessage(&buf);
+
+	cstate->copy_src = COPY_FRONTEND;
+	cstate->fe_msgbuf = makeStringInfo();
+	/* We *must* flush here to ensure FE knows it can send. */
+	pq_flush();
+}
+
+/*
+ * Read and verify the header of a binary-format COPY data stream:
+ * fixed signature, flags word, and (skipped) header extension area.
+ */
+void
+ReceiveCopyBinaryHeader(CopyFromState cstate)
+{
+	char		readSig[11];
+	int32		flags;
+	int32		extlen;
+
+	/* Signature */
+	if (CopyReadBinaryData(cstate, readSig, 11) != 11 ||
+		memcmp(readSig, BinarySignature, 11) != 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+				 errmsg("COPY file signature not recognized")));
+
+	/* Flags field */
+	if (!CopyGetInt32(cstate, &flags))
+		ereport(ERROR,
+				(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+				 errmsg("invalid COPY file header (missing flags)")));
+	/* Bit 16 once meant WITH OIDS, which is no longer supported */
+	if ((flags & (1 << 16)) != 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+				 errmsg("invalid COPY file header (WITH OIDS)")));
+	flags &= ~(1 << 16);
+	/* Any remaining high-half bit is a critical flag we don't understand */
+	if ((flags >> 16) != 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+				 errmsg("unrecognized critical flags in COPY file header")));
+
+	/* Header extension length */
+	if (!CopyGetInt32(cstate, &extlen) ||
+		extlen < 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+				 errmsg("invalid COPY file header (missing length)")));
+	/* Skip extension header, if present */
+	for (; extlen > 0; extlen--)
+	{
+		if (CopyReadBinaryData(cstate, readSig, 1) != 1)
+			ereport(ERROR,
+					(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+					 errmsg("invalid COPY file header (wrong length)")));
+	}
+}
+
+/*
+ * CopyGetData reads data from the source (file or frontend)
+ *
+ * We attempt to read at least minread, and at most maxread, bytes from
+ * the source.  The actual number of bytes read is returned; if this is
+ * less than minread, EOF was detected.
+ *
+ * Note: when copying from the frontend, we expect a proper EOF mark per
+ * protocol; if the frontend simply drops the connection, we raise error.
+ * It seems unwise to allow the COPY IN to complete normally in that case.
+ *
+ * NB: no data conversion is applied here.
+ */
+static int
+CopyGetData(CopyFromState cstate, void *databuf, int minread, int maxread)
+{
+	int			bytesread = 0;
+
+	switch (cstate->copy_src)
+	{
+		case COPY_FILE:
+			bytesread = fread(databuf, 1, maxread, cstate->copy_file);
+			if (ferror(cstate->copy_file))
+				ereport(ERROR,
+						(errcode_for_file_access(),
+						 errmsg("could not read from COPY file: %m")));
+			if (bytesread == 0)
+				cstate->raw_reached_eof = true;
+			break;
+		case COPY_FRONTEND:
+			/* Keep reading messages until we have at least minread bytes */
+			while (maxread > 0 && bytesread < minread && !cstate->raw_reached_eof)
+			{
+				int			avail;
+
+				/* If fe_msgbuf is exhausted, fetch another protocol message */
+				while (cstate->fe_msgbuf->cursor >= cstate->fe_msgbuf->len)
+				{
+					/* Try to receive another message */
+					int			mtype;
+					int			maxmsglen;
+
+			readmessage:
+					/*
+					 * Hold off cancel interrupts while reading the message,
+					 * so that an interrupt can't leave us mid-message and
+					 * out of sync with the frontend.
+					 */
+					HOLD_CANCEL_INTERRUPTS();
+					pq_startmsgread();
+					mtype = pq_getbyte();
+					if (mtype == EOF)
+						ereport(ERROR,
+								(errcode(ERRCODE_CONNECTION_FAILURE),
+								 errmsg("unexpected EOF on client connection with an open transaction")));
+					/* Validate message type and set packet size limit */
+					switch (mtype)
+					{
+						case 'd':	/* CopyData */
+							maxmsglen = PQ_LARGE_MESSAGE_LIMIT;
+							break;
+						case 'c':	/* CopyDone */
+						case 'f':	/* CopyFail */
+						case 'H':	/* Flush */
+						case 'S':	/* Sync */
+							maxmsglen = PQ_SMALL_MESSAGE_LIMIT;
+							break;
+						default:
+							ereport(ERROR,
+									(errcode(ERRCODE_PROTOCOL_VIOLATION),
+									 errmsg("unexpected message type 0x%02X during COPY from stdin",
+											mtype)));
+							maxmsglen = 0;	/* keep compiler quiet */
+							break;
+					}
+					/* Now collect the message body */
+					if (pq_getmessage(cstate->fe_msgbuf, maxmsglen))
+						ereport(ERROR,
+								(errcode(ERRCODE_CONNECTION_FAILURE),
+								 errmsg("unexpected EOF on client connection with an open transaction")));
+					RESUME_CANCEL_INTERRUPTS();
+					/* ... and process it */
+					switch (mtype)
+					{
+						case 'd':	/* CopyData */
+							break;
+						case 'c':	/* CopyDone */
+							/* COPY IN correctly terminated by frontend */
+							cstate->raw_reached_eof = true;
+							return bytesread;
+						case 'f':	/* CopyFail */
+							ereport(ERROR,
+									(errcode(ERRCODE_QUERY_CANCELED),
+									 errmsg("COPY from stdin failed: %s",
+											pq_getmsgstring(cstate->fe_msgbuf))));
+							break;
+						case 'H':	/* Flush */
+						case 'S':	/* Sync */
+
+							/*
+							 * Ignore Flush/Sync for the convenience of client
+							 * libraries (such as libpq) that may send those
+							 * without noticing that the command they just
+							 * sent was COPY.
+							 */
+							goto readmessage;
+						default:
+							Assert(false);	/* NOT REACHED */
+					}
+				}
+				/* Copy what we can from the current message into the caller's buffer */
+				avail = cstate->fe_msgbuf->len - cstate->fe_msgbuf->cursor;
+				if (avail > maxread)
+					avail = maxread;
+				pq_copymsgbytes(cstate->fe_msgbuf, databuf, avail);
+				databuf = (void *) ((char *) databuf + avail);
+				maxread -= avail;
+				bytesread += avail;
+			}
+			break;
+		case COPY_CALLBACK:
+			/* Delegate entirely to the caller-supplied callback */
+			bytesread = cstate->data_source_cb(databuf, minread, maxread);
+			break;
+	}
+
+	return bytesread;
+}
+
+
+/*
+ * These functions do apply some data conversion
+ */
+
+/*
+ * CopyGetInt32 reads an int32 that appears in network byte order
+ *
+ * Returns true if OK, false if EOF
+ */
+static inline bool
+CopyGetInt32(CopyFromState cstate, int32 *val)
+{
+	uint32		netval;
+
+	if (CopyReadBinaryData(cstate, (char *) &netval, sizeof(netval)) !=
+		sizeof(netval))
+	{
+		*val = 0;				/* suppress compiler warning */
+		return false;
+	}
+	*val = (int32) pg_ntoh32(netval);
+	return true;
+}
+
+/*
+ * CopyGetInt16 reads an int16 that appears in network byte order
+ *
+ * Returns true if OK, false if EOF
+ */
+static inline bool
+CopyGetInt16(CopyFromState cstate, int16 *val)
+{
+	uint16		netval;
+
+	if (CopyReadBinaryData(cstate, (char *) &netval, sizeof(netval)) !=
+		sizeof(netval))
+	{
+		*val = 0;				/* suppress compiler warning */
+		return false;
+	}
+	*val = (int16) pg_ntoh16(netval);
+	return true;
+}
+
+
+/*
+ * Perform encoding conversion on data in 'raw_buf', writing the converted
+ * data into 'input_buf'.
+ *
+ * On entry, there must be some data to convert in 'raw_buf'.
+ */
+static void
+CopyConvertBuf(CopyFromState cstate)
+{
+	/*
+	 * If the file and server encoding are the same, no encoding conversion is
+	 * required.  However, we still need to verify that the input is valid for
+	 * the encoding.
+	 */
+	if (!cstate->need_transcoding)
+	{
+		/*
+		 * When conversion is not required, input_buf and raw_buf are the
+		 * same.  raw_buf_len is the total number of bytes in the buffer, and
+		 * input_buf_len tracks how many of those bytes have already been
+		 * verified.
+		 */
+		int			preverifiedlen = cstate->input_buf_len;
+		int			unverifiedlen = cstate->raw_buf_len - cstate->input_buf_len;
+		int			nverified;
+
+		if (unverifiedlen == 0)
+		{
+			/*
+			 * If no more raw data is coming, report the EOF to the caller.
+			 */
+			if (cstate->raw_reached_eof)
+				cstate->input_reached_eof = true;
+			return;
+		}
+
+		/*
+		 * Verify the new data, including any residual unverified bytes from
+		 * previous round.
+		 */
+		nverified = pg_encoding_verifymbstr(cstate->file_encoding,
+											cstate->raw_buf + preverifiedlen,
+											unverifiedlen);
+		if (nverified == 0)
+		{
+			/*
+			 * Could not verify anything.
+			 *
+			 * If there is no more raw input data coming, it means that there
+			 * was an incomplete multi-byte sequence at the end.  Also, if
+			 * there's "enough" input left, we should be able to verify at
+			 * least one character, and a failure to do so means that we've
+			 * hit an invalid byte sequence.
+			 */
+			if (cstate->raw_reached_eof || unverifiedlen >= pg_encoding_max_length(cstate->file_encoding))
+				cstate->input_reached_error = true;
+			return;
+		}
+		/* This much more of raw_buf is now known to be valid */
+		cstate->input_buf_len += nverified;
+	}
+	else
+	{
+		/*
+		 * Encoding conversion is needed.
+		 */
+		int			nbytes;
+		unsigned char *src;
+		int			srclen;
+		unsigned char *dst;
+		int			dstlen;
+		int			convertedlen;
+
+		if (RAW_BUF_BYTES(cstate) == 0)
+		{
+			/*
+			 * If no more raw data is coming, report the EOF to the caller.
+			 */
+			if (cstate->raw_reached_eof)
+				cstate->input_reached_eof = true;
+			return;
+		}
+
+		/*
+		 * First, copy down any unprocessed data.
+		 */
+		nbytes = INPUT_BUF_BYTES(cstate);
+		if (nbytes > 0 && cstate->input_buf_index > 0)
+			memmove(cstate->input_buf, cstate->input_buf + cstate->input_buf_index,
+					nbytes);
+		cstate->input_buf_index = 0;
+		cstate->input_buf_len = nbytes;
+		cstate->input_buf[nbytes] = '\0';
+
+		/* Convert the unprocessed tail of raw_buf into the free space of input_buf */
+		src = (unsigned char *) cstate->raw_buf + cstate->raw_buf_index;
+		srclen = cstate->raw_buf_len - cstate->raw_buf_index;
+		dst = (unsigned char *) cstate->input_buf + cstate->input_buf_len;
+		dstlen = INPUT_BUF_SIZE - cstate->input_buf_len + 1;
+
+		/*
+		 * Do the conversion.  This might stop short, if there is an invalid
+		 * byte sequence in the input.  We'll convert as much as we can in
+		 * that case.
+		 *
+		 * Note: Even if we hit an invalid byte sequence, we don't report the
+		 * error until all the valid bytes have been consumed.  The input
+		 * might contain an end-of-input marker (\.), and we don't want to
+		 * report an error if the invalid byte sequence is after the
+		 * end-of-input marker.  We might unnecessarily convert some data
+		 * after the end-of-input marker as long as it's valid for the
+		 * encoding, but that's harmless.
+		 */
+		convertedlen = pg_do_encoding_conversion_buf(cstate->conversion_proc,
+													 cstate->file_encoding,
+													 GetDatabaseEncoding(),
+													 src, srclen,
+													 dst, dstlen,
+													 true);
+		if (convertedlen == 0)
+		{
+			/*
+			 * Could not convert anything.  If there is no more raw input data
+			 * coming, it means that there was an incomplete multi-byte
+			 * sequence at the end.  Also, if there is plenty of input left,
+			 * we should be able to convert at least one character, so a
+			 * failure to do so must mean that we've hit a byte sequence
+			 * that's invalid.
+			 */
+			if (cstate->raw_reached_eof || srclen >= MAX_CONVERSION_INPUT_LENGTH)
+				cstate->input_reached_error = true;
+			return;
+		}
+		cstate->raw_buf_index += convertedlen;
+		/* The converted output is NUL-terminated, so strlen() gives its length */
+		cstate->input_buf_len += strlen((char *) dst);
+	}
+}
+
+/*
+ * Report an encoding or conversion error.
+ *
+ * Called after CopyConvertBuf() has set input_reached_error; raises
+ * ereport(ERROR) and does not return.
+ */
+static void
+CopyConversionError(CopyFromState cstate)
+{
+	Assert(cstate->raw_buf_len > 0);
+	Assert(cstate->input_reached_error);
+
+	if (!cstate->need_transcoding)
+	{
+		/*
+		 * Everything up to input_buf_len was successfully verified, and
+		 * input_buf_len points to the invalid or incomplete character.
+		 */
+		report_invalid_encoding(cstate->file_encoding,
+								cstate->raw_buf + cstate->input_buf_len,
+								cstate->raw_buf_len - cstate->input_buf_len);
+	}
+	else
+	{
+		/*
+		 * raw_buf_index points to the invalid or untranslatable character. We
+		 * let the conversion routine report the error, because it can provide
+		 * a more specific error message than we could here.  An earlier call
+		 * to the conversion routine in CopyConvertBuf() detected that there
+		 * is an error, now we call the conversion routine again with
+		 * noError=false, to have it throw the error.
+		 */
+		unsigned char *src;
+		int			srclen;
+		unsigned char *dst;
+		int			dstlen;
+
+		src = (unsigned char *) cstate->raw_buf + cstate->raw_buf_index;
+		srclen = cstate->raw_buf_len - cstate->raw_buf_index;
+		dst = (unsigned char *) cstate->input_buf + cstate->input_buf_len;
+		dstlen = INPUT_BUF_SIZE - cstate->input_buf_len + 1;
+
+		(void) pg_do_encoding_conversion_buf(cstate->conversion_proc,
+											 cstate->file_encoding,
+											 GetDatabaseEncoding(),
+											 src, srclen,
+											 dst, dstlen,
+											 false);
+
+		/*
+		 * The conversion routine should have reported an error, so this
+		 * should not be reached.
+		 */
+		elog(ERROR, "encoding conversion failed without error");
+	}
+}
+
+/*
+ * Load more data from data source to raw_buf.
+ *
+ * If RAW_BUF_BYTES(cstate) > 0, the unprocessed bytes are moved to the
+ * beginning of the buffer, and we load new data after that.
+ */
+static void
+CopyLoadRawBuf(CopyFromState cstate)
+{
+	int			nbytes;
+	int			inbytes;
+
+	/*
+	 * In text mode, if encoding conversion is not required, raw_buf and
+	 * input_buf point to the same buffer.  Their len/index better agree, too.
+	 */
+	if (cstate->raw_buf == cstate->input_buf)
+	{
+		Assert(!cstate->need_transcoding);
+		Assert(cstate->raw_buf_index == cstate->input_buf_index);
+		Assert(cstate->input_buf_len <= cstate->raw_buf_len);
+	}
+
+	/*
+	 * Copy down the unprocessed data if any.
+	 */
+	nbytes = RAW_BUF_BYTES(cstate);
+	if (nbytes > 0 && cstate->raw_buf_index > 0)
+		memmove(cstate->raw_buf, cstate->raw_buf + cstate->raw_buf_index,
+				nbytes);
+	cstate->raw_buf_len -= cstate->raw_buf_index;
+	cstate->raw_buf_index = 0;
+
+	/*
+	 * If raw_buf and input_buf are in fact the same buffer, adjust the
+	 * input_buf variables, too.
+	 */
+	if (cstate->raw_buf == cstate->input_buf)
+	{
+		cstate->input_buf_len -= cstate->input_buf_index;
+		cstate->input_buf_index = 0;
+	}
+
+	/* Load more data */
+	inbytes = CopyGetData(cstate, cstate->raw_buf + cstate->raw_buf_len,
+						  1, RAW_BUF_SIZE - cstate->raw_buf_len);
+	nbytes += inbytes;
+	/* Keep the buffer NUL-terminated, for the benefit of string operations */
+	cstate->raw_buf[nbytes] = '\0';
+	cstate->raw_buf_len = nbytes;
+
+	/* Update progress instrumentation with the newly read bytes */
+	cstate->bytes_processed += inbytes;
+	pgstat_progress_update_param(PROGRESS_COPY_BYTES_PROCESSED, cstate->bytes_processed);
+
+	/* CopyGetData returning less than requested (here, < 1 byte) means EOF */
+	if (inbytes == 0)
+		cstate->raw_reached_eof = true;
+}
+
+/*
+ * CopyLoadInputBuf loads some more data into input_buf
+ *
+ * On return, at least one more input character is loaded into
+ * input_buf, or input_reached_eof is set.
+ *
+ * If INPUT_BUF_BYTES(cstate) > 0, the unprocessed bytes are moved to the start
+ * of the buffer and then we load more data after that.
+ */
+static void
+CopyLoadInputBuf(CopyFromState cstate)
+{
+	int			nbytes = INPUT_BUF_BYTES(cstate);
+
+	/*
+	 * The caller has updated input_buf_index to indicate how much of the
+	 * input has been consumed and isn't needed anymore.  If input_buf is the
+	 * same physical area as raw_buf, update raw_buf_index accordingly.
+	 */
+	if (cstate->raw_buf == cstate->input_buf)
+	{
+		Assert(!cstate->need_transcoding);
+		Assert(cstate->input_buf_index >= cstate->raw_buf_index);
+		cstate->raw_buf_index = cstate->input_buf_index;
+	}
+
+	/* Alternate converting and refilling until we make progress or hit EOF */
+	for (;;)
+	{
+		/* If we now have some unconverted data, try to convert it */
+		CopyConvertBuf(cstate);
+
+		/* If we now have some more input bytes ready, return them */
+		if (INPUT_BUF_BYTES(cstate) > nbytes)
+			return;
+
+		/*
+		 * If we reached an invalid byte sequence, or we're at an incomplete
+		 * multi-byte character but there is no more raw input data, report
+		 * conversion error.
+		 */
+		if (cstate->input_reached_error)
+			CopyConversionError(cstate);
+
+		/* no more input, and everything has been converted */
+		if (cstate->input_reached_eof)
+			break;
+
+		/* Try to load more raw data */
+		Assert(!cstate->raw_reached_eof);
+		CopyLoadRawBuf(cstate);
+	}
+}
+
+/*
+ * CopyReadBinaryData
+ *
+ * Reads up to 'nbytes' bytes from cstate->copy_file via cstate->raw_buf
+ * and writes them to 'dest'.  Returns the number of bytes read (which
+ * would be less than 'nbytes' only if we reach EOF).
+ */
+static int
+CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes)
+{
+	int			nread = 0;
+
+	if (RAW_BUF_BYTES(cstate) >= nbytes)
+	{
+		/* Fast path: the whole request is already buffered. */
+		memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, nbytes);
+		cstate->raw_buf_index += nbytes;
+		return nbytes;
+	}
+
+	/*
+	 * Slow path: drain the buffer, refilling from the data source as we go.
+	 * We must loop since 'nbytes' could exceed the buffer size.
+	 */
+	while (nread < nbytes)
+	{
+		int			chunk;
+
+		/* Refill the buffer when it runs dry; stop at EOF. */
+		if (RAW_BUF_BYTES(cstate) == 0)
+		{
+			CopyLoadRawBuf(cstate);
+			if (cstate->raw_reached_eof)
+				break;			/* EOF */
+		}
+
+		/* Transfer as much as is available, up to what's still wanted. */
+		chunk = Min(nbytes - nread, RAW_BUF_BYTES(cstate));
+		memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, chunk);
+		cstate->raw_buf_index += chunk;
+		dest += chunk;
+		nread += chunk;
+	}
+
+	return nread;
+}
+
+/*
+ * Read raw fields in the next line for COPY FROM in text or csv mode.
+ * Return false if no more lines.
+ *
+ * An internal temporary buffer is returned via 'fields'.  It is valid until
+ * the next call of the function.  Since the function returns all raw fields
+ * in the input file, 'nfields' could be different from the number of columns
+ * in the relation.
+ *
+ * NOTE: force_not_null option are not applied to the returned fields.
+ */
+bool
+NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields)
+{
+	int			fldct;
+	bool		done;
+
+	/* only available for text or csv input */
+	Assert(!cstate->opts.binary);
+
+	/* on input check that the header line is correct if needed */
+	if (cstate->cur_lineno == 0 && cstate->opts.header_line)
+	{
+		ListCell   *cur;
+		TupleDesc	tupDesc;
+
+		tupDesc = RelationGetDescr(cstate->rel);
+
+		/* Consume the header line; with HEADER MATCH we also verify it */
+		cstate->cur_lineno++;
+		done = CopyReadLine(cstate);
+
+		if (cstate->opts.header_line == COPY_HEADER_MATCH)
+		{
+			int			fldnum;
+
+			if (cstate->opts.csv_mode)
+				fldct = CopyReadAttributesCSV(cstate);
+			else
+				fldct = CopyReadAttributesText(cstate);
+
+			if (fldct != list_length(cstate->attnumlist))
+				ereport(ERROR,
+						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+						 errmsg("wrong number of fields in header line: got %d, expected %d",
+								fldct, list_length(cstate->attnumlist))));
+
+			/* Each header field must equal the corresponding column name */
+			fldnum = 0;
+			foreach(cur, cstate->attnumlist)
+			{
+				int			attnum = lfirst_int(cur);
+				char	   *colName;
+				Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);
+
+				Assert(fldnum < cstate->max_fields);
+
+				colName = cstate->raw_fields[fldnum++];
+				if (colName == NULL)
+					ereport(ERROR,
+							(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+							 errmsg("column name mismatch in header line field %d: got null value (\"%s\"), expected \"%s\"",
+									fldnum, cstate->opts.null_print, NameStr(attr->attname))));
+
+				if (namestrcmp(&attr->attname, colName) != 0)
+				{
+					ereport(ERROR,
+							(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+							 errmsg("column name mismatch in header line field %d: got \"%s\", expected \"%s\"",
+									fldnum, colName, NameStr(attr->attname))));
+				}
+			}
+		}
+
+		/* EOF while reading the header line means there are no data rows */
+		if (done)
+			return false;
+	}
+
+	cstate->cur_lineno++;
+
+	/* Actually read the line into memory here */
+	done = CopyReadLine(cstate);
+
+	/*
+	 * EOF at start of line means we're done.  If we see EOF after some
+	 * characters, we act as though it was newline followed by EOF, ie,
+	 * process the line and then exit loop on next iteration.
+	 */
+	if (done && cstate->line_buf.len == 0)
+		return false;
+
+	/* Parse the line into de-escaped field values */
+	if (cstate->opts.csv_mode)
+		fldct = CopyReadAttributesCSV(cstate);
+	else
+		fldct = CopyReadAttributesText(cstate);
+
+	*fields = cstate->raw_fields;
+	*nfields = fldct;
+	return true;
+}
+
+/*
+ * Read next tuple from file for COPY FROM. Return false if no more tuples.
+ *
+ * 'econtext' is used to evaluate default expression for each column not
+ * read from the file. It can be NULL when no default values are used, i.e.
+ * when all columns are read from the file.
+ *
+ * 'values' and 'nulls' arrays must be the same length as columns of the
+ * relation passed to BeginCopyFrom. This function fills the arrays.
+ */
+bool
+NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
+			 Datum *values, bool *nulls)
+{
+	TupleDesc	tupDesc;
+	AttrNumber	num_phys_attrs,
+				attr_count,
+				num_defaults = cstate->num_defaults;
+	FmgrInfo   *in_functions = cstate->in_functions;
+	Oid		   *typioparams = cstate->typioparams;
+	int			i;
+	int		   *defmap = cstate->defmap;
+	ExprState **defexprs = cstate->defexprs;
+
+	tupDesc = RelationGetDescr(cstate->rel);
+	num_phys_attrs = tupDesc->natts;
+	attr_count = list_length(cstate->attnumlist);
+
+	/* Initialize all values for row to NULL */
+	MemSet(values, 0, num_phys_attrs * sizeof(Datum));
+	MemSet(nulls, true, num_phys_attrs * sizeof(bool));
+
+	if (!cstate->opts.binary)
+	{
+		char	  **field_strings;
+		ListCell   *cur;
+		int			fldct;
+		int			fieldno;
+		char	   *string;
+
+		/* read raw fields in the next line */
+		if (!NextCopyFromRawFields(cstate, &field_strings, &fldct))
+			return false;
+
+		/* check for overflowing fields */
+		if (attr_count > 0 && fldct > attr_count)
+			ereport(ERROR,
+					(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+					 errmsg("extra data after last expected column")));
+
+		fieldno = 0;
+
+		/* Loop to read the user attributes on the line. */
+		foreach(cur, cstate->attnumlist)
+		{
+			int			attnum = lfirst_int(cur);
+			int			m = attnum - 1;
+			Form_pg_attribute att = TupleDescAttr(tupDesc, m);
+
+			if (fieldno >= fldct)
+				ereport(ERROR,
+						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+						 errmsg("missing data for column \"%s\"",
+								NameStr(att->attname))));
+			string = field_strings[fieldno++];
+
+			if (cstate->convert_select_flags &&
+				!cstate->convert_select_flags[m])
+			{
+				/* ignore input field, leaving column as NULL */
+				continue;
+			}
+
+			if (cstate->opts.csv_mode)
+			{
+				if (string == NULL &&
+					cstate->opts.force_notnull_flags[m])
+				{
+					/*
+					 * FORCE_NOT_NULL option is set and column is NULL -
+					 * convert it to the NULL string.
+					 */
+					string = cstate->opts.null_print;
+				}
+				else if (string != NULL && cstate->opts.force_null_flags[m]
+						 && strcmp(string, cstate->opts.null_print) == 0)
+				{
+					/*
+					 * FORCE_NULL option is set and column matches the NULL
+					 * string. It must have been quoted, or otherwise the
+					 * string would already have been set to NULL. Convert it
+					 * to NULL as specified.
+					 */
+					string = NULL;
+				}
+			}
+
+			/*
+			 * Set cur_attname/cur_attval so that, if the input function
+			 * errors out, the context callback can report which field failed.
+			 */
+			cstate->cur_attname = NameStr(att->attname);
+			cstate->cur_attval = string;
+			values[m] = InputFunctionCall(&in_functions[m],
+										  string,
+										  typioparams[m],
+										  att->atttypmod);
+			if (string != NULL)
+				nulls[m] = false;
+			cstate->cur_attname = NULL;
+			cstate->cur_attval = NULL;
+		}
+
+		/* The "missing data" check above guarantees we consumed them all */
+		Assert(fieldno == attr_count);
+	}
+	else
+	{
+		/* binary */
+		int16		fld_count;
+		ListCell   *cur;
+
+		cstate->cur_lineno++;
+
+		if (!CopyGetInt16(cstate, &fld_count))
+		{
+			/* EOF detected (end of file, or protocol-level EOF) */
+			return false;
+		}
+
+		if (fld_count == -1)
+		{
+			/*
+			 * Received EOF marker.  Wait for the protocol-level EOF, and
+			 * complain if it doesn't come immediately.  In COPY FROM STDIN,
+			 * this ensures that we correctly handle CopyFail, if client
+			 * chooses to send that now.  When copying from file, we could
+			 * ignore the rest of the file like in text mode, but we choose to
+			 * be consistent with the COPY FROM STDIN case.
+			 */
+			char		dummy;
+
+			if (CopyReadBinaryData(cstate, &dummy, 1) > 0)
+				ereport(ERROR,
+						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+						 errmsg("received copy data after EOF marker")));
+			return false;
+		}
+
+		if (fld_count != attr_count)
+			ereport(ERROR,
+					(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+					 errmsg("row field count is %d, expected %d",
+							(int) fld_count, attr_count)));
+
+		/* Read each column's binary value through its receive function */
+		foreach(cur, cstate->attnumlist)
+		{
+			int			attnum = lfirst_int(cur);
+			int			m = attnum - 1;
+			Form_pg_attribute att = TupleDescAttr(tupDesc, m);
+
+			cstate->cur_attname = NameStr(att->attname);
+			values[m] = CopyReadBinaryAttribute(cstate,
+												&in_functions[m],
+												typioparams[m],
+												att->atttypmod,
+												&nulls[m]);
+			cstate->cur_attname = NULL;
+		}
+	}
+
+	/*
+	 * Now compute and insert any defaults available for the columns not
+	 * provided by the input data.  Anything not processed here or above will
+	 * remain NULL.
+	 */
+	for (i = 0; i < num_defaults; i++)
+	{
+		/*
+		 * The caller must supply econtext and have switched into the
+		 * per-tuple memory context in it.
+		 */
+		Assert(econtext != NULL);
+		Assert(CurrentMemoryContext == econtext->ecxt_per_tuple_memory);
+
+		values[defmap[i]] = ExecEvalExpr(defexprs[i], econtext,
+										 &nulls[defmap[i]]);
+	}
+
+	return true;
+}
+
+/*
+ * Read the next input line and stash it in line_buf.
+ *
+ * Result is true if read was terminated by EOF, false if terminated
+ * by newline.  The terminating newline or EOF marker is not included
+ * in the final value of line_buf.
+ */
+static bool
+CopyReadLine(CopyFromState cstate)
+{
+	bool		result;
+
+	/* Mark line_buf invalid for error reporting while we rebuild it */
+	resetStringInfo(&cstate->line_buf);
+	cstate->line_buf_valid = false;
+
+	/* Parse data and transfer into line_buf */
+	result = CopyReadLineText(cstate);
+
+	if (result)
+	{
+		/*
+		 * Reached EOF.  In protocol version 3, we should ignore anything
+		 * after \. up to the protocol end of copy data.  (XXX maybe better
+		 * not to treat \. as special?)
+		 */
+		if (cstate->copy_src == COPY_FRONTEND)
+		{
+			int			inbytes;
+
+			/* Drain any remaining copy data the frontend sends after \. */
+			do
+			{
+				inbytes = CopyGetData(cstate, cstate->input_buf,
+									  1, INPUT_BUF_SIZE);
+			} while (inbytes > 0);
+			cstate->input_buf_index = 0;
+			cstate->input_buf_len = 0;
+			cstate->raw_buf_index = 0;
+			cstate->raw_buf_len = 0;
+		}
+	}
+	else
+	{
+		/*
+		 * If we didn't hit EOF, then we must have transferred the EOL marker
+		 * to line_buf along with the data.  Get rid of it.
+		 */
+		switch (cstate->eol_type)
+		{
+			case EOL_NL:
+				Assert(cstate->line_buf.len >= 1);
+				Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
+				cstate->line_buf.len--;
+				cstate->line_buf.data[cstate->line_buf.len] = '\0';
+				break;
+			case EOL_CR:
+				Assert(cstate->line_buf.len >= 1);
+				Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\r');
+				cstate->line_buf.len--;
+				cstate->line_buf.data[cstate->line_buf.len] = '\0';
+				break;
+			case EOL_CRNL:
+				Assert(cstate->line_buf.len >= 2);
+				Assert(cstate->line_buf.data[cstate->line_buf.len - 2] == '\r');
+				Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
+				cstate->line_buf.len -= 2;
+				cstate->line_buf.data[cstate->line_buf.len] = '\0';
+				break;
+			case EOL_UNKNOWN:
+				/* shouldn't get here */
+				Assert(false);
+				break;
+		}
+	}
+
+	/* Now it's safe to use the buffer in error messages */
+	cstate->line_buf_valid = true;
+
+	return result;
+}
+
+/*
+ * CopyReadLineText - inner loop of CopyReadLine for text mode
+ */
+static bool
+CopyReadLineText(CopyFromState cstate)
+{
+ char *copy_input_buf;
+ int input_buf_ptr;
+ int copy_buf_len;
+ bool need_data = false;
+ bool hit_eof = false;
+ bool result = false;
+
+ /* CSV variables */
+ bool first_char_in_line = true;
+ bool in_quote = false,
+ last_was_esc = false;
+ char quotec = '\0';
+ char escapec = '\0';
+
+ if (cstate->opts.csv_mode)
+ {
+ quotec = cstate->opts.quote[0];
+ escapec = cstate->opts.escape[0];
+ /* ignore special escape processing if it's the same as quotec */
+ if (quotec == escapec)
+ escapec = '\0';
+ }
+
+ /*
+ * The objective of this loop is to transfer the entire next input line
+ * into line_buf. Hence, we only care for detecting newlines (\r and/or
+ * \n) and the end-of-copy marker (\.).
+ *
+ * In CSV mode, \r and \n inside a quoted field are just part of the data
+ * value and are put in line_buf. We keep just enough state to know if we
+ * are currently in a quoted field or not.
+ *
+ * These four characters, and the CSV escape and quote characters, are
+ * assumed the same in frontend and backend encodings.
+ *
+ * The input has already been converted to the database encoding. All
+ * supported server encodings have the property that all bytes in a
+ * multi-byte sequence have the high bit set, so a multibyte character
+ * cannot contain any newline or escape characters embedded in the
+ * multibyte sequence. Therefore, we can process the input byte-by-byte,
+ * regardless of the encoding.
+ *
+ * For speed, we try to move data from input_buf to line_buf in chunks
+ * rather than one character at a time. input_buf_ptr points to the next
+ * character to examine; any characters from input_buf_index to
+ * input_buf_ptr have been determined to be part of the line, but not yet
+ * transferred to line_buf.
+ *
+ * For a little extra speed within the loop, we copy input_buf and
+ * input_buf_len into local variables.
+ */
+ copy_input_buf = cstate->input_buf;
+ input_buf_ptr = cstate->input_buf_index;
+ copy_buf_len = cstate->input_buf_len;
+
+ for (;;)
+ {
+ int prev_raw_ptr;
+ char c;
+
+ /*
+ * Load more data if needed.
+ *
+ * TODO: We could just force four bytes of read-ahead and avoid the
+ * many calls to IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(). That was
+ * unsafe with the old v2 COPY protocol, but we don't support that
+ * anymore.
+ */
+ if (input_buf_ptr >= copy_buf_len || need_data)
+ {
+ REFILL_LINEBUF;
+
+ CopyLoadInputBuf(cstate);
+ /* update our local variables */
+ hit_eof = cstate->input_reached_eof;
+ input_buf_ptr = cstate->input_buf_index;
+ copy_buf_len = cstate->input_buf_len;
+
+ /*
+ * If we are completely out of data, break out of the loop,
+ * reporting EOF.
+ */
+ if (INPUT_BUF_BYTES(cstate) <= 0)
+ {
+ result = true;
+ break;
+ }
+ need_data = false;
+ }
+
+ /* OK to fetch a character */
+ prev_raw_ptr = input_buf_ptr;
+ c = copy_input_buf[input_buf_ptr++];
+
+ if (cstate->opts.csv_mode)
+ {
+ /*
+ * If character is '\\' or '\r', we may need to look ahead below.
+ * Force fetch of the next character if we don't already have it.
+ * We need to do this before changing CSV state, in case one of
+ * these characters is also the quote or escape character.
+ */
+ if (c == '\\' || c == '\r')
+ {
+ IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
+ }
+
+ /*
+ * Dealing with quotes and escapes here is mildly tricky. If the
+ * quote char is also the escape char, there's no problem - we
+ * just use the char as a toggle. If they are different, we need
+ * to ensure that we only take account of an escape inside a
+ * quoted field and immediately preceding a quote char, and not
+ * the second in an escape-escape sequence.
+ */
+ if (in_quote && c == escapec)
+ last_was_esc = !last_was_esc;
+ if (c == quotec && !last_was_esc)
+ in_quote = !in_quote;
+ if (c != escapec)
+ last_was_esc = false;
+
+ /*
+ * Updating the line count for embedded CR and/or LF chars is
+ * necessarily a little fragile - this test is probably about the
+ * best we can do. (XXX it's arguable whether we should do this
+ * at all --- is cur_lineno a physical or logical count?)
+ */
+ if (in_quote && c == (cstate->eol_type == EOL_NL ? '\n' : '\r'))
+ cstate->cur_lineno++;
+ }
+
+ /* Process \r */
+ if (c == '\r' && (!cstate->opts.csv_mode || !in_quote))
+ {
+ /* Check for \r\n on first line, _and_ handle \r\n. */
+ if (cstate->eol_type == EOL_UNKNOWN ||
+ cstate->eol_type == EOL_CRNL)
+ {
+ /*
+ * If need more data, go back to loop top to load it.
+ *
+ * Note that if we are at EOF, c will wind up as '\0' because
+ * of the guaranteed pad of input_buf.
+ */
+ IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
+
+ /* get next char */
+ c = copy_input_buf[input_buf_ptr];
+
+ if (c == '\n')
+ {
+ input_buf_ptr++; /* eat newline */
+ cstate->eol_type = EOL_CRNL; /* in case not set yet */
+ }
+ else
+ {
+ /* found \r, but no \n */
+ if (cstate->eol_type == EOL_CRNL)
+ ereport(ERROR,
+ (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ !cstate->opts.csv_mode ?
+ errmsg("literal carriage return found in data") :
+ errmsg("unquoted carriage return found in data"),
+ !cstate->opts.csv_mode ?
+ errhint("Use \"\\r\" to represent carriage return.") :
+ errhint("Use quoted CSV field to represent carriage return.")));
+
+ /*
+ * if we got here, it is the first line and we didn't find
+ * \n, so don't consume the peeked character
+ */
+ cstate->eol_type = EOL_CR;
+ }
+ }
+ else if (cstate->eol_type == EOL_NL)
+ ereport(ERROR,
+ (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ !cstate->opts.csv_mode ?
+ errmsg("literal carriage return found in data") :
+ errmsg("unquoted carriage return found in data"),
+ !cstate->opts.csv_mode ?
+ errhint("Use \"\\r\" to represent carriage return.") :
+ errhint("Use quoted CSV field to represent carriage return.")));
+ /* If reach here, we have found the line terminator */
+ break;
+ }
+
+ /* Process \n */
+ if (c == '\n' && (!cstate->opts.csv_mode || !in_quote))
+ {
+ if (cstate->eol_type == EOL_CR || cstate->eol_type == EOL_CRNL)
+ ereport(ERROR,
+ (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ !cstate->opts.csv_mode ?
+ errmsg("literal newline found in data") :
+ errmsg("unquoted newline found in data"),
+ !cstate->opts.csv_mode ?
+ errhint("Use \"\\n\" to represent newline.") :
+ errhint("Use quoted CSV field to represent newline.")));
+ cstate->eol_type = EOL_NL; /* in case not set yet */
+ /* If reach here, we have found the line terminator */
+ break;
+ }
+
+ /*
+ * In CSV mode, we only recognize \. alone on a line. This is because
+ * \. is a valid CSV data value.
+ */
+ if (c == '\\' && (!cstate->opts.csv_mode || first_char_in_line))
+ {
+ char c2;
+
+ IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
+ IF_NEED_REFILL_AND_EOF_BREAK(0);
+
+ /* -----
+ * get next character
+ * Note: we do not change c so if it isn't \., we can fall
+ * through and continue processing.
+ * -----
+ */
+ c2 = copy_input_buf[input_buf_ptr];
+
+ if (c2 == '.')
+ {
+ input_buf_ptr++; /* consume the '.' */
+
+ /*
+ * Note: if we loop back for more data here, it does not
+ * matter that the CSV state change checks are re-executed; we
+ * will come back here with no important state changed.
+ */
+ if (cstate->eol_type == EOL_CRNL)
+ {
+ /* Get the next character */
+ IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
+ /* if hit_eof, c2 will become '\0' */
+ c2 = copy_input_buf[input_buf_ptr++];
+
+ if (c2 == '\n')
+ {
+ if (!cstate->opts.csv_mode)
+ ereport(ERROR,
+ (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ errmsg("end-of-copy marker does not match previous newline style")));
+ else
+ NO_END_OF_COPY_GOTO;
+ }
+ else if (c2 != '\r')
+ {
+ if (!cstate->opts.csv_mode)
+ ereport(ERROR,
+ (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ errmsg("end-of-copy marker corrupt")));
+ else
+ NO_END_OF_COPY_GOTO;
+ }
+ }
+
+ /* Get the next character */
+ IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
+ /* if hit_eof, c2 will become '\0' */
+ c2 = copy_input_buf[input_buf_ptr++];
+
+ if (c2 != '\r' && c2 != '\n')
+ {
+ if (!cstate->opts.csv_mode)
+ ereport(ERROR,
+ (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ errmsg("end-of-copy marker corrupt")));
+ else
+ NO_END_OF_COPY_GOTO;
+ }
+
+ if ((cstate->eol_type == EOL_NL && c2 != '\n') ||
+ (cstate->eol_type == EOL_CRNL && c2 != '\n') ||
+ (cstate->eol_type == EOL_CR && c2 != '\r'))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ errmsg("end-of-copy marker does not match previous newline style")));
+ }
+
+ /*
+ * Transfer only the data before the \. into line_buf, then
+ * discard the data and the \. sequence.
+ */
+ if (prev_raw_ptr > cstate->input_buf_index)
+ appendBinaryStringInfo(&cstate->line_buf,
+ cstate->input_buf + cstate->input_buf_index,
+ prev_raw_ptr - cstate->input_buf_index);
+ cstate->input_buf_index = input_buf_ptr;
+ result = true; /* report EOF */
+ break;
+ }
+ else if (!cstate->opts.csv_mode)
+ {
+ /*
+ * If we are here, it means we found a backslash followed by
+ * something other than a period. In non-CSV mode, anything
+ * after a backslash is special, so we skip over that second
+ * character too. If we didn't do that \\. would be
+ * considered an eof-of copy, while in non-CSV mode it is a
+ * literal backslash followed by a period. In CSV mode,
+ * backslashes are not special, so we want to process the
+ * character after the backslash just like a normal character,
+ * so we don't increment in those cases.
+ */
+ input_buf_ptr++;
+ }
+ }
+
+ /*
+ * This label is for CSV cases where \. appears at the start of a
+ * line, but there is more text after it, meaning it was a data value.
+ * We are more strict for \. in CSV mode because \. could be a data
+ * value, while in non-CSV mode, \. cannot be a data value.
+ */
+not_end_of_copy:
+ first_char_in_line = false;
+ } /* end of outer loop */
+
+ /*
+ * Transfer any still-uncopied data to line_buf.
+ */
+ REFILL_LINEBUF;
+
+ return result;
+}
+
/*
 * Return decimal value for a hexadecimal digit
 *
 * The caller must have verified (e.g. with isxdigit()) that 'hex' is a
 * valid hex digit; any other input yields a meaningless result.
 */
static int
GetDecimalFromHex(char hex)
{
	unsigned char uc = (unsigned char) hex;

	return isdigit(uc) ? uc - '0' : tolower(uc) - 'a' + 10;
}
+
/*
 * Parse the current line into separate attributes (fields),
 * performing de-escaping as needed.
 *
 * The input is in line_buf. We use attribute_buf to hold the result
 * strings. cstate->raw_fields[k] is set to point to the k'th attribute
 * string, or NULL when the input matches the null marker string.
 * This array is expanded as necessary.
 *
 * (Note that the caller cannot check for nulls since the returned
 * string would be the post-de-escaping equivalent, which may look
 * the same as some valid data string.)
 *
 * delim is the column delimiter string (must be just one byte for now).
 * null_print is the null marker string. Note that this is compared to
 * the pre-de-escaped input string.
 *
 * The return value is the number of fields actually read.
 */
static int
CopyReadAttributesText(CopyFromState cstate)
{
	char		delimc = cstate->opts.delim[0];
	int			fieldno;
	char	   *output_ptr;		/* next free byte in attribute_buf */
	char	   *cur_ptr;		/* next input byte in line_buf */
	char	   *line_end_ptr;	/* one past the last input byte */

	/*
	 * We need a special case for zero-column tables: check that the input
	 * line is empty, and return.
	 */
	if (cstate->max_fields <= 0)
	{
		if (cstate->line_buf.len != 0)
			ereport(ERROR,
					(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
					 errmsg("extra data after last expected column")));
		return 0;
	}

	resetStringInfo(&cstate->attribute_buf);

	/*
	 * The de-escaped attributes will certainly not be longer than the input
	 * data line, so we can just force attribute_buf to be large enough and
	 * then transfer data without any checks for enough space. We need to do
	 * it this way because enlarging attribute_buf mid-stream would invalidate
	 * pointers already stored into cstate->raw_fields[].
	 */
	if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
		enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
	output_ptr = cstate->attribute_buf.data;

	/* set pointer variables for loop */
	cur_ptr = cstate->line_buf.data;
	line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;

	/* Outer loop iterates over fields */
	fieldno = 0;
	for (;;)
	{
		bool		found_delim = false;
		char	   *start_ptr;
		char	   *end_ptr;
		int			input_len;
		bool		saw_non_ascii = false;

		/* Make sure there is enough space for the next value */
		if (fieldno >= cstate->max_fields)
		{
			cstate->max_fields *= 2;
			cstate->raw_fields =
				repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
		}

		/* Remember start of field on both input and output sides */
		start_ptr = cur_ptr;
		cstate->raw_fields[fieldno] = output_ptr;

		/*
		 * Scan data for field.
		 *
		 * Note that in this loop, we are scanning to locate the end of field
		 * and also speculatively performing de-escaping. Once we find the
		 * end-of-field, we can match the raw field contents against the null
		 * marker string. Only after that comparison fails do we know that
		 * de-escaping is actually the right thing to do; therefore we *must
		 * not* throw any syntax errors before we've done the null-marker
		 * check.
		 */
		for (;;)
		{
			char		c;

			end_ptr = cur_ptr;
			if (cur_ptr >= line_end_ptr)
				break;
			c = *cur_ptr++;
			if (c == delimc)
			{
				found_delim = true;
				break;
			}
			if (c == '\\')
			{
				/* backslash at end of line is taken literally */
				if (cur_ptr >= line_end_ptr)
					break;
				c = *cur_ptr++;
				switch (c)
				{
					case '0':
					case '1':
					case '2':
					case '3':
					case '4':
					case '5':
					case '6':
					case '7':
						{
							/* handle \013 (one to three octal digits) */
							int			val;

							val = OCTVALUE(c);
							if (cur_ptr < line_end_ptr)
							{
								c = *cur_ptr;
								if (ISOCTAL(c))
								{
									cur_ptr++;
									val = (val << 3) + OCTVALUE(c);
									if (cur_ptr < line_end_ptr)
									{
										c = *cur_ptr;
										if (ISOCTAL(c))
										{
											cur_ptr++;
											val = (val << 3) + OCTVALUE(c);
										}
									}
								}
							}
							c = val & 0377;
							if (c == '\0' || IS_HIGHBIT_SET(c))
								saw_non_ascii = true;
						}
						break;
					case 'x':
						/* Handle \x3F (one or two hex digits) */
						if (cur_ptr < line_end_ptr)
						{
							char		hexchar = *cur_ptr;

							if (isxdigit((unsigned char) hexchar))
							{
								int			val = GetDecimalFromHex(hexchar);

								cur_ptr++;
								if (cur_ptr < line_end_ptr)
								{
									hexchar = *cur_ptr;
									if (isxdigit((unsigned char) hexchar))
									{
										cur_ptr++;
										val = (val << 4) + GetDecimalFromHex(hexchar);
									}
								}
								c = val & 0xff;
								if (c == '\0' || IS_HIGHBIT_SET(c))
									saw_non_ascii = true;
							}
						}
						break;
					case 'b':
						c = '\b';
						break;
					case 'f':
						c = '\f';
						break;
					case 'n':
						c = '\n';
						break;
					case 'r':
						c = '\r';
						break;
					case 't':
						c = '\t';
						break;
					case 'v':
						c = '\v';
						break;

						/*
						 * in all other cases, take the char after '\'
						 * literally
						 */
				}
			}

			/* Add c to output string */
			*output_ptr++ = c;
		}

		/* Check whether raw input matched null marker */
		input_len = end_ptr - start_ptr;
		if (input_len == cstate->opts.null_print_len &&
			strncmp(start_ptr, cstate->opts.null_print, input_len) == 0)
			cstate->raw_fields[fieldno] = NULL;
		else
		{
			/*
			 * At this point we know the field is supposed to contain data.
			 *
			 * If we de-escaped any non-7-bit-ASCII chars, make sure the
			 * resulting string is valid data for the db encoding.
			 */
			if (saw_non_ascii)
			{
				char	   *fld = cstate->raw_fields[fieldno];

				pg_verifymbstr(fld, output_ptr - fld, false);
			}
		}

		/* Terminate attribute value in output area */
		*output_ptr++ = '\0';

		fieldno++;
		/* Done if we hit EOL instead of a delim */
		if (!found_delim)
			break;
	}

	/* Clean up state of attribute_buf */
	output_ptr--;
	Assert(*output_ptr == '\0');
	cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);

	return fieldno;
}
+
/*
 * Parse the current line into separate attributes (fields),
 * performing de-escaping as needed. This has exactly the same API as
 * CopyReadAttributesText, except we parse the fields according to
 * "standard" (i.e. common) CSV usage.
 */
static int
CopyReadAttributesCSV(CopyFromState cstate)
{
	char		delimc = cstate->opts.delim[0];
	char		quotec = cstate->opts.quote[0];
	char		escapec = cstate->opts.escape[0];
	int			fieldno;
	char	   *output_ptr;		/* next free byte in attribute_buf */
	char	   *cur_ptr;		/* next input byte in line_buf */
	char	   *line_end_ptr;	/* one past the last input byte */

	/*
	 * We need a special case for zero-column tables: check that the input
	 * line is empty, and return.
	 */
	if (cstate->max_fields <= 0)
	{
		if (cstate->line_buf.len != 0)
			ereport(ERROR,
					(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
					 errmsg("extra data after last expected column")));
		return 0;
	}

	resetStringInfo(&cstate->attribute_buf);

	/*
	 * The de-escaped attributes will certainly not be longer than the input
	 * data line, so we can just force attribute_buf to be large enough and
	 * then transfer data without any checks for enough space. We need to do
	 * it this way because enlarging attribute_buf mid-stream would invalidate
	 * pointers already stored into cstate->raw_fields[].
	 */
	if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
		enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
	output_ptr = cstate->attribute_buf.data;

	/* set pointer variables for loop */
	cur_ptr = cstate->line_buf.data;
	line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;

	/* Outer loop iterates over fields */
	fieldno = 0;
	for (;;)
	{
		bool		found_delim = false;
		bool		saw_quote = false;	/* needed for null-marker check below */
		char	   *start_ptr;
		char	   *end_ptr;
		int			input_len;

		/* Make sure there is enough space for the next value */
		if (fieldno >= cstate->max_fields)
		{
			cstate->max_fields *= 2;
			cstate->raw_fields =
				repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
		}

		/* Remember start of field on both input and output sides */
		start_ptr = cur_ptr;
		cstate->raw_fields[fieldno] = output_ptr;

		/*
		 * Scan data for field,
		 *
		 * The loop starts in "not quote" mode and then toggles between that
		 * and "in quote" mode. The loop exits normally if it is in "not
		 * quote" mode and a delimiter or line end is seen.
		 */
		for (;;)
		{
			char		c;

			/* Not in quote */
			for (;;)
			{
				end_ptr = cur_ptr;
				if (cur_ptr >= line_end_ptr)
					goto endfield;
				c = *cur_ptr++;
				/* unquoted field delimiter */
				if (c == delimc)
				{
					found_delim = true;
					goto endfield;
				}
				/* start of quoted field (or part of field) */
				if (c == quotec)
				{
					saw_quote = true;
					break;
				}
				/* Add c to output string */
				*output_ptr++ = c;
			}

			/* In quote */
			for (;;)
			{
				end_ptr = cur_ptr;
				if (cur_ptr >= line_end_ptr)
					ereport(ERROR,
							(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
							 errmsg("unterminated CSV quoted field")));

				c = *cur_ptr++;

				/* escape within a quoted field */
				if (c == escapec)
				{
					/*
					 * peek at the next char if available, and escape it if it
					 * is an escape char or a quote char
					 */
					if (cur_ptr < line_end_ptr)
					{
						char		nextc = *cur_ptr;

						if (nextc == escapec || nextc == quotec)
						{
							*output_ptr++ = nextc;
							cur_ptr++;
							continue;
						}
					}
				}

				/*
				 * end of quoted field. Must do this test after testing for
				 * escape in case quote char and escape char are the same
				 * (which is the common case).
				 */
				if (c == quotec)
					break;

				/* Add c to output string */
				*output_ptr++ = c;
			}
		}
endfield:

		/* Terminate attribute value in output area */
		*output_ptr++ = '\0';

		/* Check whether raw input matched null marker */
		input_len = end_ptr - start_ptr;
		if (!saw_quote && input_len == cstate->opts.null_print_len &&
			strncmp(start_ptr, cstate->opts.null_print, input_len) == 0)
			cstate->raw_fields[fieldno] = NULL;

		fieldno++;
		/* Done if we hit EOL instead of a delim */
		if (!found_delim)
			break;
	}

	/* Clean up state of attribute_buf */
	output_ptr--;
	Assert(*output_ptr == '\0');
	cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);

	return fieldno;
}
+
+
+/*
+ * Read a binary attribute
+ */
+static Datum
+CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo,
+ Oid typioparam, int32 typmod,
+ bool *isnull)
+{
+ int32 fld_size;
+ Datum result;
+
+ if (!CopyGetInt32(cstate, &fld_size))
+ ereport(ERROR,
+ (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ errmsg("unexpected EOF in COPY data")));
+ if (fld_size == -1)
+ {
+ *isnull = true;
+ return ReceiveFunctionCall(flinfo, NULL, typioparam, typmod);
+ }
+ if (fld_size < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ errmsg("invalid field size")));
+
+ /* reset attribute_buf to empty, and load raw data in it */
+ resetStringInfo(&cstate->attribute_buf);
+
+ enlargeStringInfo(&cstate->attribute_buf, fld_size);
+ if (CopyReadBinaryData(cstate, cstate->attribute_buf.data,
+ fld_size) != fld_size)
+ ereport(ERROR,
+ (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ errmsg("unexpected EOF in COPY data")));
+
+ cstate->attribute_buf.len = fld_size;
+ cstate->attribute_buf.data[fld_size] = '\0';
+
+ /* Call the column type's binary input converter */
+ result = ReceiveFunctionCall(flinfo, &cstate->attribute_buf,
+ typioparam, typmod);
+
+ /* Trouble if it didn't eat the whole buffer */
+ if (cstate->attribute_buf.cursor != cstate->attribute_buf.len)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
+ errmsg("incorrect binary data format")));
+
+ *isnull = false;
+ return result;
+}
diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c
new file mode 100644
index 0000000..73e286f
--- /dev/null
+++ b/src/backend/commands/copyto.c
@@ -0,0 +1,1310 @@
+/*-------------------------------------------------------------------------
+ *
+ * copyto.c
+ * COPY <table> TO file/program/client
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/commands/copyto.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <ctype.h>
+#include <unistd.h>
+#include <sys/stat.h>
+
+#include "access/heapam.h"
+#include "access/htup_details.h"
+#include "access/tableam.h"
+#include "access/xact.h"
+#include "access/xlog.h"
+#include "commands/copy.h"
+#include "commands/progress.h"
+#include "executor/execdesc.h"
+#include "executor/executor.h"
+#include "executor/tuptable.h"
+#include "libpq/libpq.h"
+#include "libpq/pqformat.h"
+#include "mb/pg_wchar.h"
+#include "miscadmin.h"
+#include "optimizer/optimizer.h"
+#include "pgstat.h"
+#include "rewrite/rewriteHandler.h"
+#include "storage/fd.h"
+#include "tcop/tcopprot.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/partcache.h"
+#include "utils/rel.h"
+#include "utils/snapmgr.h"
+
/*
 * Represents the different dest cases we need to worry about at
 * the bottom level (i.e. in CopySendEndOfRow).
 */
typedef enum CopyDest
{
	COPY_FILE,					/* to file (or a piped program) */
	COPY_FRONTEND,				/* to frontend */
} CopyDest;
+
/*
 * This struct contains all the state variables used throughout a COPY TO
 * operation.
 *
 * Multi-byte encodings: all supported client-side encodings encode multi-byte
 * characters by having the first byte's high bit set. Subsequent bytes of the
 * character can have the high bit not set. When scanning data in such an
 * encoding to look for a match to a single-byte (ie ASCII) character, we must
 * use the full pg_encoding_mblen() machinery to skip over multibyte
 * characters, else we might find a false match to a trailing byte. In
 * supported server encodings, there is no possibility of a false match, and
 * it's faster to make useless comparisons to trailing bytes than it is to
 * invoke pg_encoding_mblen() to skip over them. encoding_embeds_ascii is true
 * when we have to do it the hard way.
 */
typedef struct CopyToStateData
{
	/* low-level state data */
	CopyDest	copy_dest;		/* type of copy source/destination */
	FILE	   *copy_file;		/* used if copy_dest == COPY_FILE */
	StringInfo	fe_msgbuf;		/* used for all dests during COPY TO */

	int			file_encoding;	/* file or remote side's character encoding */
	bool		need_transcoding;	/* file encoding diff from server? */
	bool		encoding_embeds_ascii;	/* ASCII can be non-first byte? */

	/* parameters from the COPY command */
	Relation	rel;			/* relation to copy to */
	QueryDesc  *queryDesc;		/* executable query to copy from */
	List	   *attnumlist;		/* integer list of attnums to copy */
	char	   *filename;		/* filename, or NULL for STDOUT */
	bool		is_program;		/* is 'filename' a program to popen? */

	CopyFormatOptions opts;		/* format options from the COPY command */
	Node	   *whereClause;	/* WHERE condition (or NULL) */

	/*
	 * Working state
	 */
	MemoryContext copycontext;	/* per-copy execution context */

	FmgrInfo   *out_functions;	/* lookup info for output functions */
	MemoryContext rowcontext;	/* per-row evaluation context */
	uint64		bytes_processed;	/* number of bytes processed so far */
} CopyToStateData;
+
/* DestReceiver for COPY (query) TO: collects tuples from the executor */
typedef struct
{
	DestReceiver pub;			/* publicly-known function pointers */
	CopyToState cstate;			/* CopyToStateData for the command */
	uint64		processed;		/* # of tuples processed */
} DR_copy;
+
/*
 * 11-byte signature that starts every binary-format COPY file.
 * NOTE: there's a copy of this in copyfromparse.c
 */
static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0";
+
+
+/* non-export function prototypes */
+static void EndCopy(CopyToState cstate);
+static void ClosePipeToProgram(CopyToState cstate);
+static void CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot);
+static void CopyAttributeOutText(CopyToState cstate, const char *string);
+static void CopyAttributeOutCSV(CopyToState cstate, const char *string,
+ bool use_quote, bool single_attr);
+
+/* Low-level communications functions */
+static void SendCopyBegin(CopyToState cstate);
+static void SendCopyEnd(CopyToState cstate);
+static void CopySendData(CopyToState cstate, const void *databuf, int datasize);
+static void CopySendString(CopyToState cstate, const char *str);
+static void CopySendChar(CopyToState cstate, char c);
+static void CopySendEndOfRow(CopyToState cstate);
+static void CopySendInt32(CopyToState cstate, int32 val);
+static void CopySendInt16(CopyToState cstate, int16 val);
+
+
+/*
+ * Send copy start/stop messages for frontend copies. These have changed
+ * in past protocol redesigns.
+ */
+static void
+SendCopyBegin(CopyToState cstate)
+{
+ StringInfoData buf;
+ int natts = list_length(cstate->attnumlist);
+ int16 format = (cstate->opts.binary ? 1 : 0);
+ int i;
+
+ pq_beginmessage(&buf, 'H');
+ pq_sendbyte(&buf, format); /* overall format */
+ pq_sendint16(&buf, natts);
+ for (i = 0; i < natts; i++)
+ pq_sendint16(&buf, format); /* per-column formats */
+ pq_endmessage(&buf);
+ cstate->copy_dest = COPY_FRONTEND;
+}
+
/*
 * Send the CopyDone ('c') message that ends a COPY TO frontend transfer.
 * All row data must already have been flushed by CopySendEndOfRow.
 */
static void
SendCopyEnd(CopyToState cstate)
{
	/* Shouldn't have any unsent data */
	Assert(cstate->fe_msgbuf->len == 0);
	/* Send Copy Done message */
	pq_putemptymessage('c');
}
+
/*----------
 * CopySendData sends output data to the destination (file or frontend)
 * CopySendString does the same for null-terminated strings
 * CopySendChar does the same for single characters
 * CopySendEndOfRow does the appropriate thing at end of each data row
 *	(data is not actually flushed except by CopySendEndOfRow)
 *
 * NB: no data conversion is applied by these functions
 *----------
 */
static void
CopySendData(CopyToState cstate, const void *databuf, int datasize)
{
	/* just accumulate into fe_msgbuf; flushing happens in CopySendEndOfRow */
	appendBinaryStringInfo(cstate->fe_msgbuf, databuf, datasize);
}
+
/* Append a null-terminated string (without its terminator) to fe_msgbuf */
static void
CopySendString(CopyToState cstate, const char *str)
{
	appendBinaryStringInfo(cstate->fe_msgbuf, str, strlen(str));
}
+
/* Append a single character to fe_msgbuf */
static void
CopySendChar(CopyToState cstate, char c)
{
	appendStringInfoCharMacro(cstate->fe_msgbuf, c);
}
+
/*
 * Terminate the current output row (appending the newline appropriate to
 * the destination, unless in binary mode), flush fe_msgbuf to the file,
 * program pipe, or frontend, and update the COPY progress counters.
 */
static void
CopySendEndOfRow(CopyToState cstate)
{
	StringInfo	fe_msgbuf = cstate->fe_msgbuf;

	switch (cstate->copy_dest)
	{
		case COPY_FILE:
			if (!cstate->opts.binary)
			{
				/* Default line termination depends on platform */
#ifndef WIN32
				CopySendChar(cstate, '\n');
#else
				CopySendString(cstate, "\r\n");
#endif
			}

			if (fwrite(fe_msgbuf->data, fe_msgbuf->len, 1,
					   cstate->copy_file) != 1 ||
				ferror(cstate->copy_file))
			{
				if (cstate->is_program)
				{
					if (errno == EPIPE)
					{
						/*
						 * The pipe will be closed automatically on error at
						 * the end of transaction, but we might get a better
						 * error message from the subprocess' exit code than
						 * just "Broken Pipe"
						 */
						ClosePipeToProgram(cstate);

						/*
						 * If ClosePipeToProgram() didn't throw an error, the
						 * program terminated normally, but closed the pipe
						 * first. Restore errno, and throw an error.
						 */
						errno = EPIPE;
					}
					ereport(ERROR,
							(errcode_for_file_access(),
							 errmsg("could not write to COPY program: %m")));
				}
				else
					ereport(ERROR,
							(errcode_for_file_access(),
							 errmsg("could not write to COPY file: %m")));
			}
			break;
		case COPY_FRONTEND:
			/* The FE/BE protocol uses \n as newline for all platforms */
			if (!cstate->opts.binary)
				CopySendChar(cstate, '\n');

			/* Dump the accumulated row as one CopyData message */
			(void) pq_putmessage('d', fe_msgbuf->data, fe_msgbuf->len);
			break;
	}

	/* Update the progress */
	cstate->bytes_processed += fe_msgbuf->len;
	pgstat_progress_update_param(PROGRESS_COPY_BYTES_PROCESSED, cstate->bytes_processed);

	resetStringInfo(fe_msgbuf);
}
+
+/*
+ * These functions do apply some data conversion
+ */
+
+/*
+ * CopySendInt32 sends an int32 in network byte order
+ */
+static inline void
+CopySendInt32(CopyToState cstate, int32 val)
+{
+ uint32 buf;
+
+ buf = pg_hton32((uint32) val);
+ CopySendData(cstate, &buf, sizeof(buf));
+}
+
+/*
+ * CopySendInt16 sends an int16 in network byte order
+ */
+static inline void
+CopySendInt16(CopyToState cstate, int16 val)
+{
+ uint16 buf;
+
+ buf = pg_hton16((uint16) val);
+ CopySendData(cstate, &buf, sizeof(buf));
+}
+
/*
 * Closes the pipe to an external program, checking the pclose() return code.
 *
 * Raises an error if the close itself fails or if the program exited with a
 * nonzero status (the exit status is decoded into the error detail).
 */
static void
ClosePipeToProgram(CopyToState cstate)
{
	int			pclose_rc;

	Assert(cstate->is_program);

	pclose_rc = ClosePipeStream(cstate->copy_file);
	if (pclose_rc == -1)
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not close pipe to external command: %m")));
	else if (pclose_rc != 0)
	{
		ereport(ERROR,
				(errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
				 errmsg("program \"%s\" failed",
						cstate->filename),
				 errdetail_internal("%s", wait_result_to_str(pclose_rc))));
	}
}
+
+/*
+ * Release resources allocated in a cstate for COPY TO/FROM.
+ */
+static void
+EndCopy(CopyToState cstate)
+{
+ if (cstate->is_program)
+ {
+ ClosePipeToProgram(cstate);
+ }
+ else
+ {
+ if (cstate->filename != NULL && FreeFile(cstate->copy_file))
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not close file \"%s\": %m",
+ cstate->filename)));
+ }
+
+ pgstat_progress_end_command();
+
+ MemoryContextDelete(cstate->copycontext);
+ pfree(cstate);
+}
+
+/*
+ * Setup CopyToState to read tuples from a table or a query for COPY TO.
+ *
+ * 'rel': source table, or NULL when copying the output of 'raw_query'.
+ * 'raw_query': query to run for COPY (query) TO; NULL when 'rel' is given.
+ * 'queryRelId': when a "COPY relation TO" was converted to a query-based
+ *		COPY (for row-level security), the OID of the originally named and
+ *		locked relation; InvalidOid otherwise.
+ * 'filename': server-side file or program to write to; NULL means send the
+ *		data to the client connection (or stdout).
+ * 'is_program': true if 'filename' is a shell command, not a file path.
+ * 'attnamelist': list of column names to copy, or NIL for all columns.
+ * 'options': raw list of COPY options, decoded by ProcessCopyOptions().
+ *
+ * Returns a CopyToState allocated in its own memory context; the caller is
+ * expected to finish with EndCopyTo().
+ */
+CopyToState
+BeginCopyTo(ParseState *pstate,
+			Relation rel,
+			RawStmt *raw_query,
+			Oid queryRelId,
+			const char *filename,
+			bool is_program,
+			List *attnamelist,
+			List *options)
+{
+	CopyToState cstate;
+	bool		pipe = (filename == NULL);
+	TupleDesc	tupDesc;
+	int			num_phys_attrs;
+	MemoryContext oldcontext;
+	const int	progress_cols[] = {
+		PROGRESS_COPY_COMMAND,
+		PROGRESS_COPY_TYPE
+	};
+	int64		progress_vals[] = {
+		PROGRESS_COPY_COMMAND_TO,
+		0
+	};
+
+	/*
+	 * Plain "COPY relation TO" only works for ordinary tables; reject other
+	 * relkinds with a targeted hint pointing at COPY (SELECT ...) TO where
+	 * that variant would work.
+	 */
+	if (rel != NULL && rel->rd_rel->relkind != RELKIND_RELATION)
+	{
+		if (rel->rd_rel->relkind == RELKIND_VIEW)
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("cannot copy from view \"%s\"",
+							RelationGetRelationName(rel)),
+					 errhint("Try the COPY (SELECT ...) TO variant.")));
+		else if (rel->rd_rel->relkind == RELKIND_MATVIEW)
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("cannot copy from materialized view \"%s\"",
+							RelationGetRelationName(rel)),
+					 errhint("Try the COPY (SELECT ...) TO variant.")));
+		else if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("cannot copy from foreign table \"%s\"",
+							RelationGetRelationName(rel)),
+					 errhint("Try the COPY (SELECT ...) TO variant.")));
+		else if (rel->rd_rel->relkind == RELKIND_SEQUENCE)
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("cannot copy from sequence \"%s\"",
+							RelationGetRelationName(rel))));
+		else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("cannot copy from partitioned table \"%s\"",
+							RelationGetRelationName(rel)),
+					 errhint("Try the COPY (SELECT ...) TO variant.")));
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("cannot copy from non-table relation \"%s\"",
+							RelationGetRelationName(rel))));
+	}
+
+
+	/* Allocate workspace and zero all fields */
+	cstate = (CopyToStateData *) palloc0(sizeof(CopyToStateData));
+
+	/*
+	 * We allocate everything used by a cstate in a new memory context. This
+	 * avoids memory leaks during repeated use of COPY in a query.
+	 */
+	cstate->copycontext = AllocSetContextCreate(CurrentMemoryContext,
+												"COPY",
+												ALLOCSET_DEFAULT_SIZES);
+
+	oldcontext = MemoryContextSwitchTo(cstate->copycontext);
+
+	/* Extract options from the statement node tree */
+	ProcessCopyOptions(pstate, &cstate->opts, false /* is_from */ , options);
+
+	/* Process the source/target relation or query */
+	if (rel)
+	{
+		Assert(!raw_query);
+
+		cstate->rel = rel;
+
+		tupDesc = RelationGetDescr(cstate->rel);
+	}
+	else
+	{
+		List	   *rewritten;
+		Query	   *query;
+		PlannedStmt *plan;
+		DestReceiver *dest;
+
+		cstate->rel = NULL;
+
+		/*
+		 * Run parse analysis and rewrite.  Note this also acquires sufficient
+		 * locks on the source table(s).
+		 */
+		rewritten = pg_analyze_and_rewrite_fixedparams(raw_query,
+													   pstate->p_sourcetext, NULL, 0,
+													   NULL);
+
+		/* check that we got back something we can work with */
+		if (rewritten == NIL)
+		{
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("DO INSTEAD NOTHING rules are not supported for COPY")));
+		}
+		else if (list_length(rewritten) > 1)
+		{
+			ListCell   *lc;
+
+			/* examine queries to determine which error message to issue */
+			foreach(lc, rewritten)
+			{
+				Query	   *q = lfirst_node(Query, lc);
+
+				if (q->querySource == QSRC_QUAL_INSTEAD_RULE)
+					ereport(ERROR,
+							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+							 errmsg("conditional DO INSTEAD rules are not supported for COPY")));
+				if (q->querySource == QSRC_NON_INSTEAD_RULE)
+					ereport(ERROR,
+							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+							 errmsg("DO ALSO rules are not supported for the COPY")));
+			}
+
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("multi-statement DO INSTEAD rules are not supported for COPY")));
+		}
+
+		query = linitial_node(Query, rewritten);
+
+		/* The grammar allows SELECT INTO, but we don't support that */
+		if (query->utilityStmt != NULL &&
+			IsA(query->utilityStmt, CreateTableAsStmt))
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("COPY (SELECT INTO) is not supported")));
+
+		Assert(query->utilityStmt == NULL);
+
+		/*
+		 * Similarly the grammar doesn't enforce the presence of a RETURNING
+		 * clause, but this is required here.
+		 */
+		if (query->commandType != CMD_SELECT &&
+			query->returningList == NIL)
+		{
+			Assert(query->commandType == CMD_INSERT ||
+				   query->commandType == CMD_UPDATE ||
+				   query->commandType == CMD_DELETE);
+
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("COPY query must have a RETURNING clause")));
+		}
+
+		/* plan the query */
+		plan = pg_plan_query(query, pstate->p_sourcetext,
+							 CURSOR_OPT_PARALLEL_OK, NULL);
+
+		/*
+		 * With row-level security and a user using "COPY relation TO", we
+		 * have to convert the "COPY relation TO" to a query-based COPY (eg:
+		 * "COPY (SELECT * FROM ONLY relation) TO"), to allow the rewriter to
+		 * add in any RLS clauses.
+		 *
+		 * When this happens, we are passed in the relid of the originally
+		 * found relation (which we have locked).  As the planner will look up
+		 * the relation again, we double-check here to make sure it found the
+		 * same one that we have locked.
+		 */
+		if (queryRelId != InvalidOid)
+		{
+			/*
+			 * Note that with RLS involved there may be multiple relations,
+			 * and while the one we need is almost certainly first, we don't
+			 * make any guarantees of that in the planner, so check the whole
+			 * list and make sure we find the original relation.
+			 */
+			if (!list_member_oid(plan->relationOids, queryRelId))
+				ereport(ERROR,
+						(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+						 errmsg("relation referenced by COPY statement has changed")));
+		}
+
+		/*
+		 * Use a snapshot with an updated command ID to ensure this query sees
+		 * results of any previously executed queries.
+		 */
+		PushCopiedSnapshot(GetActiveSnapshot());
+		UpdateActiveSnapshotCommandId();
+
+		/* Create dest receiver for COPY OUT */
+		dest = CreateDestReceiver(DestCopyOut);
+		((DR_copy *) dest)->cstate = cstate;
+
+		/* Create a QueryDesc requesting no output */
+		cstate->queryDesc = CreateQueryDesc(plan, pstate->p_sourcetext,
+											GetActiveSnapshot(),
+											InvalidSnapshot,
+											dest, NULL, NULL, 0);
+
+		/*
+		 * Call ExecutorStart to prepare the plan for execution.
+		 *
+		 * ExecutorStart computes a result tupdesc for us
+		 */
+		ExecutorStart(cstate->queryDesc, 0);
+
+		tupDesc = cstate->queryDesc->tupDesc;
+	}
+
+	/* Generate or convert list of attributes to process */
+	cstate->attnumlist = CopyGetAttnums(tupDesc, cstate->rel, attnamelist);
+
+	num_phys_attrs = tupDesc->natts;
+
+	/* Convert FORCE_QUOTE name list to per-column flags, check validity */
+	cstate->opts.force_quote_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool));
+	if (cstate->opts.force_quote_all)
+	{
+		int			i;
+
+		for (i = 0; i < num_phys_attrs; i++)
+			cstate->opts.force_quote_flags[i] = true;
+	}
+	else if (cstate->opts.force_quote)
+	{
+		List	   *attnums;
+		ListCell   *cur;
+
+		attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->opts.force_quote);
+
+		foreach(cur, attnums)
+		{
+			int			attnum = lfirst_int(cur);
+			Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);
+
+			if (!list_member_int(cstate->attnumlist, attnum))
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+						 errmsg("FORCE_QUOTE column \"%s\" not referenced by COPY",
+								NameStr(attr->attname))));
+			cstate->opts.force_quote_flags[attnum - 1] = true;
+		}
+	}
+
+	/* Convert FORCE_NOT_NULL name list to per-column flags, check validity */
+	cstate->opts.force_notnull_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool));
+	if (cstate->opts.force_notnull)
+	{
+		List	   *attnums;
+		ListCell   *cur;
+
+		attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->opts.force_notnull);
+
+		foreach(cur, attnums)
+		{
+			int			attnum = lfirst_int(cur);
+			Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);
+
+			if (!list_member_int(cstate->attnumlist, attnum))
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+						 errmsg("FORCE_NOT_NULL column \"%s\" not referenced by COPY",
+								NameStr(attr->attname))));
+			cstate->opts.force_notnull_flags[attnum - 1] = true;
+		}
+	}
+
+	/* Convert FORCE_NULL name list to per-column flags, check validity */
+	cstate->opts.force_null_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool));
+	if (cstate->opts.force_null)
+	{
+		List	   *attnums;
+		ListCell   *cur;
+
+		attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->opts.force_null);
+
+		foreach(cur, attnums)
+		{
+			int			attnum = lfirst_int(cur);
+			Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);
+
+			if (!list_member_int(cstate->attnumlist, attnum))
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+						 errmsg("FORCE_NULL column \"%s\" not referenced by COPY",
+								NameStr(attr->attname))));
+			cstate->opts.force_null_flags[attnum - 1] = true;
+		}
+	}
+
+	/* Use client encoding when ENCODING option is not specified. */
+	if (cstate->opts.file_encoding < 0)
+		cstate->file_encoding = pg_get_client_encoding();
+	else
+		cstate->file_encoding = cstate->opts.file_encoding;
+
+	/*
+	 * Set up encoding conversion info.  Even if the file and server encodings
+	 * are the same, we must apply pg_any_to_server() to validate data in
+	 * multibyte encodings.
+	 */
+	cstate->need_transcoding =
+		(cstate->file_encoding != GetDatabaseEncoding() ||
+		 pg_database_encoding_max_length() > 1);
+	/* See Multibyte encoding comment above */
+	cstate->encoding_embeds_ascii = PG_ENCODING_IS_CLIENT_ONLY(cstate->file_encoding);
+
+	cstate->copy_dest = COPY_FILE;	/* default */
+
+	if (pipe)
+	{
+		progress_vals[1] = PROGRESS_COPY_TYPE_PIPE;
+
+		Assert(!is_program);	/* the grammar does not allow this */
+		if (whereToSendOutput != DestRemote)
+			cstate->copy_file = stdout;
+	}
+	else
+	{
+		cstate->filename = pstrdup(filename);
+		cstate->is_program = is_program;
+
+		if (is_program)
+		{
+			progress_vals[1] = PROGRESS_COPY_TYPE_PROGRAM;
+			cstate->copy_file = OpenPipeStream(cstate->filename, PG_BINARY_W);
+			if (cstate->copy_file == NULL)
+				ereport(ERROR,
+						(errcode_for_file_access(),
+						 errmsg("could not execute command \"%s\": %m",
+								cstate->filename)));
+		}
+		else
+		{
+			mode_t		oumask; /* Pre-existing umask value */
+			struct stat st;
+
+			progress_vals[1] = PROGRESS_COPY_TYPE_FILE;
+
+			/*
+			 * Prevent write to relative path ... too easy to shoot oneself in
+			 * the foot by overwriting a database file ...
+			 */
+			if (!is_absolute_path(filename))
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_NAME),
+						 errmsg("relative path not allowed for COPY to file")));
+
+			/* Create file with restrictive mode; restore umask even on error */
+			oumask = umask(S_IWGRP | S_IWOTH);
+			PG_TRY();
+			{
+				cstate->copy_file = AllocateFile(cstate->filename, PG_BINARY_W);
+			}
+			PG_FINALLY();
+			{
+				umask(oumask);
+			}
+			PG_END_TRY();
+			if (cstate->copy_file == NULL)
+			{
+				/* copy errno because ereport subfunctions might change it */
+				int			save_errno = errno;
+
+				ereport(ERROR,
+						(errcode_for_file_access(),
+						 errmsg("could not open file \"%s\" for writing: %m",
+								cstate->filename),
+						 (save_errno == ENOENT || save_errno == EACCES) ?
+						 errhint("COPY TO instructs the PostgreSQL server process to write a file. "
+								 "You may want a client-side facility such as psql's \\copy.") : 0));
+			}
+
+			if (fstat(fileno(cstate->copy_file), &st))
+				ereport(ERROR,
+						(errcode_for_file_access(),
+						 errmsg("could not stat file \"%s\": %m",
+								cstate->filename)));
+
+			if (S_ISDIR(st.st_mode))
+				ereport(ERROR,
+						(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+						 errmsg("\"%s\" is a directory", cstate->filename)));
+		}
+	}
+
+	/* initialize progress */
+	pgstat_progress_start_command(PROGRESS_COMMAND_COPY,
+								  cstate->rel ? RelationGetRelid(cstate->rel) : InvalidOid);
+	pgstat_progress_update_multi_param(2, progress_cols, progress_vals);
+
+	cstate->bytes_processed = 0;
+
+	MemoryContextSwitchTo(oldcontext);
+
+	return cstate;
+}
+
+/*
+ * Clean up storage and release resources for COPY TO.
+ */
+void
+EndCopyTo(CopyToState cstate)
+{
+	QueryDesc  *queryDesc = cstate->queryDesc;
+
+	/* For a query-based COPY, shut down the executor machinery first. */
+	if (queryDesc != NULL)
+	{
+		ExecutorFinish(queryDesc);
+		ExecutorEnd(queryDesc);
+		FreeQueryDesc(queryDesc);
+		PopActiveSnapshot();
+	}
+
+	/* Close the output file/pipe and free the COPY memory context. */
+	EndCopy(cstate);
+}
+
+/*
+ * Copy from relation or query TO file.
+ *
+ * Runs the actual data transfer set up by BeginCopyTo: emits any header,
+ * then one formatted row per source tuple, then any trailer.  Returns the
+ * number of tuples processed.
+ */
+uint64
+DoCopyTo(CopyToState cstate)
+{
+	bool		pipe = (cstate->filename == NULL);
+	/* "frontend copy": stream rows over the client protocol */
+	bool		fe_copy = (pipe && whereToSendOutput == DestRemote);
+	TupleDesc	tupDesc;
+	int			num_phys_attrs;
+	ListCell   *cur;
+	uint64		processed;
+
+	if (fe_copy)
+		SendCopyBegin(cstate);
+
+	if (cstate->rel)
+		tupDesc = RelationGetDescr(cstate->rel);
+	else
+		tupDesc = cstate->queryDesc->tupDesc;
+	num_phys_attrs = tupDesc->natts;
+	cstate->opts.null_print_client = cstate->opts.null_print;	/* default */
+
+	/* We use fe_msgbuf as a per-row buffer regardless of copy_dest */
+	cstate->fe_msgbuf = makeStringInfo();
+
+	/*
+	 * Get info about the columns we need to process: look up the text or
+	 * binary output function for each copied attribute.
+	 */
+	cstate->out_functions = (FmgrInfo *) palloc(num_phys_attrs * sizeof(FmgrInfo));
+	foreach(cur, cstate->attnumlist)
+	{
+		int			attnum = lfirst_int(cur);
+		Oid			out_func_oid;
+		bool		isvarlena;
+		Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);
+
+		if (cstate->opts.binary)
+			getTypeBinaryOutputInfo(attr->atttypid,
+									&out_func_oid,
+									&isvarlena);
+		else
+			getTypeOutputInfo(attr->atttypid,
+							  &out_func_oid,
+							  &isvarlena);
+		fmgr_info(out_func_oid, &cstate->out_functions[attnum - 1]);
+	}
+
+	/*
+	 * Create a temporary memory context that we can reset once per row to
+	 * recover palloc'd memory.  This avoids any problems with leaks inside
+	 * datatype output routines, and should be faster than retail pfree's
+	 * anyway.  (We don't need a whole econtext as CopyFrom does.)
+	 */
+	cstate->rowcontext = AllocSetContextCreate(CurrentMemoryContext,
+											   "COPY TO",
+											   ALLOCSET_DEFAULT_SIZES);
+
+	if (cstate->opts.binary)
+	{
+		/* Generate header for a binary copy */
+		int32		tmp;
+
+		/* Signature (11-byte fixed prefix identifying the binary format) */
+		CopySendData(cstate, BinarySignature, 11);
+		/* Flags field */
+		tmp = 0;
+		CopySendInt32(cstate, tmp);
+		/* No header extension */
+		tmp = 0;
+		CopySendInt32(cstate, tmp);
+	}
+	else
+	{
+		/*
+		 * For non-binary copy, we need to convert null_print to file
+		 * encoding, because it will be sent directly with CopySendString.
+		 */
+		if (cstate->need_transcoding)
+			cstate->opts.null_print_client = pg_server_to_any(cstate->opts.null_print,
+															  cstate->opts.null_print_len,
+															  cstate->file_encoding);
+
+		/* if a header has been requested send the line */
+		if (cstate->opts.header_line)
+		{
+			bool		hdr_delim = false;
+
+			foreach(cur, cstate->attnumlist)
+			{
+				int			attnum = lfirst_int(cur);
+				char	   *colname;
+
+				if (hdr_delim)
+					CopySendChar(cstate, cstate->opts.delim[0]);
+				hdr_delim = true;
+
+				colname = NameStr(TupleDescAttr(tupDesc, attnum - 1)->attname);
+
+				/* Column names get the same escaping rules as data */
+				if (cstate->opts.csv_mode)
+					CopyAttributeOutCSV(cstate, colname, false,
+										list_length(cstate->attnumlist) == 1);
+				else
+					CopyAttributeOutText(cstate, colname);
+			}
+
+			CopySendEndOfRow(cstate);
+		}
+	}
+
+	if (cstate->rel)
+	{
+		/* Table case: sequential-scan the relation ourselves */
+		TupleTableSlot *slot;
+		TableScanDesc scandesc;
+
+		scandesc = table_beginscan(cstate->rel, GetActiveSnapshot(), 0, NULL);
+		slot = table_slot_create(cstate->rel, NULL);
+
+		processed = 0;
+		while (table_scan_getnextslot(scandesc, ForwardScanDirection, slot))
+		{
+			CHECK_FOR_INTERRUPTS();
+
+			/* Deconstruct the tuple ... */
+			slot_getallattrs(slot);
+
+			/* Format and send the data */
+			CopyOneRowTo(cstate, slot);
+
+			/*
+			 * Increment the number of processed tuples, and report the
+			 * progress.
+			 */
+			pgstat_progress_update_param(PROGRESS_COPY_TUPLES_PROCESSED,
+										 ++processed);
+		}
+
+		ExecDropSingleTupleTableSlot(slot);
+		table_endscan(scandesc);
+	}
+	else
+	{
+		/* run the plan --- the dest receiver will send tuples */
+		ExecutorRun(cstate->queryDesc, ForwardScanDirection, 0L, true);
+		processed = ((DR_copy *) cstate->queryDesc->dest)->processed;
+	}
+
+	if (cstate->opts.binary)
+	{
+		/* Generate trailer for a binary copy (tuple count of -1) */
+		CopySendInt16(cstate, -1);
+		/* Need to flush out the trailer */
+		CopySendEndOfRow(cstate);
+	}
+
+	MemoryContextDelete(cstate->rowcontext);
+
+	if (fe_copy)
+		SendCopyEnd(cstate);
+
+	return processed;
+}
+
+/*
+ * Emit one row during DoCopyTo().
+ *
+ * Formats each selected attribute of 'slot' according to the text, CSV or
+ * binary COPY rules and appends it to fe_msgbuf, finishing with the row
+ * terminator.  All per-row palloc'd data lives in rowcontext, which is
+ * reset here at the start of every call.
+ */
+static void
+CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot)
+{
+	bool		need_delim = false;
+	FmgrInfo   *out_functions = cstate->out_functions;
+	MemoryContext oldcontext;
+	ListCell   *cur;
+	char	   *string;
+
+	/* Reclaim memory from the previous row's output functions */
+	MemoryContextReset(cstate->rowcontext);
+	oldcontext = MemoryContextSwitchTo(cstate->rowcontext);
+
+	if (cstate->opts.binary)
+	{
+		/* Binary per-tuple header: 16-bit field count */
+		CopySendInt16(cstate, list_length(cstate->attnumlist));
+	}
+
+	/* Make sure the tuple is fully deconstructed */
+	slot_getallattrs(slot);
+
+	foreach(cur, cstate->attnumlist)
+	{
+		int			attnum = lfirst_int(cur);
+		Datum		value = slot->tts_values[attnum - 1];
+		bool		isnull = slot->tts_isnull[attnum - 1];
+
+		/* Text/CSV rows separate fields with the delimiter character */
+		if (!cstate->opts.binary)
+		{
+			if (need_delim)
+				CopySendChar(cstate, cstate->opts.delim[0]);
+			need_delim = true;
+		}
+
+		if (isnull)
+		{
+			/* NULLs: the null string in text/CSV, length -1 in binary */
+			if (!cstate->opts.binary)
+				CopySendString(cstate, cstate->opts.null_print_client);
+			else
+				CopySendInt32(cstate, -1);
+		}
+		else
+		{
+			if (!cstate->opts.binary)
+			{
+				string = OutputFunctionCall(&out_functions[attnum - 1],
+											value);
+				if (cstate->opts.csv_mode)
+					CopyAttributeOutCSV(cstate, string,
+										cstate->opts.force_quote_flags[attnum - 1],
+										list_length(cstate->attnumlist) == 1);
+				else
+					CopyAttributeOutText(cstate, string);
+			}
+			else
+			{
+				/* Binary: 32-bit byte length followed by the send-function data */
+				bytea	   *outputbytes;
+
+				outputbytes = SendFunctionCall(&out_functions[attnum - 1],
+											   value);
+				CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
+				CopySendData(cstate, VARDATA(outputbytes),
+							 VARSIZE(outputbytes) - VARHDRSZ);
+			}
+		}
+	}
+
+	CopySendEndOfRow(cstate);
+
+	MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Send text representation of one attribute, with conversion and escaping
+ */
+
+/*
+ * Flush the literal (not-needing-escape) bytes accumulated between 'start'
+ * and 'ptr' in a single CopySendData call.
+ */
+#define DUMPSOFAR() \
+	do { \
+		if (ptr > start) \
+			CopySendData(cstate, start, ptr - start); \
+	} while (0)
+
+static void
+CopyAttributeOutText(CopyToState cstate, const char *string)
+{
+	const char *ptr;
+	const char *start;
+	char		c;
+	char		delimc = cstate->opts.delim[0];
+
+	/* Convert to the file encoding first, if needed */
+	if (cstate->need_transcoding)
+		ptr = pg_server_to_any(string, strlen(string), cstate->file_encoding);
+	else
+		ptr = string;
+
+	/*
+	 * We have to grovel through the string searching for control characters
+	 * and instances of the delimiter character.  In most cases, though, these
+	 * are infrequent.  To avoid overhead from calling CopySendData once per
+	 * character, we dump out all characters between escaped characters in a
+	 * single call.  The loop invariant is that the data from "start" to "ptr"
+	 * can be sent literally, but hasn't yet been.
+	 *
+	 * We can skip pg_encoding_mblen() overhead when encoding is safe, because
+	 * in valid backend encodings, extra bytes of a multibyte character never
+	 * look like ASCII.  This loop is sufficiently performance-critical that
+	 * it's worth making two copies of it to get the IS_HIGHBIT_SET() test out
+	 * of the normal safe-encoding path.
+	 */
+	if (cstate->encoding_embeds_ascii)
+	{
+		start = ptr;
+		while ((c = *ptr) != '\0')
+		{
+			if ((unsigned char) c < (unsigned char) 0x20)
+			{
+				/*
+				 * \r and \n must be escaped, the others are traditional. We
+				 * prefer to dump these using the C-like notation, rather than
+				 * a backslash and the literal character, because it makes the
+				 * dump file a bit more proof against Microsoftish data
+				 * mangling.
+				 */
+				switch (c)
+				{
+					case '\b':
+						c = 'b';
+						break;
+					case '\f':
+						c = 'f';
+						break;
+					case '\n':
+						c = 'n';
+						break;
+					case '\r':
+						c = 'r';
+						break;
+					case '\t':
+						c = 't';
+						break;
+					case '\v':
+						c = 'v';
+						break;
+					default:
+						/* If it's the delimiter, must backslash it */
+						if (c == delimc)
+							break;
+						/* All ASCII control chars are length 1 */
+						ptr++;
+						continue;	/* fall to end of loop */
+				}
+				/* if we get here, we need to convert the control char */
+				DUMPSOFAR();
+				CopySendChar(cstate, '\\');
+				CopySendChar(cstate, c);
+				start = ++ptr;	/* do not include char in next run */
+			}
+			else if (c == '\\' || c == delimc)
+			{
+				/* backslash-escape; the char itself goes out in the next run */
+				DUMPSOFAR();
+				CopySendChar(cstate, '\\');
+				start = ptr++;	/* we include char in next run */
+			}
+			else if (IS_HIGHBIT_SET(c))
+				ptr += pg_encoding_mblen(cstate->file_encoding, ptr);
+			else
+				ptr++;
+		}
+	}
+	else
+	{
+		/* Safe-encoding copy of the loop above, minus the mblen handling */
+		start = ptr;
+		while ((c = *ptr) != '\0')
+		{
+			if ((unsigned char) c < (unsigned char) 0x20)
+			{
+				/*
+				 * \r and \n must be escaped, the others are traditional. We
+				 * prefer to dump these using the C-like notation, rather than
+				 * a backslash and the literal character, because it makes the
+				 * dump file a bit more proof against Microsoftish data
+				 * mangling.
+				 */
+				switch (c)
+				{
+					case '\b':
+						c = 'b';
+						break;
+					case '\f':
+						c = 'f';
+						break;
+					case '\n':
+						c = 'n';
+						break;
+					case '\r':
+						c = 'r';
+						break;
+					case '\t':
+						c = 't';
+						break;
+					case '\v':
+						c = 'v';
+						break;
+					default:
+						/* If it's the delimiter, must backslash it */
+						if (c == delimc)
+							break;
+						/* All ASCII control chars are length 1 */
+						ptr++;
+						continue;	/* fall to end of loop */
+				}
+				/* if we get here, we need to convert the control char */
+				DUMPSOFAR();
+				CopySendChar(cstate, '\\');
+				CopySendChar(cstate, c);
+				start = ++ptr;	/* do not include char in next run */
+			}
+			else if (c == '\\' || c == delimc)
+			{
+				DUMPSOFAR();
+				CopySendChar(cstate, '\\');
+				start = ptr++;	/* we include char in next run */
+			}
+			else
+				ptr++;
+		}
+	}
+
+	/* Send any remaining literal tail */
+	DUMPSOFAR();
+}
+
+/*
+ * Send text representation of one attribute, with conversion and
+ * CSV-style escaping
+ *
+ * 'use_quote' forces quoting (set for FORCE_QUOTE columns); otherwise we
+ * quote only when the value contains the delimiter, the quote character,
+ * a newline/carriage return, or would be ambiguous (matches the null
+ * string, or is "\." alone in a single-column row).
+ */
+static void
+CopyAttributeOutCSV(CopyToState cstate, const char *string,
+					bool use_quote, bool single_attr)
+{
+	const char *ptr;
+	const char *start;
+	char		c;
+	char		delimc = cstate->opts.delim[0];
+	char		quotec = cstate->opts.quote[0];
+	char		escapec = cstate->opts.escape[0];
+
+	/* force quoting if it matches null_print (before conversion!) */
+	if (!use_quote && strcmp(string, cstate->opts.null_print) == 0)
+		use_quote = true;
+
+	if (cstate->need_transcoding)
+		ptr = pg_server_to_any(string, strlen(string), cstate->file_encoding);
+	else
+		ptr = string;
+
+	/*
+	 * Make a preliminary pass to discover if it needs quoting
+	 */
+	if (!use_quote)
+	{
+		/*
+		 * Because '\.' can be a data value, quote it if it appears alone on a
+		 * line so it is not interpreted as the end-of-data marker.
+		 */
+		if (single_attr && strcmp(ptr, "\\.") == 0)
+			use_quote = true;
+		else
+		{
+			const char *tptr = ptr;
+
+			while ((c = *tptr) != '\0')
+			{
+				if (c == delimc || c == quotec || c == '\n' || c == '\r')
+				{
+					use_quote = true;
+					break;
+				}
+				if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii)
+					tptr += pg_encoding_mblen(cstate->file_encoding, tptr);
+				else
+					tptr++;
+			}
+		}
+	}
+
+	if (use_quote)
+	{
+		CopySendChar(cstate, quotec);
+
+		/*
+		 * We adopt the same optimization strategy as in CopyAttributeOutText
+		 */
+		start = ptr;
+		while ((c = *ptr) != '\0')
+		{
+			if (c == quotec || c == escapec)
+			{
+				/* emit escape char; the escaped char goes in the next run */
+				DUMPSOFAR();
+				CopySendChar(cstate, escapec);
+				start = ptr;	/* we include char in next run */
+			}
+			if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii)
+				ptr += pg_encoding_mblen(cstate->file_encoding, ptr);
+			else
+				ptr++;
+		}
+		DUMPSOFAR();
+
+		CopySendChar(cstate, quotec);
+	}
+	else
+	{
+		/* If it doesn't need quoting, we can just dump it as-is */
+		CopySendString(cstate, ptr);
+	}
+}
+
+/*
+ * copy_dest_startup --- executor startup
+ *
+ * Nothing to do: all COPY OUT setup happens in BeginCopyTo/DoCopyTo.
+ */
+static void
+copy_dest_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
+{
+	/* no-op */
+}
+
+/*
+ * copy_dest_receive --- receive one tuple
+ */
+static bool
+copy_dest_receive(TupleTableSlot *slot, DestReceiver *self)
+{
+	DR_copy    *dr = (DR_copy *) self;
+
+	/* Format and send this row */
+	CopyOneRowTo(dr->cstate, slot);
+
+	/* Bump the tuple counter and publish it for progress reporting */
+	dr->processed++;
+	pgstat_progress_update_param(PROGRESS_COPY_TUPLES_PROCESSED,
+								 dr->processed);
+
+	return true;
+}
+
+/*
+ * copy_dest_shutdown --- executor end
+ *
+ * Nothing to do: DoCopyTo/EndCopyTo handle trailer output and cleanup.
+ */
+static void
+copy_dest_shutdown(DestReceiver *self)
+{
+	/* no-op */
+}
+
+/*
+ * copy_dest_destroy --- release DestReceiver object
+ *
+ * Frees only the receiver itself; the CopyToState it points to is owned by
+ * the COPY code and released separately.
+ */
+static void
+copy_dest_destroy(DestReceiver *self)
+{
+	pfree(self);
+}
+
+/*
+ * CreateCopyDestReceiver -- create a suitable DestReceiver object
+ */
+DestReceiver *
+CreateCopyDestReceiver(void)
+{
+	DR_copy    *dr = (DR_copy *) palloc(sizeof(DR_copy));
+
+	/* Wire up the DestReceiver callbacks for COPY OUT */
+	dr->pub.mydest = DestCopyOut;
+	dr->pub.receiveSlot = copy_dest_receive;
+	dr->pub.rStartup = copy_dest_startup;
+	dr->pub.rShutdown = copy_dest_shutdown;
+	dr->pub.rDestroy = copy_dest_destroy;
+
+	/* cstate is unknown here; BeginCopyTo fills it in after creation */
+	dr->cstate = NULL;
+	dr->processed = 0;
+
+	return (DestReceiver *) dr;
+}
diff --git a/src/backend/commands/createas.c b/src/backend/commands/createas.c
new file mode 100644
index 0000000..152c29b
--- /dev/null
+++ b/src/backend/commands/createas.c
@@ -0,0 +1,637 @@
+/*-------------------------------------------------------------------------
+ *
+ * createas.c
+ * Execution of CREATE TABLE ... AS, a/k/a SELECT INTO.
+ * Since CREATE MATERIALIZED VIEW shares syntax and most behaviors,
+ * we implement that here, too.
+ *
+ * We implement this by diverting the query's normal output to a
+ * specialized DestReceiver type.
+ *
+ * Formerly, CTAS was implemented as a variant of SELECT, which led
+ * to assorted legacy behaviors that we still try to preserve, notably that
+ * we must return a tuples-processed count in the QueryCompletion. (We no
+ * longer do that for CTAS ... WITH NO DATA, however.)
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/commands/createas.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/heapam.h"
+#include "access/htup_details.h"
+#include "access/reloptions.h"
+#include "access/sysattr.h"
+#include "access/tableam.h"
+#include "access/xact.h"
+#include "access/xlog.h"
+#include "catalog/namespace.h"
+#include "catalog/toasting.h"
+#include "commands/createas.h"
+#include "commands/matview.h"
+#include "commands/prepare.h"
+#include "commands/tablecmds.h"
+#include "commands/view.h"
+#include "miscadmin.h"
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "parser/parse_clause.h"
+#include "rewrite/rewriteHandler.h"
+#include "storage/smgr.h"
+#include "tcop/tcopprot.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/rel.h"
+#include "utils/rls.h"
+#include "utils/snapmgr.h"
+
+/*
+ * DestReceiver subclass that writes incoming tuples into the new table
+ * created by CREATE TABLE AS / CREATE MATERIALIZED VIEW.
+ */
+typedef struct
+{
+	DestReceiver pub;			/* publicly-known function pointers */
+	IntoClause *into;			/* target relation specification */
+	/* These fields are filled by intorel_startup: */
+	Relation	rel;			/* relation to write to */
+	ObjectAddress reladdr;		/* address of rel, for ExecCreateTableAs */
+	CommandId	output_cid;		/* cmin to insert in output tuples */
+	int			ti_options;		/* table_tuple_insert performance options */
+	BulkInsertState bistate;	/* bulk insert state */
+} DR_intorel;
+
+/* utility functions for CTAS definition creation */
+static ObjectAddress create_ctas_internal(List *attrList, IntoClause *into);
+static ObjectAddress create_ctas_nodata(List *tlist, IntoClause *into);
+
+/* DestReceiver routines for collecting data */
+static void intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo);
+static bool intorel_receive(TupleTableSlot *slot, DestReceiver *self);
+static void intorel_shutdown(DestReceiver *self);
+static void intorel_destroy(DestReceiver *self);
+
+
+/*
+ * create_ctas_internal
+ *
+ * Internal utility used for the creation of the definition of a relation
+ * created via CREATE TABLE AS or a materialized view.  Caller needs to
+ * provide a list of attributes (ColumnDef nodes).
+ *
+ * Returns the ObjectAddress of the new relation.  Leaves the command
+ * counter incremented so the relation (and its TOAST table, if any) is
+ * visible to subsequent operations in this command.
+ */
+static ObjectAddress
+create_ctas_internal(List *attrList, IntoClause *into)
+{
+	CreateStmt *create = makeNode(CreateStmt);
+	bool		is_matview;
+	char		relkind;
+	Datum		toast_options;
+	static char *validnsps[] = HEAP_RELOPT_NAMESPACES;
+	ObjectAddress intoRelationAddr;
+
+	/* This code supports both CREATE TABLE AS and CREATE MATERIALIZED VIEW */
+	is_matview = (into->viewQuery != NULL);
+	relkind = is_matview ? RELKIND_MATVIEW : RELKIND_RELATION;
+
+	/*
+	 * Create the target relation by faking up a CREATE TABLE parsetree and
+	 * passing it to DefineRelation.
+	 */
+	create->relation = into->rel;
+	create->tableElts = attrList;
+	create->inhRelations = NIL;
+	create->ofTypename = NULL;
+	create->constraints = NIL;
+	create->options = into->options;
+	create->oncommit = into->onCommit;
+	create->tablespacename = into->tableSpaceName;
+	create->if_not_exists = false;
+	create->accessMethod = into->accessMethod;
+
+	/*
+	 * Create the relation.  (This will error out if there's an existing view,
+	 * so we don't need more code to complain if "replace" is false.)
+	 */
+	intoRelationAddr = DefineRelation(create, relkind, InvalidOid, NULL, NULL);
+
+	/*
+	 * If necessary, create a TOAST table for the target table.  Note that
+	 * NewRelationCreateToastTable ends with CommandCounterIncrement(), so
+	 * that the TOAST table will be visible for insertion.
+	 */
+	CommandCounterIncrement();
+
+	/* parse and validate reloptions for the toast table */
+	toast_options = transformRelOptions((Datum) 0,
+										create->options,
+										"toast",
+										validnsps,
+										true, false);
+
+	(void) heap_reloptions(RELKIND_TOASTVALUE, toast_options, true);
+
+	NewRelationCreateToastTable(intoRelationAddr.objectId, toast_options);
+
+	/* Create the "view" part of a materialized view. */
+	if (is_matview)
+	{
+		/* StoreViewQuery scribbles on tree, so make a copy */
+		Query	   *query = (Query *) copyObject(into->viewQuery);
+
+		StoreViewQuery(intoRelationAddr.objectId, query, false);
+		CommandCounterIncrement();
+	}
+
+	return intoRelationAddr;
+}
+
+
+/*
+ * create_ctas_nodata
+ *
+ * Create CTAS or materialized view when WITH NO DATA is used, starting from
+ * the targetlist of the SELECT or view definition.
+ */
+static ObjectAddress
+create_ctas_nodata(List *tlist, IntoClause *into)
+{
+	List	   *attrList = NIL;
+	ListCell   *tle_cell;
+	ListCell   *name_cell = list_head(into->colNames);
+
+	/*
+	 * Build list of ColumnDefs from non-junk elements of the tlist.  If a
+	 * column name list was specified in CREATE TABLE AS, override the column
+	 * names in the query.  (Too few column names are OK, too many are not.)
+	 */
+	foreach(tle_cell, tlist)
+	{
+		TargetEntry *tle = (TargetEntry *) lfirst(tle_cell);
+		ColumnDef  *col;
+		char	   *colname;
+
+		/* Junk columns (e.g. sort/group keys) don't become table columns */
+		if (tle->resjunk)
+			continue;
+
+		if (name_cell != NULL)
+		{
+			/* A caller-supplied name overrides the query's column name */
+			colname = strVal(lfirst(name_cell));
+			name_cell = lnext(into->colNames, name_cell);
+		}
+		else
+			colname = tle->resname;
+
+		col = makeColumnDef(colname,
+							exprType((Node *) tle->expr),
+							exprTypmod((Node *) tle->expr),
+							exprCollation((Node *) tle->expr));
+
+		/*
+		 * It's possible that the column is of a collatable type but the
+		 * collation could not be resolved, so double-check.  (We must check
+		 * this here because DefineRelation would adopt the type's default
+		 * collation rather than complaining.)
+		 */
+		if (!OidIsValid(col->collOid) &&
+			type_is_collatable(col->typeName->typeOid))
+			ereport(ERROR,
+					(errcode(ERRCODE_INDETERMINATE_COLLATION),
+					 errmsg("no collation was derived for column \"%s\" with collatable type %s",
+							col->colname,
+							format_type_be(col->typeName->typeOid)),
+					 errhint("Use the COLLATE clause to set the collation explicitly.")));
+
+		attrList = lappend(attrList, col);
+	}
+
+	/* Any names left over means more names than non-junk output columns */
+	if (name_cell != NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("too many column names were specified")));
+
+	/* Create the relation definition using the ColumnDef list */
+	return create_ctas_internal(attrList, into);
+}
+
+
+/*
+ * ExecCreateTableAs -- execute a CREATE TABLE AS command
+ *
+ * Handles both CREATE TABLE AS and CREATE MATERIALIZED VIEW (the latter is
+ * detected by into->viewQuery being non-NULL).  Returns the ObjectAddress
+ * of the created relation, or InvalidObjectAddress when IF NOT EXISTS was
+ * given and the relation already exists.
+ *
+ * pstate    - parse state (used for the original source text)
+ * stmt      - the utility statement to execute
+ * params    - parameter values, if any
+ * queryEnv  - query environment passed through to the executor
+ * qc        - if not NULL, receives the command completion data (row count)
+ */
+ObjectAddress
+ExecCreateTableAs(ParseState *pstate, CreateTableAsStmt *stmt,
+				  ParamListInfo params, QueryEnvironment *queryEnv,
+				  QueryCompletion *qc)
+{
+	Query	   *query = castNode(Query, stmt->query);
+	IntoClause *into = stmt->into;
+	bool		is_matview = (into->viewQuery != NULL);
+	DestReceiver *dest;
+	Oid			save_userid = InvalidOid;
+	int			save_sec_context = 0;
+	int			save_nestlevel = 0;
+	ObjectAddress address;
+	List	   *rewritten;
+	PlannedStmt *plan;
+	QueryDesc  *queryDesc;
+
+	/* Check if the relation exists or not */
+	if (CreateTableAsRelExists(stmt))
+		return InvalidObjectAddress;
+
+	/*
+	 * Create the tuple receiver object and insert info it will need
+	 */
+	dest = CreateIntoRelDestReceiver(into);
+
+	/*
+	 * The contained Query could be a SELECT, or an EXECUTE utility command.
+	 * If the latter, we just pass it off to ExecuteQuery.
+	 */
+	if (query->commandType == CMD_UTILITY &&
+		IsA(query->utilityStmt, ExecuteStmt))
+	{
+		ExecuteStmt *estmt = castNode(ExecuteStmt, query->utilityStmt);
+
+		Assert(!is_matview);	/* excluded by syntax */
+		ExecuteQuery(pstate, estmt, into, params, dest, qc);
+
+		/* get object address that intorel_startup saved for us */
+		address = ((DR_intorel *) dest)->reladdr;
+
+		return address;
+	}
+	Assert(query->commandType == CMD_SELECT);
+
+	/*
+	 * For materialized views, lock down security-restricted operations and
+	 * arrange to make GUC variable changes local to this command.  This is
+	 * not necessary for security, but this keeps the behavior similar to
+	 * REFRESH MATERIALIZED VIEW.  Otherwise, one could create a materialized
+	 * view not possible to refresh.
+	 */
+	if (is_matview)
+	{
+		GetUserIdAndSecContext(&save_userid, &save_sec_context);
+		SetUserIdAndSecContext(save_userid,
+							   save_sec_context | SECURITY_RESTRICTED_OPERATION);
+		save_nestlevel = NewGUCNestLevel();
+	}
+
+	if (into->skipData)
+	{
+		/*
+		 * If WITH NO DATA was specified, do not go through the rewriter,
+		 * planner and executor.  Just define the relation using a code path
+		 * similar to CREATE VIEW.  This avoids dump/restore problems stemming
+		 * from running the planner before all dependencies are set up.
+		 */
+		address = create_ctas_nodata(query->targetList, into);
+	}
+	else
+	{
+		/*
+		 * Parse analysis was done already, but we still have to run the rule
+		 * rewriter.  We do not do AcquireRewriteLocks: we assume the query
+		 * either came straight from the parser, or suitable locks were
+		 * acquired by plancache.c.
+		 */
+		rewritten = QueryRewrite(query);
+
+		/* SELECT should never rewrite to more or less than one SELECT query */
+		if (list_length(rewritten) != 1)
+			elog(ERROR, "unexpected rewrite result for %s",
+				 is_matview ? "CREATE MATERIALIZED VIEW" :
+				 "CREATE TABLE AS SELECT");
+		query = linitial_node(Query, rewritten);
+		Assert(query->commandType == CMD_SELECT);
+
+		/* plan the query */
+		plan = pg_plan_query(query, pstate->p_sourcetext,
+							 CURSOR_OPT_PARALLEL_OK, params);
+
+		/*
+		 * Use a snapshot with an updated command ID to ensure this query sees
+		 * results of any previously executed queries.  (This could only
+		 * matter if the planner executed an allegedly-stable function that
+		 * changed the database contents, but let's do it anyway to be
+		 * parallel to the EXPLAIN code path.)
+		 */
+		PushCopiedSnapshot(GetActiveSnapshot());
+		UpdateActiveSnapshotCommandId();
+
+		/* Create a QueryDesc, redirecting output to our tuple receiver */
+		queryDesc = CreateQueryDesc(plan, pstate->p_sourcetext,
+									GetActiveSnapshot(), InvalidSnapshot,
+									dest, params, queryEnv, 0);
+
+		/* call ExecutorStart to prepare the plan for execution */
+		ExecutorStart(queryDesc, GetIntoRelEFlags(into));
+
+		/* run the plan to completion */
+		ExecutorRun(queryDesc, ForwardScanDirection, 0L, true);
+
+		/* save the rowcount if we're given a qc to fill */
+		if (qc)
+			SetQueryCompletion(qc, CMDTAG_SELECT, queryDesc->estate->es_processed);
+
+		/* get object address that intorel_startup saved for us */
+		address = ((DR_intorel *) dest)->reladdr;
+
+		/* and clean up */
+		ExecutorFinish(queryDesc);
+		ExecutorEnd(queryDesc);
+
+		FreeQueryDesc(queryDesc);
+
+		PopActiveSnapshot();
+	}
+
+	if (is_matview)
+	{
+		/* Roll back any GUC changes */
+		AtEOXact_GUC(false, save_nestlevel);
+
+		/* Restore userid and security context */
+		SetUserIdAndSecContext(save_userid, save_sec_context);
+	}
+
+	return address;
+}
+
+/*
+ * GetIntoRelEFlags --- compute executor flags needed for CREATE TABLE AS
+ *
+ * This is exported because EXPLAIN and PREPARE need it too.  (Note: those
+ * callers still need to deal explicitly with the skipData flag; since they
+ * use different methods for suppressing execution, it doesn't seem worth
+ * trying to encapsulate that part.)
+ */
+int
+GetIntoRelEFlags(IntoClause *intoClause)
+{
+	/* The only flag we currently derive is for WITH NO DATA. */
+	return intoClause->skipData ? EXEC_FLAG_WITH_NO_DATA : 0;
+}
+
+/*
+ * CreateTableAsRelExists --- check existence of relation for CreateTableAsStmt
+ *
+ * Utility wrapper checking if the relation pending for creation in this
+ * CreateTableAsStmt query already exists or not.  Returns true if the
+ * relation exists, otherwise false.  Raises an error for a pre-existing
+ * relation unless IF NOT EXISTS was specified.
+ */
+bool
+CreateTableAsRelExists(CreateTableAsStmt *ctas)
+{
+	IntoClause *into = ctas->into;
+	Oid			namespaceId;
+	Oid			existing_relid;
+	ObjectAddress addr;
+
+	/* Resolve the namespace the new relation would be created in. */
+	namespaceId = RangeVarGetCreationNamespace(into->rel);
+
+	existing_relid = get_relname_relid(into->rel->relname, namespaceId);
+	if (!OidIsValid(existing_relid))
+	{
+		/* Relation does not exist, it can be created */
+		return false;
+	}
+
+	/* A relation of that name exists; complain unless IF NOT EXISTS. */
+	if (!ctas->if_not_exists)
+		ereport(ERROR,
+				(errcode(ERRCODE_DUPLICATE_TABLE),
+				 errmsg("relation \"%s\" already exists",
+						into->rel->relname)));
+
+	/*
+	 * The relation exists and IF NOT EXISTS has been specified.
+	 *
+	 * If we are in an extension script, insist that the pre-existing object
+	 * be a member of the extension, to avoid security risks.
+	 */
+	ObjectAddressSet(addr, RelationRelationId, existing_relid);
+	checkMembershipInCurrentExtension(&addr);
+
+	/* OK to skip */
+	ereport(NOTICE,
+			(errcode(ERRCODE_DUPLICATE_TABLE),
+			 errmsg("relation \"%s\" already exists, skipping",
+					into->rel->relname)));
+	return true;
+}
+
+/*
+ * CreateIntoRelDestReceiver -- create a suitable DestReceiver object
+ *
+ * intoClause will be NULL if called from CreateDestReceiver(), in which
+ * case it has to be provided later.  However, it is convenient to allow
+ * the into clause to be filled in immediately for other callers.
+ */
+DestReceiver *
+CreateIntoRelDestReceiver(IntoClause *intoClause)
+{
+	DR_intorel *receiver;
+
+	receiver = (DR_intorel *) palloc0(sizeof(DR_intorel));
+	receiver->pub.mydest = DestIntoRel;
+	receiver->pub.rStartup = intorel_startup;
+	receiver->pub.receiveSlot = intorel_receive;
+	receiver->pub.rShutdown = intorel_shutdown;
+	receiver->pub.rDestroy = intorel_destroy;
+	receiver->into = intoClause;
+
+	/* the remaining private fields are filled in by intorel_startup */
+	return (DestReceiver *) receiver;
+}
+
+/*
+ * intorel_startup --- executor startup
+ *
+ * Creates the target table (or materialized view) using the tuple
+ * descriptor of the incoming query output, opens it, and initializes the
+ * receiver's private state for the inserts performed by intorel_receive.
+ *
+ * The "operation" parameter is part of the DestReceiver API and is unused
+ * here.
+ */
+static void
+intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
+{
+	DR_intorel *myState = (DR_intorel *) self;
+	IntoClause *into = myState->into;
+	bool		is_matview;
+	List	   *attrList;
+	ObjectAddress intoRelationAddr;
+	Relation	intoRelationDesc;
+	ListCell   *lc;
+	int			attnum;
+
+	Assert(into != NULL);		/* else somebody forgot to set it */
+
+	/* This code supports both CREATE TABLE AS and CREATE MATERIALIZED VIEW */
+	is_matview = (into->viewQuery != NULL);
+
+	/*
+	 * Build column definitions using "pre-cooked" type and collation info. If
+	 * a column name list was specified in CREATE TABLE AS, override the
+	 * column names derived from the query.  (Too few column names are OK, too
+	 * many are not.)
+	 */
+	attrList = NIL;
+	lc = list_head(into->colNames);
+	for (attnum = 0; attnum < typeinfo->natts; attnum++)
+	{
+		Form_pg_attribute attribute = TupleDescAttr(typeinfo, attnum);
+		ColumnDef  *col;
+		char	   *colname;
+
+		if (lc)
+		{
+			colname = strVal(lfirst(lc));
+			lc = lnext(into->colNames, lc);
+		}
+		else
+			colname = NameStr(attribute->attname);
+
+		col = makeColumnDef(colname,
+							attribute->atttypid,
+							attribute->atttypmod,
+							attribute->attcollation);
+
+		/*
+		 * It's possible that the column is of a collatable type but the
+		 * collation could not be resolved, so double-check.  (We must check
+		 * this here because DefineRelation would adopt the type's default
+		 * collation rather than complaining.)
+		 */
+		if (!OidIsValid(col->collOid) &&
+			type_is_collatable(col->typeName->typeOid))
+			ereport(ERROR,
+					(errcode(ERRCODE_INDETERMINATE_COLLATION),
+					 errmsg("no collation was derived for column \"%s\" with collatable type %s",
+							col->colname,
+							format_type_be(col->typeName->typeOid)),
+					 errhint("Use the COLLATE clause to set the collation explicitly.")));
+
+		attrList = lappend(attrList, col);
+	}
+
+	/* Leftover list entries mean more names were given than columns exist. */
+	if (lc != NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("too many column names were specified")));
+
+	/*
+	 * Actually create the target table
+	 */
+	intoRelationAddr = create_ctas_internal(attrList, into);
+
+	/*
+	 * Finally we can open the target table
+	 */
+	intoRelationDesc = table_open(intoRelationAddr.objectId, AccessExclusiveLock);
+
+	/*
+	 * Make sure the constructed table does not have RLS enabled.
+	 *
+	 * check_enable_rls() will ereport(ERROR) itself if the user has requested
+	 * something invalid, and otherwise will return RLS_ENABLED if RLS should
+	 * be enabled here.  We don't actually support that currently, so throw
+	 * our own ereport(ERROR) if that happens.
+	 */
+	if (check_enable_rls(intoRelationAddr.objectId, InvalidOid, false) == RLS_ENABLED)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("policies not yet implemented for this command")));
+
+	/*
+	 * Tentatively mark the target as populated, if it's a matview and we're
+	 * going to fill it; otherwise, no change needed.
+	 */
+	if (is_matview && !into->skipData)
+		SetMatViewPopulatedState(intoRelationDesc, true);
+
+	/*
+	 * Fill private fields of myState for use by later routines
+	 */
+	myState->rel = intoRelationDesc;
+	myState->reladdr = intoRelationAddr;
+	myState->output_cid = GetCurrentCommandId(true);
+	myState->ti_options = TABLE_INSERT_SKIP_FSM;
+
+	/*
+	 * If WITH NO DATA is specified, there is no need to set up the state for
+	 * bulk inserts as there are no tuples to insert.
+	 */
+	if (!into->skipData)
+		myState->bistate = GetBulkInsertState();
+	else
+		myState->bistate = NULL;
+
+	/*
+	 * Valid smgr_targblock implies something already wrote to the relation.
+	 * This may be harmless, but this function hasn't planned for it.
+	 */
+	Assert(RelationGetTargetBlock(intoRelationDesc) == InvalidBlockNumber);
+}
+
+/*
+ * intorel_receive --- receive one tuple
+ */
+static bool
+intorel_receive(TupleTableSlot *slot, DestReceiver *self)
+{
+	DR_intorel *myState = (DR_intorel *) self;
+
+	/* With WITH NO DATA, there is nothing to insert; just accept the tuple. */
+	if (myState->into->skipData)
+		return true;
+
+	/*
+	 * Note that the input slot might not be of the type of the target
+	 * relation. That's supported by table_tuple_insert(), but slightly less
+	 * efficient than inserting with the right slot - but the alternative
+	 * would be to copy into a slot of the right type, which would not be
+	 * cheap either. This also doesn't allow accessing per-AM data (say a
+	 * tuple's xmin), but since we don't do that here...
+	 */
+	table_tuple_insert(myState->rel,
+					   slot,
+					   myState->output_cid,
+					   myState->ti_options,
+					   myState->bistate);
+
+	/* We know this is a newly created relation, so there are no indexes */
+
+	return true;
+}
+
+/*
+ * intorel_shutdown --- executor end
+ */
+static void
+intorel_shutdown(DestReceiver *self)
+{
+	DR_intorel *myState = (DR_intorel *) self;
+
+	/* Bulk-insert state was only set up when data was actually loaded. */
+	if (!myState->into->skipData)
+	{
+		FreeBulkInsertState(myState->bistate);
+		table_finish_bulk_insert(myState->rel, myState->ti_options);
+	}
+
+	/* close rel, but keep lock until commit */
+	table_close(myState->rel, NoLock);
+	myState->rel = NULL;
+}
+
+/*
+ * intorel_destroy --- release DestReceiver object
+ *
+ * Frees the receiver struct allocated by CreateIntoRelDestReceiver().
+ */
+static void
+intorel_destroy(DestReceiver *self)
+{
+	pfree(self);
+}
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
new file mode 100644
index 0000000..93f0c73
--- /dev/null
+++ b/src/backend/commands/dbcommands.c
@@ -0,0 +1,3285 @@
+/*-------------------------------------------------------------------------
+ *
+ * dbcommands.c
+ * Database management commands (create/drop database).
+ *
+ * Note: database creation/destruction commands use exclusive locks on
+ * the database objects (as expressed by LockSharedObject()) to avoid
+ * stepping on each others' toes. Formerly we used table-level locks
+ * on pg_database, but that's too coarse-grained.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/commands/dbcommands.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/stat.h>
+
+#include "access/genam.h"
+#include "access/heapam.h"
+#include "access/htup_details.h"
+#include "access/multixact.h"
+#include "access/tableam.h"
+#include "access/xact.h"
+#include "access/xloginsert.h"
+#include "access/xlogrecovery.h"
+#include "access/xlogutils.h"
+#include "catalog/catalog.h"
+#include "catalog/dependency.h"
+#include "catalog/indexing.h"
+#include "catalog/objectaccess.h"
+#include "catalog/pg_authid.h"
+#include "catalog/pg_collation.h"
+#include "catalog/pg_database.h"
+#include "catalog/pg_db_role_setting.h"
+#include "catalog/pg_subscription.h"
+#include "catalog/pg_tablespace.h"
+#include "commands/comment.h"
+#include "commands/dbcommands.h"
+#include "commands/dbcommands_xlog.h"
+#include "commands/defrem.h"
+#include "commands/seclabel.h"
+#include "commands/tablespace.h"
+#include "common/file_perm.h"
+#include "mb/pg_wchar.h"
+#include "miscadmin.h"
+#include "pgstat.h"
+#include "postmaster/bgwriter.h"
+#include "replication/slot.h"
+#include "storage/copydir.h"
+#include "storage/fd.h"
+#include "storage/ipc.h"
+#include "storage/lmgr.h"
+#include "storage/md.h"
+#include "storage/procarray.h"
+#include "storage/smgr.h"
+#include "utils/acl.h"
+#include "utils/builtins.h"
+#include "utils/fmgroids.h"
+#include "utils/pg_locale.h"
+#include "utils/relmapper.h"
+#include "utils/snapmgr.h"
+#include "utils/syscache.h"
+
+/*
+ * Create database strategy.
+ *
+ * CREATEDB_WAL_LOG will copy the database at the block level and WAL log each
+ * copied block.
+ *
+ * CREATEDB_FILE_COPY will simply perform a file system level copy of the
+ * database and log a single record for each tablespace copied. To make this
+ * safe, it also triggers checkpoints before and after the operation.
+ */
+typedef enum CreateDBStrategy
+{
+	CREATEDB_WAL_LOG,
+	CREATEDB_FILE_COPY
+} CreateDBStrategy;
+
+/*
+ * Cleanup state for CREATE DATABASE failure handling; presumably passed to
+ * createdb_failure_callback() as its Datum argument — confirm at call site.
+ */
+typedef struct
+{
+	Oid			src_dboid;		/* source (template) DB */
+	Oid			dest_dboid;		/* DB we are trying to create */
+	CreateDBStrategy strategy;	/* create db strategy */
+} createdb_failure_params;
+
+/*
+ * Cleanup state for ALTER DATABASE ... SET TABLESPACE failure handling;
+ * presumably passed to movedb_failure_callback() — confirm at call site.
+ */
+typedef struct
+{
+	Oid			dest_dboid;		/* DB we are trying to move */
+	Oid			dest_tsoid;		/* tablespace we are trying to move to */
+} movedb_failure_params;
+
+/*
+ * Information about a relation to be copied when creating a database.
+ */
+typedef struct CreateDBRelInfo
+{
+	RelFileNode rnode;			/* physical relation identifier */
+	Oid			reloid;			/* relation oid */
+	bool		permanent;		/* relation is permanent or unlogged */
+} CreateDBRelInfo;
+
+
+/* non-export function prototypes (all helpers have internal linkage) */
+static void createdb_failure_callback(int code, Datum arg);
+static void movedb(const char *dbname, const char *tblspcname);
+static void movedb_failure_callback(int code, Datum arg);
+static bool get_db_info(const char *name, LOCKMODE lockmode,
+						Oid *dbIdP, Oid *ownerIdP,
+						int *encodingP, bool *dbIsTemplateP, bool *dbAllowConnP,
+						TransactionId *dbFrozenXidP, MultiXactId *dbMinMultiP,
+						Oid *dbTablespace, char **dbCollate, char **dbCtype, char **dbIculocale,
+						char *dbLocProvider,
+						char **dbCollversion);
+static bool have_createdb_privilege(void);
+static void remove_dbtablespaces(Oid db_id);
+static bool check_db_file_conflict(Oid db_id);
+static int	errdetail_busy_db(int notherbackends, int npreparedxacts);
+static void CreateDatabaseUsingWalLog(Oid src_dboid, Oid dboid, Oid src_tsid,
+									  Oid dst_tsid);
+static List *ScanSourceDatabasePgClass(Oid srctbid, Oid srcdbid, char *srcpath);
+static List *ScanSourceDatabasePgClassPage(Page page, Buffer buf, Oid tbid,
+										   Oid dbid, char *srcpath,
+										   List *rnodelist, Snapshot snapshot);
+static CreateDBRelInfo *ScanSourceDatabasePgClassTuple(HeapTupleData *tuple,
+													   Oid tbid, Oid dbid,
+													   char *srcpath);
+static void CreateDirAndVersionFile(char *dbpath, Oid dbid, Oid tsid,
+									bool isRedo);
+static void CreateDatabaseUsingFileCopy(Oid src_dboid, Oid dboid, Oid src_tsid,
+										Oid dst_tsid);
+static void recovery_create_dbdir(char *path, bool only_tblspc);
+
+/*
+ * Create a new database using the WAL_LOG strategy.
+ *
+ * Each copied block is separately written to the write-ahead log.
+ *
+ * src_dboid/src_tsid identify the template database and its default
+ * tablespace; dst_dboid/dst_tsid identify the database being created.
+ */
+static void
+CreateDatabaseUsingWalLog(Oid src_dboid, Oid dst_dboid,
+						  Oid src_tsid, Oid dst_tsid)
+{
+	char	   *srcpath;
+	char	   *dstpath;
+	List	   *rnodelist = NULL;
+	ListCell   *cell;
+	LockRelId	srcrelid;
+	LockRelId	dstrelid;
+	RelFileNode srcrnode;
+	RelFileNode dstrnode;
+	CreateDBRelInfo *relinfo;
+
+	/* Get source and destination database paths. */
+	srcpath = GetDatabasePath(src_dboid, src_tsid);
+	dstpath = GetDatabasePath(dst_dboid, dst_tsid);
+
+	/* Create database directory and write PG_VERSION file. */
+	CreateDirAndVersionFile(dstpath, dst_dboid, dst_tsid, false);
+
+	/* Copy relmap file from source database to the destination database. */
+	RelationMapCopy(dst_dboid, dst_tsid, srcpath, dstpath);
+
+	/* Get list of relfilenodes to copy from the source database. */
+	rnodelist = ScanSourceDatabasePgClass(src_tsid, src_dboid, srcpath);
+	Assert(rnodelist != NIL);
+
+	/*
+	 * Database IDs will be the same for all relations so set them before
+	 * entering the loop.
+	 */
+	srcrelid.dbId = src_dboid;
+	dstrelid.dbId = dst_dboid;
+
+	/* Loop over our list of relfilenodes and copy each one. */
+	foreach(cell, rnodelist)
+	{
+		relinfo = lfirst(cell);
+		srcrnode = relinfo->rnode;
+
+		/*
+		 * If the relation is from the source db's default tablespace then we
+		 * need to create it in the destination db's default tablespace.
+		 * Otherwise, we need to create it in the same tablespace as it is in
+		 * the source database.
+		 */
+		if (srcrnode.spcNode == src_tsid)
+			dstrnode.spcNode = dst_tsid;
+		else
+			dstrnode.spcNode = srcrnode.spcNode;
+
+		dstrnode.dbNode = dst_dboid;
+		dstrnode.relNode = srcrnode.relNode;
+
+		/*
+		 * Acquire locks on source and target relations before copying.
+		 *
+		 * We typically do not read relation data into shared_buffers without
+		 * holding a relation lock. It's unclear what could go wrong if we
+		 * skipped it in this case, because nobody can be modifying either the
+		 * source or destination database at this point, and we have locks on
+		 * both databases, too, but let's take the conservative route.
+		 */
+		dstrelid.relId = srcrelid.relId = relinfo->reloid;
+		LockRelationId(&srcrelid, AccessShareLock);
+		LockRelationId(&dstrelid, AccessShareLock);
+
+		/* Copy relation storage from source to the destination. */
+		CreateAndCopyRelationData(srcrnode, dstrnode, relinfo->permanent);
+
+		/* Release the relation locks. */
+		UnlockRelationId(&srcrelid, AccessShareLock);
+		UnlockRelationId(&dstrelid, AccessShareLock);
+	}
+
+	pfree(srcpath);
+	pfree(dstpath);
+	list_free_deep(rnodelist);
+}
+
+/*
+ * Scan the pg_class table in the source database to identify the relations
+ * that need to be copied to the destination database.
+ *
+ * Returns a List of palloc'd CreateDBRelInfo structs, one per relation to
+ * copy.  tbid/dbid are the source database's default tablespace OID and
+ * database OID; srcpath is its directory path.
+ *
+ * This is an exception to the usual rule that cross-database access is
+ * not possible. We can make it work here because we know that there are no
+ * connections to the source database and (since there can't be prepared
+ * transactions touching that database) no in-doubt tuples either. This
+ * means that we don't need to worry about pruning removing anything from
+ * under us, and we don't need to be too picky about our snapshot either.
+ * As long as it sees all previously-committed XIDs as committed and all
+ * aborted XIDs as aborted, we should be fine: nothing else is possible
+ * here.
+ *
+ * We can't rely on the relcache for anything here, because that only knows
+ * about the database to which we are connected, and can't handle access to
+ * other databases. That also means we can't rely on the heap scan
+ * infrastructure, which would be a bad idea anyway since it might try
+ * to do things like HOT pruning which we definitely can't do safely in
+ * a database to which we're not even connected.
+ */
+static List *
+ScanSourceDatabasePgClass(Oid tbid, Oid dbid, char *srcpath)
+{
+	RelFileNode rnode;
+	BlockNumber nblocks;
+	BlockNumber blkno;
+	Buffer		buf;
+	Oid			relfilenode;
+	Page		page;
+	List	   *rnodelist = NIL;
+	LockRelId	relid;
+	Snapshot	snapshot;
+	SMgrRelation smgr;
+	BufferAccessStrategy bstrategy;
+
+	/* Get pg_class relfilenode. */
+	relfilenode = RelationMapOidToFilenodeForDatabase(srcpath,
+													  RelationRelationId);
+
+	/* Don't read data into shared_buffers without holding a relation lock. */
+	relid.dbId = dbid;
+	relid.relId = RelationRelationId;
+	LockRelationId(&relid, AccessShareLock);
+
+	/* Prepare a RelFileNode for the pg_class relation. */
+	rnode.spcNode = tbid;
+	rnode.dbNode = dbid;
+	rnode.relNode = relfilenode;
+
+	/* Determine the number of blocks to scan, via smgr. */
+	smgr = smgropen(rnode, InvalidBackendId);
+	nblocks = smgrnblocks(smgr, MAIN_FORKNUM);
+	smgrclose(smgr);
+
+	/* Use a buffer access strategy since this is a bulk read operation. */
+	bstrategy = GetAccessStrategy(BAS_BULKREAD);
+
+	/*
+	 * As explained in the function header comments, we need a snapshot that
+	 * will see all committed transactions as committed, and our transaction
+	 * snapshot - or the active snapshot - might not be new enough for that,
+	 * but the return value of GetLatestSnapshot() should work fine.
+	 */
+	snapshot = GetLatestSnapshot();
+
+	/* Process the relation block by block. */
+	for (blkno = 0; blkno < nblocks; blkno++)
+	{
+		CHECK_FOR_INTERRUPTS();
+
+		buf = ReadBufferWithoutRelcache(rnode, MAIN_FORKNUM, blkno,
+										RBM_NORMAL, bstrategy, true);
+
+		LockBuffer(buf, BUFFER_LOCK_SHARE);
+		page = BufferGetPage(buf);
+		if (PageIsNew(page) || PageIsEmpty(page))
+		{
+			UnlockReleaseBuffer(buf);
+			continue;
+		}
+
+		/* Append relevant pg_class tuples for current page to rnodelist. */
+		rnodelist = ScanSourceDatabasePgClassPage(page, buf, tbid, dbid,
+												  srcpath, rnodelist,
+												  snapshot);
+
+		UnlockReleaseBuffer(buf);
+	}
+
+	/* Release relation lock. */
+	UnlockRelationId(&relid, AccessShareLock);
+
+	return rnodelist;
+}
+
+/*
+ * Scan one page of the source database's pg_class relation and add relevant
+ * entries to rnodelist. The return value is the updated list.
+ *
+ * The caller holds (at least) a share lock on buf, which the visibility
+ * checks below rely on.
+ */
+static List *
+ScanSourceDatabasePgClassPage(Page page, Buffer buf, Oid tbid, Oid dbid,
+							  char *srcpath, List *rnodelist,
+							  Snapshot snapshot)
+{
+	BlockNumber blkno = BufferGetBlockNumber(buf);
+	OffsetNumber offnum;
+	OffsetNumber maxoff;
+	HeapTupleData tuple;
+
+	maxoff = PageGetMaxOffsetNumber(page);
+
+	/* Loop over offsets. */
+	for (offnum = FirstOffsetNumber;
+		 offnum <= maxoff;
+		 offnum = OffsetNumberNext(offnum))
+	{
+		ItemId		itemid;
+
+		itemid = PageGetItemId(page, offnum);
+
+		/* Nothing to do if slot is empty or already dead. */
+		if (!ItemIdIsUsed(itemid) || ItemIdIsDead(itemid) ||
+			ItemIdIsRedirected(itemid))
+			continue;
+
+		Assert(ItemIdIsNormal(itemid));
+		ItemPointerSet(&(tuple.t_self), blkno, offnum);
+
+		/* Initialize a HeapTupleData structure. */
+		tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
+		tuple.t_len = ItemIdGetLength(itemid);
+		tuple.t_tableOid = RelationRelationId;
+
+		/* Skip tuples that are not visible to this snapshot. */
+		if (HeapTupleSatisfiesVisibility(&tuple, snapshot, buf))
+		{
+			CreateDBRelInfo *relinfo;
+
+			/*
+			 * ScanSourceDatabasePgClassTuple is in charge of constructing a
+			 * CreateDBRelInfo object for this tuple, but can also decide that
+			 * this tuple isn't something we need to copy. If we do need to
+			 * copy the relation, add it to the list.
+			 */
+			relinfo = ScanSourceDatabasePgClassTuple(&tuple, tbid, dbid,
+													 srcpath);
+			if (relinfo != NULL)
+				rnodelist = lappend(rnodelist, relinfo);
+		}
+	}
+
+	return rnodelist;
+}
+
+/*
+ * Decide whether a certain pg_class tuple represents something that
+ * needs to be copied from the source database to the destination database,
+ * and if so, construct a CreateDBRelInfo for it.
+ *
+ * Visibility checks are handled by the caller, so our job here is just
+ * to assess the data stored in the tuple.
+ *
+ * Returns a palloc'd CreateDBRelInfo, or NULL if the relation does not
+ * need to be copied.  tbid is the source database's default tablespace,
+ * dbid its OID, and srcpath its directory (used for relmap lookups).
+ */
+static CreateDBRelInfo *
+ScanSourceDatabasePgClassTuple(HeapTupleData *tuple, Oid tbid, Oid dbid,
+							   char *srcpath)
+{
+	CreateDBRelInfo *relinfo;
+	Form_pg_class classForm;
+	Oid			relfilenode = InvalidOid;
+
+	classForm = (Form_pg_class) GETSTRUCT(tuple);
+
+	/*
+	 * Return NULL if this object does not need to be copied.
+	 *
+	 * Shared objects don't need to be copied, because they are shared.
+	 * Objects without storage can't be copied, because there's nothing to
+	 * copy. Temporary relations don't need to be copied either, because they
+	 * are inaccessible outside of the session that created them, which must
+	 * be gone already, and couldn't connect to a different database if it
+	 * still existed. autovacuum will eventually remove the pg_class entries
+	 * as well.
+	 */
+	if (classForm->reltablespace == GLOBALTABLESPACE_OID ||
+		!RELKIND_HAS_STORAGE(classForm->relkind) ||
+		classForm->relpersistence == RELPERSISTENCE_TEMP)
+		return NULL;
+
+	/*
+	 * If relfilenode is valid then directly use it. Otherwise, consult the
+	 * relmap.
+	 */
+	if (OidIsValid(classForm->relfilenode))
+		relfilenode = classForm->relfilenode;
+	else
+		relfilenode = RelationMapOidToFilenodeForDatabase(srcpath,
+														  classForm->oid);
+
+	/* We must have a valid relfilenode oid. */
+	if (!OidIsValid(relfilenode))
+		elog(ERROR, "relation with OID %u does not have a valid relfilenode",
+			 classForm->oid);
+
+	/* Prepare a rel info element and add it to the list. */
+	relinfo = (CreateDBRelInfo *) palloc(sizeof(CreateDBRelInfo));
+	if (OidIsValid(classForm->reltablespace))
+		relinfo->rnode.spcNode = classForm->reltablespace;
+	else
+		relinfo->rnode.spcNode = tbid;
+
+	relinfo->rnode.dbNode = dbid;
+	relinfo->rnode.relNode = relfilenode;
+	relinfo->reloid = classForm->oid;
+
+	/* Temporary relations were rejected above. */
+	Assert(classForm->relpersistence != RELPERSISTENCE_TEMP);
+	relinfo->permanent =
+		(classForm->relpersistence == RELPERSISTENCE_PERMANENT);
+
+	return relinfo;
+}
+
+/*
+ * Create database directory and write out the PG_VERSION file in the database
+ * path. If isRedo is true, it's okay for the database directory to exist
+ * already.
+ *
+ * When not in redo, the directory creation and file write are performed
+ * inside a WAL critical section: the XLOG_DBASE_CREATE_WAL_LOG record is
+ * inserted and flushed first, then the filesystem changes are made, and
+ * the critical section ends after the PG_VERSION file is closed.
+ */
+static void
+CreateDirAndVersionFile(char *dbpath, Oid dbid, Oid tsid, bool isRedo)
+{
+	int			fd;
+	int			nbytes;
+	char		versionfile[MAXPGPATH];
+	char		buf[16];
+
+	/*
+	 * Prepare version data before starting a critical section.
+	 *
+	 * Note that we don't have to copy this from the source database; there's
+	 * only one legal value.
+	 */
+	sprintf(buf, "%s\n", PG_MAJORVERSION);
+	nbytes = strlen(PG_MAJORVERSION) + 1;
+
+	/* If we are not in WAL replay then write the WAL. */
+	if (!isRedo)
+	{
+		xl_dbase_create_wal_log_rec xlrec;
+		XLogRecPtr	lsn;
+
+		START_CRIT_SECTION();
+
+		xlrec.db_id = dbid;
+		xlrec.tablespace_id = tsid;
+
+		XLogBeginInsert();
+		XLogRegisterData((char *) (&xlrec),
+						 sizeof(xl_dbase_create_wal_log_rec));
+
+		lsn = XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE_WAL_LOG);
+
+		/* As always, WAL must hit the disk before the data update does. */
+		XLogFlush(lsn);
+	}
+
+	/* Create database directory. */
+	if (MakePGDirectory(dbpath) < 0)
+	{
+		/* Failure other than already exists or not in WAL replay? */
+		if (errno != EEXIST || !isRedo)
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not create directory \"%s\": %m", dbpath)));
+	}
+
+	/*
+	 * Create PG_VERSION file in the database path.  If the file already
+	 * exists and we are in WAL replay then try again to open it in write
+	 * mode.
+	 */
+	snprintf(versionfile, sizeof(versionfile), "%s/%s", dbpath, "PG_VERSION");
+
+	fd = OpenTransientFile(versionfile, O_WRONLY | O_CREAT | O_EXCL | PG_BINARY);
+	if (fd < 0 && errno == EEXIST && isRedo)
+		fd = OpenTransientFile(versionfile, O_WRONLY | O_TRUNC | PG_BINARY);
+
+	if (fd < 0)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not create file \"%s\": %m", versionfile)));
+
+	/* Write PG_MAJORVERSION in the PG_VERSION file. */
+	pgstat_report_wait_start(WAIT_EVENT_VERSION_FILE_WRITE);
+	errno = 0;
+	if ((int) write(fd, buf, nbytes) != nbytes)
+	{
+		/* If write didn't set errno, assume problem is no disk space. */
+		if (errno == 0)
+			errno = ENOSPC;
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not write to file \"%s\": %m", versionfile)));
+	}
+	pgstat_report_wait_end();
+
+	/* Close the version file. */
+	CloseTransientFile(fd);
+
+	/* Critical section done. */
+	if (!isRedo)
+		END_CRIT_SECTION();
+}
+
+/*
+ * Create a new database using the FILE_COPY strategy.
+ *
+ * Copy each tablespace at the filesystem level, and log a single WAL record
+ * for each tablespace copied. This requires a checkpoint before and after the
+ * copy, which may be expensive, but it does greatly reduce WAL generation
+ * if the copied database is large.
+ *
+ * src_dboid/src_tsid identify the template database and its default
+ * tablespace; dst_dboid/dst_tsid identify the database being created.
+ */
+static void
+CreateDatabaseUsingFileCopy(Oid src_dboid, Oid dst_dboid, Oid src_tsid,
+							Oid dst_tsid)
+{
+	TableScanDesc scan;
+	Relation	rel;
+	HeapTuple	tuple;
+
+	/*
+	 * Force a checkpoint before starting the copy. This will force all dirty
+	 * buffers, including those of unlogged tables, out to disk, to ensure
+	 * source database is up-to-date on disk for the copy.
+	 * FlushDatabaseBuffers() would suffice for that, but we also want to
+	 * process any pending unlink requests. Otherwise, if a checkpoint
+	 * happened while we're copying files, a file might be deleted just when
+	 * we're about to copy it, causing the lstat() call in copydir() to fail
+	 * with ENOENT.
+	 */
+	RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE |
+					  CHECKPOINT_WAIT | CHECKPOINT_FLUSH_ALL);
+
+	/*
+	 * Iterate through all tablespaces of the template database, and copy each
+	 * one to the new database.
+	 */
+	rel = table_open(TableSpaceRelationId, AccessShareLock);
+	scan = table_beginscan_catalog(rel, 0, NULL);
+	while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+	{
+		Form_pg_tablespace spaceform = (Form_pg_tablespace) GETSTRUCT(tuple);
+		Oid			srctablespace = spaceform->oid;
+		Oid			dsttablespace;
+		char	   *srcpath;
+		char	   *dstpath;
+		struct stat st;
+
+		/* No need to copy global tablespace */
+		if (srctablespace == GLOBALTABLESPACE_OID)
+			continue;
+
+		srcpath = GetDatabasePath(src_dboid, srctablespace);
+
+		/* Skip tablespaces the source database has no data in. */
+		if (stat(srcpath, &st) < 0 || !S_ISDIR(st.st_mode) ||
+			directory_is_empty(srcpath))
+		{
+			/* Assume we can ignore it */
+			pfree(srcpath);
+			continue;
+		}
+
+		if (srctablespace == src_tsid)
+			dsttablespace = dst_tsid;
+		else
+			dsttablespace = srctablespace;
+
+		dstpath = GetDatabasePath(dst_dboid, dsttablespace);
+
+		/*
+		 * Copy this subdirectory to the new location
+		 *
+		 * We don't need to copy subdirectories
+		 */
+		copydir(srcpath, dstpath, false);
+
+		/* Record the filesystem change in XLOG */
+		{
+			xl_dbase_create_file_copy_rec xlrec;
+
+			xlrec.db_id = dst_dboid;
+			xlrec.tablespace_id = dsttablespace;
+			xlrec.src_db_id = src_dboid;
+			xlrec.src_tablespace_id = srctablespace;
+
+			XLogBeginInsert();
+			XLogRegisterData((char *) &xlrec,
+							 sizeof(xl_dbase_create_file_copy_rec));
+
+			(void) XLogInsert(RM_DBASE_ID,
+							  XLOG_DBASE_CREATE_FILE_COPY | XLR_SPECIAL_REL_UPDATE);
+		}
+		pfree(srcpath);
+		pfree(dstpath);
+	}
+	table_endscan(scan);
+	table_close(rel, AccessShareLock);
+
+	/*
+	 * We force a checkpoint before committing. This effectively means that
+	 * committed XLOG_DBASE_CREATE_FILE_COPY operations will never need to be
+	 * replayed (at least not in ordinary crash recovery; we still have to
+	 * make the XLOG entry for the benefit of PITR operations). This avoids
+	 * two nasty scenarios:
+	 *
+	 * #1: When PITR is off, we don't XLOG the contents of newly created
+	 * indexes; therefore the drop-and-recreate-whole-directory behavior of
+	 * DBASE_CREATE replay would lose such indexes.
+	 *
+	 * #2: Since we have to recopy the source database during DBASE_CREATE
+	 * replay, we run the risk of copying changes in it that were committed
+	 * after the original CREATE DATABASE command but before the system crash
+	 * that led to the replay. This is at least unexpected and at worst could
+	 * lead to inconsistencies, eg duplicate table names.
+	 *
+	 * (Both of these were real bugs in releases 8.0 through 8.0.3.)
+	 *
+	 * In PITR replay, the first of these isn't an issue, and the second is
+	 * only a risk if the CREATE DATABASE and subsequent template database
+	 * change both occur while a base backup is being taken. There doesn't
+	 * seem to be much we can do about that except document it as a
+	 * limitation.
+	 *
+	 * See CreateDatabaseUsingWalLog() for a less cheesy CREATE DATABASE
+	 * strategy that avoids these problems.
+	 */
+	RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT);
+}
+
/*
 * CREATE DATABASE
 *
 * Implements the CREATE DATABASE command: clones an existing template
 * database (default "template1") into a new database, inserting the new
 * pg_database row and copying the template's files either block-by-block
 * through shared buffers (WAL_LOG strategy) or file-by-file (FILE_COPY
 * strategy).
 *
 * pstate - parse state, used here only for error cursor positions
 * stmt   - the parsed CREATE DATABASE statement, including its option list
 *
 * Returns the OID assigned to the new database.
 *
 * Side effects: takes ShareLock on the template database and RowExclusiveLock
 * on pg_database (both held till commit), and forces a synchronous commit.
 * On error after file copying has begun, createdb_failure_callback removes
 * any partially-copied directories.
 */
Oid
createdb(ParseState *pstate, const CreatedbStmt *stmt)
{
	Oid			src_dboid;
	Oid			src_owner;
	int			src_encoding = -1;
	char	   *src_collate = NULL;
	char	   *src_ctype = NULL;
	char	   *src_iculocale = NULL;
	char		src_locprovider = '\0';
	char	   *src_collversion = NULL;
	bool		src_istemplate;
	bool		src_allowconn;
	TransactionId src_frozenxid = InvalidTransactionId;
	MultiXactId src_minmxid = InvalidMultiXactId;
	Oid			src_deftablespace;
	volatile Oid dst_deftablespace;
	Relation	pg_database_rel;
	HeapTuple	tuple;
	Datum		new_record[Natts_pg_database];
	bool		new_record_nulls[Natts_pg_database];
	Oid			dboid = InvalidOid;
	Oid			datdba;
	ListCell   *option;
	DefElem    *dtablespacename = NULL;
	DefElem    *downer = NULL;
	DefElem    *dtemplate = NULL;
	DefElem    *dencoding = NULL;
	DefElem    *dlocale = NULL;
	DefElem    *dcollate = NULL;
	DefElem    *dctype = NULL;
	DefElem    *diculocale = NULL;
	DefElem    *dlocprovider = NULL;
	DefElem    *distemplate = NULL;
	DefElem    *dallowconnections = NULL;
	DefElem    *dconnlimit = NULL;
	DefElem    *dcollversion = NULL;
	DefElem    *dstrategy = NULL;
	char	   *dbname = stmt->dbname;
	char	   *dbowner = NULL;
	const char *dbtemplate = NULL;
	char	   *dbcollate = NULL;
	char	   *dbctype = NULL;
	char	   *dbiculocale = NULL;
	char		dblocprovider = '\0';
	char	   *canonname;
	int			encoding = -1;
	bool		dbistemplate = false;
	bool		dballowconnections = true;
	int			dbconnlimit = DATCONNLIMIT_UNLIMITED;
	char	   *dbcollversion = NULL;
	int			notherbackends;
	int			npreparedxacts;
	CreateDBStrategy dbstrategy = CREATEDB_WAL_LOG;
	createdb_failure_params fparms;

	/*
	 * Extract options from the statement node tree.  Each recognized option
	 * may appear at most once; duplicates raise an error via
	 * errorConflictingDefElem.
	 */
	foreach(option, stmt->options)
	{
		DefElem    *defel = (DefElem *) lfirst(option);

		if (strcmp(defel->defname, "tablespace") == 0)
		{
			if (dtablespacename)
				errorConflictingDefElem(defel, pstate);
			dtablespacename = defel;
		}
		else if (strcmp(defel->defname, "owner") == 0)
		{
			if (downer)
				errorConflictingDefElem(defel, pstate);
			downer = defel;
		}
		else if (strcmp(defel->defname, "template") == 0)
		{
			if (dtemplate)
				errorConflictingDefElem(defel, pstate);
			dtemplate = defel;
		}
		else if (strcmp(defel->defname, "encoding") == 0)
		{
			if (dencoding)
				errorConflictingDefElem(defel, pstate);
			dencoding = defel;
		}
		else if (strcmp(defel->defname, "locale") == 0)
		{
			if (dlocale)
				errorConflictingDefElem(defel, pstate);
			dlocale = defel;
		}
		else if (strcmp(defel->defname, "lc_collate") == 0)
		{
			if (dcollate)
				errorConflictingDefElem(defel, pstate);
			dcollate = defel;
		}
		else if (strcmp(defel->defname, "lc_ctype") == 0)
		{
			if (dctype)
				errorConflictingDefElem(defel, pstate);
			dctype = defel;
		}
		else if (strcmp(defel->defname, "icu_locale") == 0)
		{
			if (diculocale)
				errorConflictingDefElem(defel, pstate);
			diculocale = defel;
		}
		else if (strcmp(defel->defname, "locale_provider") == 0)
		{
			if (dlocprovider)
				errorConflictingDefElem(defel, pstate);
			dlocprovider = defel;
		}
		else if (strcmp(defel->defname, "is_template") == 0)
		{
			if (distemplate)
				errorConflictingDefElem(defel, pstate);
			distemplate = defel;
		}
		else if (strcmp(defel->defname, "allow_connections") == 0)
		{
			if (dallowconnections)
				errorConflictingDefElem(defel, pstate);
			dallowconnections = defel;
		}
		else if (strcmp(defel->defname, "connection_limit") == 0)
		{
			if (dconnlimit)
				errorConflictingDefElem(defel, pstate);
			dconnlimit = defel;
		}
		else if (strcmp(defel->defname, "collation_version") == 0)
		{
			if (dcollversion)
				errorConflictingDefElem(defel, pstate);
			dcollversion = defel;
		}
		else if (strcmp(defel->defname, "location") == 0)
		{
			/* Historical option, accepted but ignored with a warning. */
			ereport(WARNING,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("LOCATION is not supported anymore"),
					 errhint("Consider using tablespaces instead."),
					 parser_errposition(pstate, defel->location)));
		}
		else if (strcmp(defel->defname, "oid") == 0)
		{
			dboid = defGetObjectId(defel);

			/*
			 * We don't normally permit new databases to be created with
			 * system-assigned OIDs. pg_upgrade tries to preserve database
			 * OIDs, so we can't allow any database to be created with an OID
			 * that might be in use in a freshly-initialized cluster created
			 * by some future version. We assume all such OIDs will be from
			 * the system-managed OID range.
			 *
			 * As an exception, however, we permit any OID to be assigned when
			 * allow_system_table_mods=on (so that initdb can assign system
			 * OIDs to template0 and postgres) or when performing a binary
			 * upgrade (so that pg_upgrade can preserve whatever OIDs it finds
			 * in the source cluster).
			 */
			if (dboid < FirstNormalObjectId &&
				!allowSystemTableMods && !IsBinaryUpgrade)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_PARAMETER_VALUE)),
						errmsg("OIDs less than %u are reserved for system objects", FirstNormalObjectId));
		}
		else if (strcmp(defel->defname, "strategy") == 0)
		{
			if (dstrategy)
				errorConflictingDefElem(defel, pstate);
			dstrategy = defel;
		}
		else
			ereport(ERROR,
					(errcode(ERRCODE_SYNTAX_ERROR),
					 errmsg("option \"%s\" not recognized", defel->defname),
					 parser_errposition(pstate, defel->location)));
	}

	/*
	 * Translate the collected DefElems into working values.  Unset options
	 * keep their "not specified" defaults (NULL, -1, or '\0') and may be
	 * filled in from the template database further below.
	 */
	if (downer && downer->arg)
		dbowner = defGetString(downer);
	if (dtemplate && dtemplate->arg)
		dbtemplate = defGetString(dtemplate);
	if (dencoding && dencoding->arg)
	{
		const char *encoding_name;

		/* ENCODING may be given either as an integer code or as a name. */
		if (IsA(dencoding->arg, Integer))
		{
			encoding = defGetInt32(dencoding);
			encoding_name = pg_encoding_to_char(encoding);
			if (strcmp(encoding_name, "") == 0 ||
				pg_valid_server_encoding(encoding_name) < 0)
				ereport(ERROR,
						(errcode(ERRCODE_UNDEFINED_OBJECT),
						 errmsg("%d is not a valid encoding code",
								encoding),
						 parser_errposition(pstate, dencoding->location)));
		}
		else
		{
			encoding_name = defGetString(dencoding);
			encoding = pg_valid_server_encoding(encoding_name);
			if (encoding < 0)
				ereport(ERROR,
						(errcode(ERRCODE_UNDEFINED_OBJECT),
						 errmsg("%s is not a valid encoding name",
								encoding_name),
						 parser_errposition(pstate, dencoding->location)));
		}
	}
	if (dlocale && dlocale->arg)
	{
		/* LOCALE sets both LC_COLLATE and LC_CTYPE at once. */
		dbcollate = defGetString(dlocale);
		dbctype = defGetString(dlocale);
	}
	if (dcollate && dcollate->arg)
		dbcollate = defGetString(dcollate);
	if (dctype && dctype->arg)
		dbctype = defGetString(dctype);
	if (diculocale && diculocale->arg)
		dbiculocale = defGetString(diculocale);
	if (dlocprovider && dlocprovider->arg)
	{
		char	   *locproviderstr = defGetString(dlocprovider);

		if (pg_strcasecmp(locproviderstr, "icu") == 0)
			dblocprovider = COLLPROVIDER_ICU;
		else if (pg_strcasecmp(locproviderstr, "libc") == 0)
			dblocprovider = COLLPROVIDER_LIBC;
		else
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
					 errmsg("unrecognized locale provider: %s",
							locproviderstr)));
	}
	if (distemplate && distemplate->arg)
		dbistemplate = defGetBoolean(distemplate);
	if (dallowconnections && dallowconnections->arg)
		dballowconnections = defGetBoolean(dallowconnections);
	if (dconnlimit && dconnlimit->arg)
	{
		dbconnlimit = defGetInt32(dconnlimit);
		if (dbconnlimit < DATCONNLIMIT_UNLIMITED)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("invalid connection limit: %d", dbconnlimit)));
	}
	if (dcollversion)
		dbcollversion = defGetString(dcollversion);

	/* obtain OID of proposed owner */
	if (dbowner)
		datdba = get_role_oid(dbowner, false);
	else
		datdba = GetUserId();

	/*
	 * To create a database, must have createdb privilege and must be able to
	 * become the target role (this does not imply that the target role itself
	 * must have createdb privilege). The latter provision guards against
	 * "giveaway" attacks. Note that a superuser will always have both of
	 * these privileges a fortiori.
	 */
	if (!have_createdb_privilege())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied to create database")));

	check_is_member_of_role(GetUserId(), datdba);

	/*
	 * Lookup database (template) to be cloned, and obtain share lock on it.
	 * ShareLock allows two CREATE DATABASEs to work from the same template
	 * concurrently, while ensuring no one is busy dropping it in parallel
	 * (which would be Very Bad since we'd likely get an incomplete copy
	 * without knowing it). This also prevents any new connections from being
	 * made to the source until we finish copying it, so we can be sure it
	 * won't change underneath us.
	 */
	if (!dbtemplate)
		dbtemplate = "template1";	/* Default template database name */

	if (!get_db_info(dbtemplate, ShareLock,
					 &src_dboid, &src_owner, &src_encoding,
					 &src_istemplate, &src_allowconn,
					 &src_frozenxid, &src_minmxid, &src_deftablespace,
					 &src_collate, &src_ctype, &src_iculocale, &src_locprovider,
					 &src_collversion))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_DATABASE),
				 errmsg("template database \"%s\" does not exist",
						dbtemplate)));

	/*
	 * If the source database was in the process of being dropped, we can't
	 * use it as a template.
	 */
	if (database_is_invalid_oid(src_dboid))
		ereport(ERROR,
				errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				errmsg("cannot use invalid database \"%s\" as template", dbtemplate),
				errhint("Use DROP DATABASE to drop invalid databases."));

	/*
	 * Permission check: to copy a DB that's not marked datistemplate, you
	 * must be superuser or the owner thereof.
	 */
	if (!src_istemplate)
	{
		if (!pg_database_ownercheck(src_dboid, GetUserId()))
			ereport(ERROR,
					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
					 errmsg("permission denied to copy database \"%s\"",
							dbtemplate)));
	}

	/* Validate the database creation strategy. */
	if (dstrategy && dstrategy->arg)
	{
		char	   *strategy;

		strategy = defGetString(dstrategy);
		if (strcmp(strategy, "wal_log") == 0)
			dbstrategy = CREATEDB_WAL_LOG;
		else if (strcmp(strategy, "file_copy") == 0)
			dbstrategy = CREATEDB_FILE_COPY;
		else
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("invalid create database strategy \"%s\"", strategy),
					 errhint("Valid strategies are \"wal_log\", and \"file_copy\".")));
	}

	/* If encoding or locales are defaulted, use source's setting */
	if (encoding < 0)
		encoding = src_encoding;
	if (dbcollate == NULL)
		dbcollate = src_collate;
	if (dbctype == NULL)
		dbctype = src_ctype;
	if (dblocprovider == '\0')
		dblocprovider = src_locprovider;
	if (dbiculocale == NULL && dblocprovider == COLLPROVIDER_ICU)
		dbiculocale = src_iculocale;

	/* Some encodings are client only */
	if (!PG_VALID_BE_ENCODING(encoding))
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("invalid server encoding %d", encoding)));

	/* Check that the chosen locales are valid, and get canonical spellings */
	if (!check_locale(LC_COLLATE, dbcollate, &canonname))
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("invalid locale name: \"%s\"", dbcollate)));
	dbcollate = canonname;
	if (!check_locale(LC_CTYPE, dbctype, &canonname))
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("invalid locale name: \"%s\"", dbctype)));
	dbctype = canonname;

	check_encoding_locale_matches(encoding, dbcollate, dbctype);

	if (dblocprovider == COLLPROVIDER_ICU)
	{
		if (!(is_encoding_supported_by_icu(encoding)))
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("encoding \"%s\" is not supported with ICU provider",
							pg_encoding_to_char(encoding))));

		/*
		 * This would happen if template0 uses the libc provider but the new
		 * database uses icu.
		 */
		if (!dbiculocale)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("ICU locale must be specified")));

		check_icu_locale(dbiculocale);
	}
	else
	{
		if (dbiculocale)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
					 errmsg("ICU locale cannot be specified unless locale provider is ICU")));
	}

	/*
	 * Check that the new encoding and locale settings match the source
	 * database. We insist on this because we simply copy the source data ---
	 * any non-ASCII data would be wrongly encoded, and any indexes sorted
	 * according to the source locale would be wrong.
	 *
	 * However, we assume that template0 doesn't contain any non-ASCII data
	 * nor any indexes that depend on collation or ctype, so template0 can be
	 * used as template for creating a database with any encoding or locale.
	 */
	if (strcmp(dbtemplate, "template0") != 0)
	{
		if (encoding != src_encoding)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("new encoding (%s) is incompatible with the encoding of the template database (%s)",
							pg_encoding_to_char(encoding),
							pg_encoding_to_char(src_encoding)),
					 errhint("Use the same encoding as in the template database, or use template0 as template.")));

		if (strcmp(dbcollate, src_collate) != 0)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("new collation (%s) is incompatible with the collation of the template database (%s)",
							dbcollate, src_collate),
					 errhint("Use the same collation as in the template database, or use template0 as template.")));

		if (strcmp(dbctype, src_ctype) != 0)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("new LC_CTYPE (%s) is incompatible with the LC_CTYPE of the template database (%s)",
							dbctype, src_ctype),
					 errhint("Use the same LC_CTYPE as in the template database, or use template0 as template.")));

		if (dblocprovider != src_locprovider)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("new locale provider (%s) does not match locale provider of the template database (%s)",
							collprovider_name(dblocprovider), collprovider_name(src_locprovider)),
					 errhint("Use the same locale provider as in the template database, or use template0 as template.")));

		if (dblocprovider == COLLPROVIDER_ICU)
		{
			Assert(dbiculocale);
			Assert(src_iculocale);
			if (strcmp(dbiculocale, src_iculocale) != 0)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
						 errmsg("new ICU locale (%s) is incompatible with the ICU locale of the template database (%s)",
								dbiculocale, src_iculocale),
						 errhint("Use the same ICU locale as in the template database, or use template0 as template.")));
		}
	}

	/*
	 * If we got a collation version for the template database, check that it
	 * matches the actual OS collation version. Otherwise error; the user
	 * needs to fix the template database first. Don't complain if a
	 * collation version was specified explicitly as a statement option; that
	 * is used by pg_upgrade to reproduce the old state exactly.
	 *
	 * (If the template database has no collation version, then either the
	 * platform/provider does not support collation versioning, or it's
	 * template0, for which we stipulate that it does not contain
	 * collation-using objects.)
	 */
	if (src_collversion && !dcollversion)
	{
		char	   *actual_versionstr;

		actual_versionstr = get_collation_actual_version(dblocprovider, dblocprovider == COLLPROVIDER_ICU ? dbiculocale : dbcollate);
		if (!actual_versionstr)
			ereport(ERROR,
					(errmsg("template database \"%s\" has a collation version, but no actual collation version could be determined",
							dbtemplate)));

		if (strcmp(actual_versionstr, src_collversion) != 0)
			ereport(ERROR,
					(errmsg("template database \"%s\" has a collation version mismatch",
							dbtemplate),
					 errdetail("The template database was created using collation version %s, "
							   "but the operating system provides version %s.",
							   src_collversion, actual_versionstr),
					 errhint("Rebuild all objects in the template database that use the default collation and run "
							 "ALTER DATABASE %s REFRESH COLLATION VERSION, "
							 "or build PostgreSQL with the right library version.",
							 quote_identifier(dbtemplate))));
	}

	if (dbcollversion == NULL)
		dbcollversion = src_collversion;

	/*
	 * Normally, we copy the collation version from the template database.
	 * This last resort only applies if the template database does not have a
	 * collation version, which is normally only the case for template0.
	 */
	if (dbcollversion == NULL)
		dbcollversion = get_collation_actual_version(dblocprovider, dblocprovider == COLLPROVIDER_ICU ? dbiculocale : dbcollate);

	/* Resolve default tablespace for new database */
	if (dtablespacename && dtablespacename->arg)
	{
		char	   *tablespacename;
		AclResult	aclresult;

		tablespacename = defGetString(dtablespacename);
		dst_deftablespace = get_tablespace_oid(tablespacename, false);
		/* check permissions */
		aclresult = pg_tablespace_aclcheck(dst_deftablespace, GetUserId(),
										   ACL_CREATE);
		if (aclresult != ACLCHECK_OK)
			aclcheck_error(aclresult, OBJECT_TABLESPACE,
						   tablespacename);

		/* pg_global must never be the default tablespace */
		if (dst_deftablespace == GLOBALTABLESPACE_OID)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("pg_global cannot be used as default tablespace")));

		/*
		 * If we are trying to change the default tablespace of the template,
		 * we require that the template not have any files in the new default
		 * tablespace. This is necessary because otherwise the copied
		 * database would contain pg_class rows that refer to its default
		 * tablespace both explicitly (by OID) and implicitly (as zero), which
		 * would cause problems. For example another CREATE DATABASE using
		 * the copied database as template, and trying to change its default
		 * tablespace again, would yield outright incorrect results (it would
		 * improperly move tables to the new default tablespace that should
		 * stay in the same tablespace).
		 */
		if (dst_deftablespace != src_deftablespace)
		{
			char	   *srcpath;
			struct stat st;

			srcpath = GetDatabasePath(src_dboid, dst_deftablespace);

			if (stat(srcpath, &st) == 0 &&
				S_ISDIR(st.st_mode) &&
				!directory_is_empty(srcpath))
				ereport(ERROR,
						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						 errmsg("cannot assign new default tablespace \"%s\"",
								tablespacename),
						 errdetail("There is a conflict because database \"%s\" already has some tables in this tablespace.",
								   dbtemplate)));
			pfree(srcpath);
		}
	}
	else
	{
		/* Use template database's default tablespace */
		dst_deftablespace = src_deftablespace;
		/* Note there is no additional permission check in this path */
	}

	/*
	 * If built with appropriate switch, whine when regression-testing
	 * conventions for database names are violated. But don't complain during
	 * initdb.
	 */
#ifdef ENFORCE_REGRESSION_TEST_NAME_RESTRICTIONS
	if (IsUnderPostmaster && strstr(dbname, "regression") == NULL)
		elog(WARNING, "databases created by regression test cases should have names including \"regression\"");
#endif

	/*
	 * Check for db name conflict. This is just to give a more friendly error
	 * message than "unique index violation". There's a race condition but
	 * we're willing to accept the less friendly message in that case.
	 */
	if (OidIsValid(get_database_oid(dbname, true)))
		ereport(ERROR,
				(errcode(ERRCODE_DUPLICATE_DATABASE),
				 errmsg("database \"%s\" already exists", dbname)));

	/*
	 * The source DB can't have any active backends, except this one
	 * (exception is to allow CREATE DB while connected to template1).
	 * Otherwise we might copy inconsistent data.
	 *
	 * This should be last among the basic error checks, because it involves
	 * potential waiting; we may as well throw an error first if we're gonna
	 * throw one.
	 */
	if (CountOtherDBBackends(src_dboid, &notherbackends, &npreparedxacts))
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_IN_USE),
				 errmsg("source database \"%s\" is being accessed by other users",
						dbtemplate),
				 errdetail_busy_db(notherbackends, npreparedxacts)));

	/*
	 * Select an OID for the new database, checking that it doesn't have a
	 * filename conflict with anything already existing in the tablespace
	 * directories.
	 */
	pg_database_rel = table_open(DatabaseRelationId, RowExclusiveLock);

	/*
	 * If database OID is configured, check if the OID is already in use or
	 * data directory already exists.
	 */
	if (OidIsValid(dboid))
	{
		char	   *existing_dbname = get_database_name(dboid);

		if (existing_dbname != NULL)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE)),
					errmsg("database OID %u is already in use by database \"%s\"",
						   dboid, existing_dbname));

		if (check_db_file_conflict(dboid))
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE)),
					errmsg("data directory with the specified OID %u already exists", dboid));
	}
	else
	{
		/* Select an OID for the new database if is not explicitly configured. */
		do
		{
			dboid = GetNewOidWithIndex(pg_database_rel, DatabaseOidIndexId,
									   Anum_pg_database_oid);
		} while (check_db_file_conflict(dboid));
	}

	/*
	 * Insert a new tuple into pg_database. This establishes our ownership of
	 * the new database name (anyone else trying to insert the same name will
	 * block on the unique index, and fail after we commit).
	 */

	Assert((dblocprovider == COLLPROVIDER_ICU && dbiculocale) ||
		   (dblocprovider != COLLPROVIDER_ICU && !dbiculocale));

	/* Form tuple */
	MemSet(new_record, 0, sizeof(new_record));
	MemSet(new_record_nulls, false, sizeof(new_record_nulls));

	new_record[Anum_pg_database_oid - 1] = ObjectIdGetDatum(dboid);
	new_record[Anum_pg_database_datname - 1] =
		DirectFunctionCall1(namein, CStringGetDatum(dbname));
	new_record[Anum_pg_database_datdba - 1] = ObjectIdGetDatum(datdba);
	new_record[Anum_pg_database_encoding - 1] = Int32GetDatum(encoding);
	new_record[Anum_pg_database_datlocprovider - 1] = CharGetDatum(dblocprovider);
	new_record[Anum_pg_database_datistemplate - 1] = BoolGetDatum(dbistemplate);
	new_record[Anum_pg_database_datallowconn - 1] = BoolGetDatum(dballowconnections);
	new_record[Anum_pg_database_datconnlimit - 1] = Int32GetDatum(dbconnlimit);
	new_record[Anum_pg_database_datfrozenxid - 1] = TransactionIdGetDatum(src_frozenxid);
	new_record[Anum_pg_database_datminmxid - 1] = TransactionIdGetDatum(src_minmxid);
	new_record[Anum_pg_database_dattablespace - 1] = ObjectIdGetDatum(dst_deftablespace);
	new_record[Anum_pg_database_datcollate - 1] = CStringGetTextDatum(dbcollate);
	new_record[Anum_pg_database_datctype - 1] = CStringGetTextDatum(dbctype);
	if (dbiculocale)
		new_record[Anum_pg_database_daticulocale - 1] = CStringGetTextDatum(dbiculocale);
	else
		new_record_nulls[Anum_pg_database_daticulocale - 1] = true;
	if (dbcollversion)
		new_record[Anum_pg_database_datcollversion - 1] = CStringGetTextDatum(dbcollversion);
	else
		new_record_nulls[Anum_pg_database_datcollversion - 1] = true;

	/*
	 * We deliberately set datacl to default (NULL), rather than copying it
	 * from the template database. Copying it would be a bad idea when the
	 * owner is not the same as the template's owner.
	 */
	new_record_nulls[Anum_pg_database_datacl - 1] = true;

	tuple = heap_form_tuple(RelationGetDescr(pg_database_rel),
							new_record, new_record_nulls);

	CatalogTupleInsert(pg_database_rel, tuple);

	/*
	 * Now generate additional catalog entries associated with the new DB
	 */

	/* Register owner dependency */
	recordDependencyOnOwner(DatabaseRelationId, dboid, datdba);

	/* Create pg_shdepend entries for objects within database */
	copyTemplateDependencies(src_dboid, dboid);

	/* Post creation hook for new database */
	InvokeObjectPostCreateHook(DatabaseRelationId, dboid, 0);

	/*
	 * If we're going to be reading data for the to-be-created database into
	 * shared_buffers, take a lock on it. Nobody should know that this
	 * database exists yet, but it's good to maintain the invariant that a
	 * lock an AccessExclusiveLock on the database is sufficient to drop all
	 * of its buffers without worrying about more being read later.
	 *
	 * Note that we need to do this before entering the
	 * PG_ENSURE_ERROR_CLEANUP block below, because createdb_failure_callback
	 * expects this lock to be held already.
	 */
	if (dbstrategy == CREATEDB_WAL_LOG)
		LockSharedObject(DatabaseRelationId, dboid, 0, AccessShareLock);

	/*
	 * Once we start copying subdirectories, we need to be able to clean 'em
	 * up if we fail. Use an ENSURE block to make sure this happens. (This
	 * is not a 100% solution, because of the possibility of failure during
	 * transaction commit after we leave this routine, but it should handle
	 * most scenarios.)
	 */
	fparms.src_dboid = src_dboid;
	fparms.dest_dboid = dboid;
	fparms.strategy = dbstrategy;

	PG_ENSURE_ERROR_CLEANUP(createdb_failure_callback,
							PointerGetDatum(&fparms));
	{
		/*
		 * If the user has asked to create a database with WAL_LOG strategy
		 * then call CreateDatabaseUsingWalLog, which will copy the database
		 * at the block level and it will WAL log each copied block.
		 * Otherwise, call CreateDatabaseUsingFileCopy that will copy the
		 * database file by file.
		 */
		if (dbstrategy == CREATEDB_WAL_LOG)
			CreateDatabaseUsingWalLog(src_dboid, dboid, src_deftablespace,
									  dst_deftablespace);
		else
			CreateDatabaseUsingFileCopy(src_dboid, dboid, src_deftablespace,
										dst_deftablespace);

		/*
		 * Close pg_database, but keep lock till commit.
		 */
		table_close(pg_database_rel, NoLock);

		/*
		 * Force synchronous commit, thus minimizing the window between
		 * creation of the database files and committal of the transaction. If
		 * we crash before committing, we'll have a DB that's taking up disk
		 * space but is not in pg_database, which is not good.
		 */
		ForceSyncCommit();
	}
	PG_END_ENSURE_ERROR_CLEANUP(createdb_failure_callback,
								PointerGetDatum(&fparms));

	return dboid;
}
+
+/*
+ * Check whether chosen encoding matches chosen locale settings. This
+ * restriction is necessary because libc's locale-specific code usually
+ * fails when presented with data in an encoding it's not expecting. We
+ * allow mismatch in four cases:
+ *
+ * 1. locale encoding = SQL_ASCII, which means that the locale is C/POSIX
+ * which works with any encoding.
+ *
+ * 2. locale encoding = -1, which means that we couldn't determine the
+ * locale's encoding and have to trust the user to get it right.
+ *
+ * 3. selected encoding is UTF8 and platform is win32. This is because
+ * UTF8 is a pseudo codepage that is supported in all locales since it's
+ * converted to UTF16 before being used.
+ *
+ * 4. selected encoding is SQL_ASCII, but only if you're a superuser. This
+ * is risky but we have historically allowed it --- notably, the
+ * regression tests require it.
+ *
+ * Note: if you change this policy, fix initdb to match.
+ */
+void
+check_encoding_locale_matches(int encoding, const char *collate, const char *ctype)
+{
+ int ctype_encoding = pg_get_encoding_from_locale(ctype, true);
+ int collate_encoding = pg_get_encoding_from_locale(collate, true);
+
+ if (!(ctype_encoding == encoding ||
+ ctype_encoding == PG_SQL_ASCII ||
+ ctype_encoding == -1 ||
+#ifdef WIN32
+ encoding == PG_UTF8 ||
+#endif
+ (encoding == PG_SQL_ASCII && superuser())))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("encoding \"%s\" does not match locale \"%s\"",
+ pg_encoding_to_char(encoding),
+ ctype),
+ errdetail("The chosen LC_CTYPE setting requires encoding \"%s\".",
+ pg_encoding_to_char(ctype_encoding))));
+
+ if (!(collate_encoding == encoding ||
+ collate_encoding == PG_SQL_ASCII ||
+ collate_encoding == -1 ||
+#ifdef WIN32
+ encoding == PG_UTF8 ||
+#endif
+ (encoding == PG_SQL_ASCII && superuser())))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("encoding \"%s\" does not match locale \"%s\"",
+ pg_encoding_to_char(encoding),
+ collate),
+ errdetail("The chosen LC_COLLATE setting requires encoding \"%s\".",
+ pg_encoding_to_char(collate_encoding))));
+}
+
+/* Error cleanup callback for createdb */
+static void
+createdb_failure_callback(int code, Datum arg)
+{
+ createdb_failure_params *fparms = (createdb_failure_params *) DatumGetPointer(arg);
+
+ /*
+ * If we were copying database at block levels then drop pages for the
+ * destination database that are in the shared buffer cache. And tell
+ * checkpointer to forget any pending fsync and unlink requests for files
+ * in the database. The reasoning behind doing this is same as explained
+ * in dropdb function. But unlike dropdb we don't need to call
+ * pgstat_drop_database because this database is still not created so
+ * there should not be any stat for this.
+ */
+ if (fparms->strategy == CREATEDB_WAL_LOG)
+ {
+ DropDatabaseBuffers(fparms->dest_dboid);
+ ForgetDatabaseSyncRequests(fparms->dest_dboid);
+
+ /* Release lock on the target database. */
+ UnlockSharedObject(DatabaseRelationId, fparms->dest_dboid, 0,
+ AccessShareLock);
+ }
+
+ /*
+ * Release lock on source database before doing recursive remove. This is
+ * not essential but it seems desirable to release the lock as soon as
+ * possible.
+ */
+ UnlockSharedObject(DatabaseRelationId, fparms->src_dboid, 0, ShareLock);
+
+ /* Throw away any successfully copied subdirectories */
+ remove_dbtablespaces(fparms->dest_dboid);
+}
+
+
+/*
+ * DROP DATABASE
+ */
+void
+dropdb(const char *dbname, bool missing_ok, bool force)
+{
+ Oid db_id;
+ bool db_istemplate;
+ Relation pgdbrel;
+ HeapTuple tup;
+ Form_pg_database datform;
+ int notherbackends;
+ int npreparedxacts;
+ int nslots,
+ nslots_active;
+ int nsubscriptions;
+
+ /*
+ * Look up the target database's OID, and get exclusive lock on it. We
+ * need this to ensure that no new backend starts up in the target
+ * database while we are deleting it (see postinit.c), and that no one is
+ * using it as a CREATE DATABASE template or trying to delete it for
+ * themselves.
+ */
+ pgdbrel = table_open(DatabaseRelationId, RowExclusiveLock);
+
+ if (!get_db_info(dbname, AccessExclusiveLock, &db_id, NULL, NULL,
+ &db_istemplate, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL))
+ {
+ if (!missing_ok)
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_DATABASE),
+ errmsg("database \"%s\" does not exist", dbname)));
+ }
+ else
+ {
+ /* Close pg_database, release the lock, since we changed nothing */
+ table_close(pgdbrel, RowExclusiveLock);
+ ereport(NOTICE,
+ (errmsg("database \"%s\" does not exist, skipping",
+ dbname)));
+ return;
+ }
+ }
+
+ /*
+ * Permission checks
+ */
+ if (!pg_database_ownercheck(db_id, GetUserId()))
+ aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE,
+ dbname);
+
+ /* DROP hook for the database being removed */
+ InvokeObjectDropHook(DatabaseRelationId, db_id, 0);
+
+ /*
+ * Disallow dropping a DB that is marked istemplate. This is just to
+ * prevent people from accidentally dropping template0 or template1; they
+ * can do so if they're really determined ...
+ */
+ if (db_istemplate)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot drop a template database")));
+
+ /* Obviously can't drop my own database */
+ if (db_id == MyDatabaseId)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_IN_USE),
+ errmsg("cannot drop the currently open database")));
+
+ /*
+ * Check whether there are active logical slots that refer to the
+ * to-be-dropped database. The database lock we are holding prevents the
+ * creation of new slots using the database or existing slots becoming
+ * active.
+ */
+ (void) ReplicationSlotsCountDBSlots(db_id, &nslots, &nslots_active);
+ if (nslots_active)
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_IN_USE),
+ errmsg("database \"%s\" is used by an active logical replication slot",
+ dbname),
+ errdetail_plural("There is %d active slot.",
+ "There are %d active slots.",
+ nslots_active, nslots_active)));
+ }
+
+ /*
+ * Check if there are subscriptions defined in the target database.
+ *
+ * We can't drop them automatically because they might be holding
+ * resources in other databases/instances.
+ */
+ if ((nsubscriptions = CountDBSubscriptions(db_id)) > 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_IN_USE),
+ errmsg("database \"%s\" is being used by logical replication subscription",
+ dbname),
+ errdetail_plural("There is %d subscription.",
+ "There are %d subscriptions.",
+ nsubscriptions, nsubscriptions)));
+
+
+ /*
+ * Attempt to terminate all existing connections to the target database if
+ * the user has requested to do so.
+ */
+ if (force)
+ TerminateOtherDBBackends(db_id);
+
+ /*
+ * Check for other backends in the target database. (Because we hold the
+ * database lock, no new ones can start after this.)
+ *
+ * As in CREATE DATABASE, check this after other error conditions.
+ */
+ if (CountOtherDBBackends(db_id, &notherbackends, &npreparedxacts))
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_IN_USE),
+ errmsg("database \"%s\" is being accessed by other users",
+ dbname),
+ errdetail_busy_db(notherbackends, npreparedxacts)));
+
+ /*
+ * Delete any comments or security labels associated with the database.
+ */
+ DeleteSharedComments(db_id, DatabaseRelationId);
+ DeleteSharedSecurityLabel(db_id, DatabaseRelationId);
+
+ /*
+ * Remove settings associated with this database
+ */
+ DropSetting(db_id, InvalidOid);
+
+ /*
+ * Remove shared dependency references for the database.
+ */
+ dropDatabaseDependencies(db_id);
+
+ /*
+ * Tell the cumulative stats system to forget it immediately, too.
+ */
+ pgstat_drop_database(db_id);
+
+ tup = SearchSysCacheCopy1(DATABASEOID, ObjectIdGetDatum(db_id));
+ if (!HeapTupleIsValid(tup))
+ elog(ERROR, "cache lookup failed for database %u", db_id);
+ datform = (Form_pg_database) GETSTRUCT(tup);
+
+ /*
+ * Except for the deletion of the catalog row, subsequent actions are not
+ * transactional (consider DropDatabaseBuffers() discarding modified
+ * buffers). But we might crash or get interrupted below. To prevent
+ * accesses to a database with invalid contents, mark the database as
+ * invalid using an in-place update.
+ *
+ * We need to flush the WAL before continuing, to guarantee the
+ * modification is durable before performing irreversible filesystem
+ * operations.
+ */
+ datform->datconnlimit = DATCONNLIMIT_INVALID_DB;
+ heap_inplace_update(pgdbrel, tup);
+ XLogFlush(XactLastRecEnd);
+
+ /*
+ * Also delete the tuple - transactionally. If this transaction commits,
+ * the row will be gone, but if we fail, dropdb() can be invoked again.
+ */
+ CatalogTupleDelete(pgdbrel, &tup->t_self);
+
+ /*
+ * Drop db-specific replication slots.
+ */
+ ReplicationSlotsDropDBSlots(db_id);
+
+ /*
+ * Drop pages for this database that are in the shared buffer cache. This
+ * is important to ensure that no remaining backend tries to write out a
+ * dirty buffer to the dead database later...
+ */
+ DropDatabaseBuffers(db_id);
+
+ /*
+ * Tell checkpointer to forget any pending fsync and unlink requests for
+ * files in the database; else the fsyncs will fail at next checkpoint, or
+ * worse, it will delete files that belong to a newly created database
+ * with the same OID.
+ */
+ ForgetDatabaseSyncRequests(db_id);
+
+ /*
+ * Force a checkpoint to make sure the checkpointer has received the
+ * message sent by ForgetDatabaseSyncRequests.
+ */
+ RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT);
+
+ /* Close all smgr fds in all backends. */
+ WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));
+
+ /*
+ * Remove all tablespace subdirs belonging to the database.
+ */
+ remove_dbtablespaces(db_id);
+
+ /*
+ * Close pg_database, but keep lock till commit.
+ */
+ table_close(pgdbrel, NoLock);
+
+ /*
+ * Force synchronous commit, thus minimizing the window between removal of
+ * the database files and committal of the transaction. If we crash before
+ * committing, we'll have a DB that's gone on disk but still there
+ * according to pg_database, which is not good.
+ */
+ ForceSyncCommit();
+}
+
+
+/*
+ * Rename database
+ *
+ * Implements ALTER DATABASE oldname RENAME TO newname.  Returns the
+ * ObjectAddress of the affected database.  Errors out if the database does
+ * not exist, the caller is not its owner or lacks CREATEDB privilege, the
+ * new name is already in use, the database is the session's current one,
+ * or other backends are connected to it.
+ */
+ObjectAddress
+RenameDatabase(const char *oldname, const char *newname)
+{
+	Oid			db_id;
+	HeapTuple	newtup;
+	Relation	rel;
+	int			notherbackends;
+	int			npreparedxacts;
+	ObjectAddress address;
+
+	/*
+	 * Look up the target database's OID, and get exclusive lock on it. We
+	 * need this for the same reasons as DROP DATABASE.
+	 */
+	rel = table_open(DatabaseRelationId, RowExclusiveLock);
+
+	if (!get_db_info(oldname, AccessExclusiveLock, &db_id, NULL, NULL,
+					 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_DATABASE),
+				 errmsg("database \"%s\" does not exist", oldname)));
+
+	/* must be owner */
+	if (!pg_database_ownercheck(db_id, GetUserId()))
+		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE,
+					   oldname);
+
+	/* must have createdb rights */
+	if (!have_createdb_privilege())
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 errmsg("permission denied to rename database")));
+
+	/*
+	 * If built with appropriate switch, whine when regression-testing
+	 * conventions for database names are violated.
+	 */
+#ifdef ENFORCE_REGRESSION_TEST_NAME_RESTRICTIONS
+	if (strstr(newname, "regression") == NULL)
+		elog(WARNING, "databases created by regression test cases should have names including \"regression\"");
+#endif
+
+	/*
+	 * Make sure the new name doesn't exist.  See notes for same error in
+	 * CREATE DATABASE.
+	 */
+	if (OidIsValid(get_database_oid(newname, true)))
+		ereport(ERROR,
+				(errcode(ERRCODE_DUPLICATE_DATABASE),
+				 errmsg("database \"%s\" already exists", newname)));
+
+	/*
+	 * XXX Client applications probably store the current database somewhere,
+	 * so renaming it could cause confusion.  On the other hand, there may not
+	 * be an actual problem besides a little confusion, so think about this
+	 * and decide.
+	 */
+	if (db_id == MyDatabaseId)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("current database cannot be renamed")));
+
+	/*
+	 * Make sure the database does not have active sessions.  This is the same
+	 * concern as above, but applied to other sessions.
+	 *
+	 * As in CREATE DATABASE, check this after other error conditions.
+	 */
+	if (CountOtherDBBackends(db_id, &notherbackends, &npreparedxacts))
+		ereport(ERROR,
+				(errcode(ERRCODE_OBJECT_IN_USE),
+				 errmsg("database \"%s\" is being accessed by other users",
+						oldname),
+				 errdetail_busy_db(notherbackends, npreparedxacts)));
+
+	/* rename: overwrite datname in a copy of the catalog row */
+	newtup = SearchSysCacheCopy1(DATABASEOID, ObjectIdGetDatum(db_id));
+	if (!HeapTupleIsValid(newtup))
+		elog(ERROR, "cache lookup failed for database %u", db_id);
+	namestrcpy(&(((Form_pg_database) GETSTRUCT(newtup))->datname), newname);
+	CatalogTupleUpdate(rel, &newtup->t_self, newtup);
+
+	InvokeObjectPostAlterHook(DatabaseRelationId, db_id, 0);
+
+	ObjectAddressSet(address, DatabaseRelationId, db_id);
+
+	/*
+	 * Close pg_database, but keep lock till commit.
+	 */
+	table_close(rel, NoLock);
+
+	return address;
+}
+
+
+/*
+ * ALTER DATABASE SET TABLESPACE
+ *
+ * Physically moves all relations of database "dbname" that live in its
+ * default tablespace into tablespace "tblspcname", then updates
+ * pg_database.dattablespace.  The overall sequence is: lock the database,
+ * checkpoint, copy the files, update the catalog, commit, then remove the
+ * old files in a fresh transaction.  Must not be run inside a transaction
+ * block (enforced by the caller, AlterDatabase).
+ */
+static void
+movedb(const char *dbname, const char *tblspcname)
+{
+	Oid			db_id;
+	Relation	pgdbrel;
+	int			notherbackends;
+	int			npreparedxacts;
+	HeapTuple	oldtuple,
+				newtuple;
+	Oid			src_tblspcoid,
+				dst_tblspcoid;
+	Datum		new_record[Natts_pg_database];
+	bool		new_record_nulls[Natts_pg_database];
+	bool		new_record_repl[Natts_pg_database];
+	ScanKeyData scankey;
+	SysScanDesc sysscan;
+	AclResult	aclresult;
+	char	   *src_dbpath;
+	char	   *dst_dbpath;
+	DIR		   *dstdir;
+	struct dirent *xlde;
+	movedb_failure_params fparms;
+
+	/*
+	 * Look up the target database's OID, and get exclusive lock on it. We
+	 * need this to ensure that no new backend starts up in the database while
+	 * we are moving it, and that no one is using it as a CREATE DATABASE
+	 * template or trying to delete it.
+	 */
+	pgdbrel = table_open(DatabaseRelationId, RowExclusiveLock);
+
+	if (!get_db_info(dbname, AccessExclusiveLock, &db_id, NULL, NULL,
+					 NULL, NULL, NULL, NULL, &src_tblspcoid, NULL, NULL, NULL, NULL, NULL))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_DATABASE),
+				 errmsg("database \"%s\" does not exist", dbname)));
+
+	/*
+	 * We actually need a session lock, so that the lock will persist across
+	 * the commit/restart below.  (We could almost get away with letting the
+	 * lock be released at commit, except that someone could try to move
+	 * relations of the DB back into the old directory while we rmtree() it.)
+	 */
+	LockSharedObjectForSession(DatabaseRelationId, db_id, 0,
+							   AccessExclusiveLock);
+
+	/*
+	 * Permission checks
+	 */
+	if (!pg_database_ownercheck(db_id, GetUserId()))
+		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE,
+					   dbname);
+
+	/*
+	 * Obviously can't move the tables of my own database
+	 */
+	if (db_id == MyDatabaseId)
+		ereport(ERROR,
+				(errcode(ERRCODE_OBJECT_IN_USE),
+				 errmsg("cannot change the tablespace of the currently open database")));
+
+	/*
+	 * Get tablespace's oid
+	 */
+	dst_tblspcoid = get_tablespace_oid(tblspcname, false);
+
+	/*
+	 * Permission checks
+	 */
+	aclresult = pg_tablespace_aclcheck(dst_tblspcoid, GetUserId(),
+									   ACL_CREATE);
+	if (aclresult != ACLCHECK_OK)
+		aclcheck_error(aclresult, OBJECT_TABLESPACE,
+					   tblspcname);
+
+	/*
+	 * pg_global must never be the default tablespace
+	 */
+	if (dst_tblspcoid == GLOBALTABLESPACE_OID)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("pg_global cannot be used as default tablespace")));
+
+	/*
+	 * No-op if same tablespace; release the session lock taken above before
+	 * returning.
+	 */
+	if (src_tblspcoid == dst_tblspcoid)
+	{
+		table_close(pgdbrel, NoLock);
+		UnlockSharedObjectForSession(DatabaseRelationId, db_id, 0,
+									 AccessExclusiveLock);
+		return;
+	}
+
+	/*
+	 * Check for other backends in the target database.  (Because we hold the
+	 * database lock, no new ones can start after this.)
+	 *
+	 * As in CREATE DATABASE, check this after other error conditions.
+	 */
+	if (CountOtherDBBackends(db_id, &notherbackends, &npreparedxacts))
+		ereport(ERROR,
+				(errcode(ERRCODE_OBJECT_IN_USE),
+				 errmsg("database \"%s\" is being accessed by other users",
+						dbname),
+				 errdetail_busy_db(notherbackends, npreparedxacts)));
+
+	/*
+	 * Get old and new database paths
+	 */
+	src_dbpath = GetDatabasePath(db_id, src_tblspcoid);
+	dst_dbpath = GetDatabasePath(db_id, dst_tblspcoid);
+
+	/*
+	 * Force a checkpoint before proceeding. This will force all dirty
+	 * buffers, including those of unlogged tables, out to disk, to ensure
+	 * source database is up-to-date on disk for the copy.
+	 * FlushDatabaseBuffers() would suffice for that, but we also want to
+	 * process any pending unlink requests. Otherwise, the check for existing
+	 * files in the target directory might fail unnecessarily, not to mention
+	 * that the copy might fail due to source files getting deleted under it.
+	 * On Windows, this also ensures that background procs don't hold any open
+	 * files, which would cause rmdir() to fail.
+	 */
+	RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT
+					  | CHECKPOINT_FLUSH_ALL);
+
+	/* Close all smgr fds in all backends. */
+	WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));
+
+	/*
+	 * Now drop all buffers holding data of the target database; they should
+	 * no longer be dirty so DropDatabaseBuffers is safe.
+	 *
+	 * It might seem that we could just let these buffers age out of shared
+	 * buffers naturally, since they should not get referenced anymore.  The
+	 * problem with that is that if the user later moves the database back to
+	 * its original tablespace, any still-surviving buffers would appear to
+	 * contain valid data again --- but they'd be missing any changes made in
+	 * the database while it was in the new tablespace.  In any case, freeing
+	 * buffers that should never be used again seems worth the cycles.
+	 *
+	 * Note: it'd be sufficient to get rid of buffers matching db_id and
+	 * src_tblspcoid, but bufmgr.c presently provides no API for that.
+	 */
+	DropDatabaseBuffers(db_id);
+
+	/*
+	 * Check for existence of files in the target directory, i.e., objects of
+	 * this database that are already in the target tablespace.  We can't
+	 * allow the move in such a case, because we would need to change those
+	 * relations' pg_class.reltablespace entries to zero, and we don't have
+	 * access to the DB's pg_class to do so.
+	 */
+	dstdir = AllocateDir(dst_dbpath);
+	if (dstdir != NULL)
+	{
+		while ((xlde = ReadDir(dstdir, dst_dbpath)) != NULL)
+		{
+			if (strcmp(xlde->d_name, ".") == 0 ||
+				strcmp(xlde->d_name, "..") == 0)
+				continue;
+
+			/* Any real entry means the move cannot proceed */
+			ereport(ERROR,
+					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+					 errmsg("some relations of database \"%s\" are already in tablespace \"%s\"",
+							dbname, tblspcname),
+					 errhint("You must move them back to the database's default tablespace before using this command.")));
+		}
+
+		FreeDir(dstdir);
+
+		/*
+		 * The directory exists but is empty. We must remove it before using
+		 * the copydir function.
+		 */
+		if (rmdir(dst_dbpath) != 0)
+			elog(ERROR, "could not remove directory \"%s\": %m",
+				 dst_dbpath);
+	}
+
+	/*
+	 * Use an ENSURE block to make sure we remove the debris if the copy fails
+	 * (eg, due to out-of-disk-space).  This is not a 100% solution, because
+	 * of the possibility of failure during transaction commit, but it should
+	 * handle most scenarios.
+	 */
+	fparms.dest_dboid = db_id;
+	fparms.dest_tsoid = dst_tblspcoid;
+	PG_ENSURE_ERROR_CLEANUP(movedb_failure_callback,
+							PointerGetDatum(&fparms));
+	{
+		/*
+		 * Copy files from the old tablespace to the new one
+		 */
+		copydir(src_dbpath, dst_dbpath, false);
+
+		/*
+		 * Record the filesystem change in XLOG
+		 */
+		{
+			xl_dbase_create_file_copy_rec xlrec;
+
+			xlrec.db_id = db_id;
+			xlrec.tablespace_id = dst_tblspcoid;
+			xlrec.src_db_id = db_id;
+			xlrec.src_tablespace_id = src_tblspcoid;
+
+			XLogBeginInsert();
+			XLogRegisterData((char *) &xlrec,
+							 sizeof(xl_dbase_create_file_copy_rec));
+
+			(void) XLogInsert(RM_DBASE_ID,
+							  XLOG_DBASE_CREATE_FILE_COPY | XLR_SPECIAL_REL_UPDATE);
+		}
+
+		/*
+		 * Update the database's pg_database tuple
+		 */
+		ScanKeyInit(&scankey,
+					Anum_pg_database_datname,
+					BTEqualStrategyNumber, F_NAMEEQ,
+					CStringGetDatum(dbname));
+		sysscan = systable_beginscan(pgdbrel, DatabaseNameIndexId, true,
+									 NULL, 1, &scankey);
+		oldtuple = systable_getnext(sysscan);
+		if (!HeapTupleIsValid(oldtuple))	/* shouldn't happen... */
+			ereport(ERROR,
+					(errcode(ERRCODE_UNDEFINED_DATABASE),
+					 errmsg("database \"%s\" does not exist", dbname)));
+
+		MemSet(new_record, 0, sizeof(new_record));
+		MemSet(new_record_nulls, false, sizeof(new_record_nulls));
+		MemSet(new_record_repl, false, sizeof(new_record_repl));
+
+		/* Only dattablespace is replaced; all other columns stay as-is */
+		new_record[Anum_pg_database_dattablespace - 1] = ObjectIdGetDatum(dst_tblspcoid);
+		new_record_repl[Anum_pg_database_dattablespace - 1] = true;
+
+		newtuple = heap_modify_tuple(oldtuple, RelationGetDescr(pgdbrel),
+									 new_record,
+									 new_record_nulls, new_record_repl);
+		CatalogTupleUpdate(pgdbrel, &oldtuple->t_self, newtuple);
+
+		InvokeObjectPostAlterHook(DatabaseRelationId, db_id, 0);
+
+		systable_endscan(sysscan);
+
+		/*
+		 * Force another checkpoint here.  As in CREATE DATABASE, this is to
+		 * ensure that we don't have to replay a committed
+		 * XLOG_DBASE_CREATE_FILE_COPY operation, which would cause us to lose
+		 * any unlogged operations done in the new DB tablespace before the
+		 * next checkpoint.
+		 */
+		RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT);
+
+		/*
+		 * Force synchronous commit, thus minimizing the window between
+		 * copying the database files and committal of the transaction. If we
+		 * crash before committing, we'll leave an orphaned set of files on
+		 * disk, which is not fatal but not good either.
+		 */
+		ForceSyncCommit();
+
+		/*
+		 * Close pg_database, but keep lock till commit.
+		 */
+		table_close(pgdbrel, NoLock);
+	}
+	PG_END_ENSURE_ERROR_CLEANUP(movedb_failure_callback,
+								PointerGetDatum(&fparms));
+
+	/*
+	 * Commit the transaction so that the pg_database update is committed. If
+	 * we crash while removing files, the database won't be corrupt, we'll
+	 * just leave some orphaned files in the old directory.
+	 *
+	 * (This is OK because we know we aren't inside a transaction block.)
+	 *
+	 * XXX would it be safe/better to do this inside the ensure block? Not
+	 * convinced it's a good idea; consider elog just after the transaction
+	 * really commits.
+	 */
+	PopActiveSnapshot();
+	CommitTransactionCommand();
+
+	/* Start new transaction for the remaining work; don't need a snapshot */
+	StartTransactionCommand();
+
+	/*
+	 * Remove files from the old tablespace
+	 */
+	if (!rmtree(src_dbpath, true))
+		ereport(WARNING,
+				(errmsg("some useless files may be left behind in old database directory \"%s\"",
+						src_dbpath)));
+
+	/*
+	 * Record the filesystem change in XLOG
+	 */
+	{
+		xl_dbase_drop_rec xlrec;
+
+		xlrec.db_id = db_id;
+		xlrec.ntablespaces = 1;
+
+		XLogBeginInsert();
+		XLogRegisterData((char *) &xlrec, sizeof(xl_dbase_drop_rec));
+		XLogRegisterData((char *) &src_tblspcoid, sizeof(Oid));
+
+		(void) XLogInsert(RM_DBASE_ID,
+						  XLOG_DBASE_DROP | XLR_SPECIAL_REL_UPDATE);
+	}
+
+	/* Now it's safe to release the database lock */
+	UnlockSharedObjectForSession(DatabaseRelationId, db_id, 0,
+								 AccessExclusiveLock);
+
+	pfree(src_dbpath);
+	pfree(dst_dbpath);
+}
+
+/*
+ * Error cleanup callback for movedb: remove whatever was copied into the
+ * destination tablespace directory before the failure occurred.
+ */
+static void
+movedb_failure_callback(int code, Datum arg)
+{
+	movedb_failure_params *params = (movedb_failure_params *) DatumGetPointer(arg);
+	char	   *target_path = GetDatabasePath(params->dest_dboid,
+											  params->dest_tsoid);
+
+	/* Best-effort removal; ignore the result since we're already failing */
+	(void) rmtree(target_path, true);
+
+	pfree(target_path);
+}
+
+/*
+ * Process options and call dropdb function.
+ *
+ * "force" is the only option DROP DATABASE accepts; anything else raises a
+ * syntax error with the offending option's parse location.
+ */
+void
+DropDatabase(ParseState *pstate, DropdbStmt *stmt)
+{
+	ListCell   *option;
+	bool		force_requested = false;
+
+	foreach(option, stmt->options)
+	{
+		DefElem    *defel = (DefElem *) lfirst(option);
+
+		if (strcmp(defel->defname, "force") != 0)
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("unrecognized DROP DATABASE option \"%s\"", defel->defname),
+					 parser_errposition(pstate, defel->location)));
+
+		force_requested = true;
+	}
+
+	dropdb(stmt->dbname, stmt->missing_ok, force_requested);
+}
+
+/*
+ * ALTER DATABASE name ...
+ *
+ * Handles the option-list form of ALTER DATABASE: IS_TEMPLATE,
+ * ALLOW_CONNECTIONS, CONNECTION LIMIT, and SET TABLESPACE.  SET TABLESPACE
+ * is special: it must appear alone, cannot run inside a transaction block,
+ * and is delegated to movedb(); in that case InvalidOid is returned.
+ * Otherwise the pg_database row is updated in place and the database's OID
+ * is returned.
+ */
+Oid
+AlterDatabase(ParseState *pstate, AlterDatabaseStmt *stmt, bool isTopLevel)
+{
+	Relation	rel;
+	Oid			dboid;
+	HeapTuple	tuple,
+				newtuple;
+	Form_pg_database datform;
+	ScanKeyData scankey;
+	SysScanDesc scan;
+	ListCell   *option;
+	bool		dbistemplate = false;
+	bool		dballowconnections = true;
+	int			dbconnlimit = DATCONNLIMIT_UNLIMITED;
+	DefElem    *distemplate = NULL;
+	DefElem    *dallowconnections = NULL;
+	DefElem    *dconnlimit = NULL;
+	DefElem    *dtablespace = NULL;
+	Datum		new_record[Natts_pg_database];
+	bool		new_record_nulls[Natts_pg_database];
+	bool		new_record_repl[Natts_pg_database];
+
+	/* Extract options from the statement node tree */
+	foreach(option, stmt->options)
+	{
+		DefElem    *defel = (DefElem *) lfirst(option);
+
+		if (strcmp(defel->defname, "is_template") == 0)
+		{
+			if (distemplate)
+				errorConflictingDefElem(defel, pstate);
+			distemplate = defel;
+		}
+		else if (strcmp(defel->defname, "allow_connections") == 0)
+		{
+			if (dallowconnections)
+				errorConflictingDefElem(defel, pstate);
+			dallowconnections = defel;
+		}
+		else if (strcmp(defel->defname, "connection_limit") == 0)
+		{
+			if (dconnlimit)
+				errorConflictingDefElem(defel, pstate);
+			dconnlimit = defel;
+		}
+		else if (strcmp(defel->defname, "tablespace") == 0)
+		{
+			if (dtablespace)
+				errorConflictingDefElem(defel, pstate);
+			dtablespace = defel;
+		}
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("option \"%s\" not recognized", defel->defname),
+					 parser_errposition(pstate, defel->location)));
+	}
+
+	if (dtablespace)
+	{
+		/*
+		 * While the SET TABLESPACE syntax doesn't allow any other options,
+		 * somebody could write "WITH TABLESPACE ...".  Forbid any other
+		 * options from being specified in that case.
+		 */
+		if (list_length(stmt->options) != 1)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("option \"%s\" cannot be specified with other options",
+							dtablespace->defname),
+					 parser_errposition(pstate, dtablespace->location)));
+		/* this case isn't allowed within a transaction block */
+		PreventInTransactionBlock(isTopLevel, "ALTER DATABASE SET TABLESPACE");
+		movedb(stmt->dbname, defGetString(dtablespace));
+		return InvalidOid;
+	}
+
+	/* Resolve option values; options given without an argument use defaults */
+	if (distemplate && distemplate->arg)
+		dbistemplate = defGetBoolean(distemplate);
+	if (dallowconnections && dallowconnections->arg)
+		dballowconnections = defGetBoolean(dallowconnections);
+	if (dconnlimit && dconnlimit->arg)
+	{
+		dbconnlimit = defGetInt32(dconnlimit);
+		if (dbconnlimit < DATCONNLIMIT_UNLIMITED)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("invalid connection limit: %d", dbconnlimit)));
+	}
+
+	/*
+	 * Get the old tuple.  We don't need a lock on the database per se,
+	 * because we're not going to do anything that would mess up incoming
+	 * connections.
+	 */
+	rel = table_open(DatabaseRelationId, RowExclusiveLock);
+	ScanKeyInit(&scankey,
+				Anum_pg_database_datname,
+				BTEqualStrategyNumber, F_NAMEEQ,
+				CStringGetDatum(stmt->dbname));
+	scan = systable_beginscan(rel, DatabaseNameIndexId, true,
+							  NULL, 1, &scankey);
+	tuple = systable_getnext(scan);
+	if (!HeapTupleIsValid(tuple))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_DATABASE),
+				 errmsg("database \"%s\" does not exist", stmt->dbname)));
+
+	datform = (Form_pg_database) GETSTRUCT(tuple);
+	dboid = datform->oid;
+
+	/* A database marked invalid by a failed DROP cannot be altered */
+	if (database_is_invalid_form(datform))
+	{
+		ereport(FATAL,
+				errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				errmsg("cannot alter invalid database \"%s\"", stmt->dbname),
+				errhint("Use DROP DATABASE to drop invalid databases."));
+	}
+
+	if (!pg_database_ownercheck(dboid, GetUserId()))
+		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE,
+					   stmt->dbname);
+
+	/*
+	 * In order to avoid getting locked out and having to go through
+	 * standalone mode, we refuse to disallow connections to the database
+	 * we're currently connected to.  Lockout can still happen with concurrent
+	 * sessions but the likeliness of that is not high enough to worry about.
+	 */
+	if (!dballowconnections && dboid == MyDatabaseId)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("cannot disallow connections for current database")));
+
+	/*
+	 * Build an updated tuple, perusing the information just obtained
+	 */
+	MemSet(new_record, 0, sizeof(new_record));
+	MemSet(new_record_nulls, false, sizeof(new_record_nulls));
+	MemSet(new_record_repl, false, sizeof(new_record_repl));
+
+	/* Only replace the columns whose options were actually specified */
+	if (distemplate)
+	{
+		new_record[Anum_pg_database_datistemplate - 1] = BoolGetDatum(dbistemplate);
+		new_record_repl[Anum_pg_database_datistemplate - 1] = true;
+	}
+	if (dallowconnections)
+	{
+		new_record[Anum_pg_database_datallowconn - 1] = BoolGetDatum(dballowconnections);
+		new_record_repl[Anum_pg_database_datallowconn - 1] = true;
+	}
+	if (dconnlimit)
+	{
+		new_record[Anum_pg_database_datconnlimit - 1] = Int32GetDatum(dbconnlimit);
+		new_record_repl[Anum_pg_database_datconnlimit - 1] = true;
+	}
+
+	newtuple = heap_modify_tuple(tuple, RelationGetDescr(rel), new_record,
+								 new_record_nulls, new_record_repl);
+	CatalogTupleUpdate(rel, &tuple->t_self, newtuple);
+
+	InvokeObjectPostAlterHook(DatabaseRelationId, dboid, 0);
+
+	systable_endscan(scan);
+
+	/* Close pg_database, but keep lock till commit */
+	table_close(rel, NoLock);
+
+	return dboid;
+}
+
+
+/*
+ * ALTER DATABASE name REFRESH COLLATION VERSION
+ *
+ * Recomputes the actual version of the database's default collation (from
+ * daticulocale for ICU, datcollate otherwise) and stores it in
+ * datcollversion if it differs from the recorded value.  Emits a NOTICE
+ * either way.  Returns the ObjectAddress of the database.  Only the
+ * database owner may do this.
+ */
+ObjectAddress
+AlterDatabaseRefreshColl(AlterDatabaseRefreshCollStmt *stmt)
+{
+	Relation	rel;
+	ScanKeyData scankey;
+	SysScanDesc scan;
+	Oid			db_id;
+	HeapTuple	tuple;
+	Form_pg_database datForm;
+	ObjectAddress address;
+	Datum		datum;
+	bool		isnull;
+	char	   *oldversion;
+	char	   *newversion;
+
+	rel = table_open(DatabaseRelationId, RowExclusiveLock);
+	ScanKeyInit(&scankey,
+				Anum_pg_database_datname,
+				BTEqualStrategyNumber, F_NAMEEQ,
+				CStringGetDatum(stmt->dbname));
+	scan = systable_beginscan(rel, DatabaseNameIndexId, true,
+							  NULL, 1, &scankey);
+	tuple = systable_getnext(scan);
+	if (!HeapTupleIsValid(tuple))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_DATABASE),
+				 errmsg("database \"%s\" does not exist", stmt->dbname)));
+
+	datForm = (Form_pg_database) GETSTRUCT(tuple);
+	db_id = datForm->oid;
+
+	if (!pg_database_ownercheck(db_id, GetUserId()))
+		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE,
+					   stmt->dbname);
+
+	/* Currently recorded version (may be NULL if never recorded) */
+	datum = heap_getattr(tuple, Anum_pg_database_datcollversion, RelationGetDescr(rel), &isnull);
+	oldversion = isnull ? NULL : TextDatumGetCString(datum);
+
+	/* Which locale column to consult depends on the locale provider */
+	datum = heap_getattr(tuple, datForm->datlocprovider == COLLPROVIDER_ICU ? Anum_pg_database_daticulocale : Anum_pg_database_datcollate, RelationGetDescr(rel), &isnull);
+	if (isnull)
+		elog(ERROR, "unexpected null in pg_database");
+	newversion = get_collation_actual_version(datForm->datlocprovider, TextDatumGetCString(datum));
+
+	/* cannot change from NULL to non-NULL or vice versa */
+	if ((!oldversion && newversion) || (oldversion && !newversion))
+		elog(ERROR, "invalid collation version change");
+	else if (oldversion && newversion && strcmp(newversion, oldversion) != 0)
+	{
+		bool		nulls[Natts_pg_database] = {0};
+		bool		replaces[Natts_pg_database] = {0};
+		Datum		values[Natts_pg_database] = {0};
+
+		ereport(NOTICE,
+				(errmsg("changing version from %s to %s",
+						oldversion, newversion)));
+
+		values[Anum_pg_database_datcollversion - 1] = CStringGetTextDatum(newversion);
+		replaces[Anum_pg_database_datcollversion - 1] = true;
+
+		tuple = heap_modify_tuple(tuple, RelationGetDescr(rel),
+								  values, nulls, replaces);
+		CatalogTupleUpdate(rel, &tuple->t_self, tuple);
+		heap_freetuple(tuple);
+	}
+	else
+		ereport(NOTICE,
+				(errmsg("version has not changed")));
+
+	InvokeObjectPostAlterHook(DatabaseRelationId, db_id, 0);
+
+	ObjectAddressSet(address, DatabaseRelationId, db_id);
+
+	systable_endscan(scan);
+
+	table_close(rel, NoLock);
+
+	return address;
+}
+
+
+/*
+ * ALTER DATABASE name SET ...
+ *
+ * Applies a per-database GUC setting change via AlterSetting().  The
+ * database is locked (and its continued existence verified) first, so the
+ * pg_db_role_setting update cannot race against a concurrent DROP DATABASE.
+ * Returns the database's OID.  Only the database owner may do this.
+ */
+Oid
+AlterDatabaseSet(AlterDatabaseSetStmt *stmt)
+{
+	Oid			datid = get_database_oid(stmt->dbname, false);
+
+	/*
+	 * Obtain a lock on the database and make sure it didn't go away in the
+	 * meantime.
+	 */
+	shdepLockAndCheckObject(DatabaseRelationId, datid);
+
+	if (!pg_database_ownercheck(datid, GetUserId()))
+		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE,
+					   stmt->dbname);
+
+	AlterSetting(datid, InvalidOid, stmt->setstmt);
+
+	UnlockSharedObject(DatabaseRelationId, datid, 0, AccessShareLock);
+
+	return datid;
+}
+
+
+/*
+ * ALTER DATABASE name OWNER TO newowner
+ *
+ * Changes pg_database.datdba to newOwnerId, rewriting the datacl to reflect
+ * the new owner where necessary, and updates the shared-dependency entry.
+ * A same-owner change is a silent no-op.  The caller must be the current
+ * owner, a member of the new owner's role, and hold CREATEDB privilege.
+ * Returns the ObjectAddress of the database.
+ */
+ObjectAddress
+AlterDatabaseOwner(const char *dbname, Oid newOwnerId)
+{
+	Oid			db_id;
+	HeapTuple	tuple;
+	Relation	rel;
+	ScanKeyData scankey;
+	SysScanDesc scan;
+	Form_pg_database datForm;
+	ObjectAddress address;
+
+	/*
+	 * Get the old tuple.  We don't need a lock on the database per se,
+	 * because we're not going to do anything that would mess up incoming
+	 * connections.
+	 */
+	rel = table_open(DatabaseRelationId, RowExclusiveLock);
+	ScanKeyInit(&scankey,
+				Anum_pg_database_datname,
+				BTEqualStrategyNumber, F_NAMEEQ,
+				CStringGetDatum(dbname));
+	scan = systable_beginscan(rel, DatabaseNameIndexId, true,
+							  NULL, 1, &scankey);
+	tuple = systable_getnext(scan);
+	if (!HeapTupleIsValid(tuple))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_DATABASE),
+				 errmsg("database \"%s\" does not exist", dbname)));
+
+	datForm = (Form_pg_database) GETSTRUCT(tuple);
+	db_id = datForm->oid;
+
+	/*
+	 * If the new owner is the same as the existing owner, consider the
+	 * command to have succeeded.  This is to be consistent with other
+	 * objects.
+	 */
+	if (datForm->datdba != newOwnerId)
+	{
+		Datum		repl_val[Natts_pg_database];
+		bool		repl_null[Natts_pg_database];
+		bool		repl_repl[Natts_pg_database];
+		Acl		   *newAcl;
+		Datum		aclDatum;
+		bool		isNull;
+		HeapTuple	newtuple;
+
+		/* Otherwise, must be owner of the existing object */
+		if (!pg_database_ownercheck(db_id, GetUserId()))
+			aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE,
+						   dbname);
+
+		/* Must be able to become new owner */
+		check_is_member_of_role(GetUserId(), newOwnerId);
+
+		/*
+		 * must have createdb rights
+		 *
+		 * NOTE: This is different from other alter-owner checks in that the
+		 * current user is checked for createdb privileges instead of the
+		 * destination owner.  This is consistent with the CREATE case for
+		 * databases.  Because superusers will always have this right, we need
+		 * no special case for them.
+		 */
+		if (!have_createdb_privilege())
+			ereport(ERROR,
+					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+					 errmsg("permission denied to change owner of database")));
+
+		memset(repl_null, false, sizeof(repl_null));
+		memset(repl_repl, false, sizeof(repl_repl));
+
+		repl_repl[Anum_pg_database_datdba - 1] = true;
+		repl_val[Anum_pg_database_datdba - 1] = ObjectIdGetDatum(newOwnerId);
+
+		/*
+		 * Determine the modified ACL for the new owner.  This is only
+		 * necessary when the ACL is non-null.
+		 */
+		aclDatum = heap_getattr(tuple,
+								Anum_pg_database_datacl,
+								RelationGetDescr(rel),
+								&isNull);
+		if (!isNull)
+		{
+			newAcl = aclnewowner(DatumGetAclP(aclDatum),
+								 datForm->datdba, newOwnerId);
+			repl_repl[Anum_pg_database_datacl - 1] = true;
+			repl_val[Anum_pg_database_datacl - 1] = PointerGetDatum(newAcl);
+		}
+
+		newtuple = heap_modify_tuple(tuple, RelationGetDescr(rel), repl_val, repl_null, repl_repl);
+		CatalogTupleUpdate(rel, &newtuple->t_self, newtuple);
+
+		heap_freetuple(newtuple);
+
+		/* Update owner dependency reference */
+		changeDependencyOnOwner(DatabaseRelationId, db_id, newOwnerId);
+	}
+
+	InvokeObjectPostAlterHook(DatabaseRelationId, db_id, 0);
+
+	ObjectAddressSet(address, DatabaseRelationId, db_id);
+
+	systable_endscan(scan);
+
+	/* Close pg_database, but keep lock till commit */
+	table_close(rel, NoLock);
+
+	return address;
+}
+
+
+/*
+ * SQL-callable function: report the actual (currently installed) version of
+ * the default collation of the database with the given OID, or NULL if the
+ * provider reports no version.  The locale consulted is daticulocale for
+ * ICU databases and datcollate otherwise.
+ */
+Datum
+pg_database_collation_actual_version(PG_FUNCTION_ARGS)
+{
+	Oid			dbid = PG_GETARG_OID(0);
+	HeapTuple	dbtuple;
+	char		provider;
+	Datum		locdatum;
+	bool		locisnull;
+	char	   *actual_version;
+
+	dbtuple = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(dbid));
+	if (!HeapTupleIsValid(dbtuple))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("database with OID %u does not exist", dbid)));
+
+	provider = ((Form_pg_database) GETSTRUCT(dbtuple))->datlocprovider;
+
+	/* Pick the locale column matching the provider */
+	locdatum = SysCacheGetAttr(DATABASEOID, dbtuple,
+							   provider == COLLPROVIDER_ICU ?
+							   Anum_pg_database_daticulocale :
+							   Anum_pg_database_datcollate,
+							   &locisnull);
+	if (locisnull)
+		elog(ERROR, "unexpected null in pg_database");
+	actual_version = get_collation_actual_version(provider,
+												  TextDatumGetCString(locdatum));
+
+	ReleaseSysCache(dbtuple);
+
+	if (actual_version == NULL)
+		PG_RETURN_NULL();
+	PG_RETURN_TEXT_P(cstring_to_text(actual_version));
+}
+
+
+/*
+ * Helper functions
+ */
+
+/*
+ * Look up info about the database named "name".  If the database exists,
+ * obtain the specified lock type on it, fill in any of the remaining
+ * parameters that aren't NULL, and return true.  If no such database,
+ * return false.
+ *
+ * The string outputs (dbCollate, dbCtype, dbIculocale, dbCollversion) are
+ * palloc'd copies; dbIculocale and dbCollversion are set to NULL when the
+ * corresponding catalog column is null.  On a false return, no output
+ * parameters are modified and no lock is retained.
+ */
+static bool
+get_db_info(const char *name, LOCKMODE lockmode,
+			Oid *dbIdP, Oid *ownerIdP,
+			int *encodingP, bool *dbIsTemplateP, bool *dbAllowConnP,
+			TransactionId *dbFrozenXidP, MultiXactId *dbMinMultiP,
+			Oid *dbTablespace, char **dbCollate, char **dbCtype, char **dbIculocale,
+			char *dbLocProvider,
+			char **dbCollversion)
+{
+	bool		result = false;
+	Relation	relation;
+
+	AssertArg(name);
+
+	/* Caller may wish to grab a better lock on pg_database beforehand... */
+	relation = table_open(DatabaseRelationId, AccessShareLock);
+
+	/*
+	 * Loop covers the rare case where the database is renamed before we can
+	 * lock it.  We try again just in case we can find a new one of the same
+	 * name.
+	 */
+	for (;;)
+	{
+		ScanKeyData scanKey;
+		SysScanDesc scan;
+		HeapTuple	tuple;
+		Oid			dbOid;
+
+		/*
+		 * there's no syscache for database-indexed-by-name, so must do it the
+		 * hard way
+		 */
+		ScanKeyInit(&scanKey,
+					Anum_pg_database_datname,
+					BTEqualStrategyNumber, F_NAMEEQ,
+					CStringGetDatum(name));
+
+		scan = systable_beginscan(relation, DatabaseNameIndexId, true,
+								  NULL, 1, &scanKey);
+
+		tuple = systable_getnext(scan);
+
+		if (!HeapTupleIsValid(tuple))
+		{
+			/* definitely no database of that name */
+			systable_endscan(scan);
+			break;
+		}
+
+		dbOid = ((Form_pg_database) GETSTRUCT(tuple))->oid;
+
+		systable_endscan(scan);
+
+		/*
+		 * Now that we have a database OID, we can try to lock the DB.
+		 */
+		if (lockmode != NoLock)
+			LockSharedObject(DatabaseRelationId, dbOid, 0, lockmode);
+
+		/*
+		 * And now, re-fetch the tuple by OID.  If it's still there and still
+		 * the same name, we win; else, drop the lock and loop back to try
+		 * again.
+		 */
+		tuple = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(dbOid));
+		if (HeapTupleIsValid(tuple))
+		{
+			Form_pg_database dbform = (Form_pg_database) GETSTRUCT(tuple);
+
+			if (strcmp(name, NameStr(dbform->datname)) == 0)
+			{
+				Datum		datum;
+				bool		isnull;
+
+				/* oid of the database */
+				if (dbIdP)
+					*dbIdP = dbOid;
+				/* oid of the owner */
+				if (ownerIdP)
+					*ownerIdP = dbform->datdba;
+				/* character encoding */
+				if (encodingP)
+					*encodingP = dbform->encoding;
+				/* allowed as template? */
+				if (dbIsTemplateP)
+					*dbIsTemplateP = dbform->datistemplate;
+				/* allowing connections? */
+				if (dbAllowConnP)
+					*dbAllowConnP = dbform->datallowconn;
+				/* limit of frozen XIDs */
+				if (dbFrozenXidP)
+					*dbFrozenXidP = dbform->datfrozenxid;
+				/* minimum MultiXactId */
+				if (dbMinMultiP)
+					*dbMinMultiP = dbform->datminmxid;
+				/* default tablespace for this database */
+				if (dbTablespace)
+					*dbTablespace = dbform->dattablespace;
+				/* default locale settings for this database */
+				if (dbLocProvider)
+					*dbLocProvider = dbform->datlocprovider;
+				if (dbCollate)
+				{
+					datum = SysCacheGetAttr(DATABASEOID, tuple, Anum_pg_database_datcollate, &isnull);
+					Assert(!isnull);
+					*dbCollate = TextDatumGetCString(datum);
+				}
+				if (dbCtype)
+				{
+					datum = SysCacheGetAttr(DATABASEOID, tuple, Anum_pg_database_datctype, &isnull);
+					Assert(!isnull);
+					*dbCtype = TextDatumGetCString(datum);
+				}
+				if (dbIculocale)
+				{
+					/* daticulocale is null unless the ICU provider is used */
+					datum = SysCacheGetAttr(DATABASEOID, tuple, Anum_pg_database_daticulocale, &isnull);
+					if (isnull)
+						*dbIculocale = NULL;
+					else
+						*dbIculocale = TextDatumGetCString(datum);
+				}
+				if (dbCollversion)
+				{
+					datum = SysCacheGetAttr(DATABASEOID, tuple, Anum_pg_database_datcollversion, &isnull);
+					if (isnull)
+						*dbCollversion = NULL;
+					else
+						*dbCollversion = TextDatumGetCString(datum);
+				}
+				ReleaseSysCache(tuple);
+				result = true;
+				break;
+			}
+			/* can only get here if it was just renamed */
+			ReleaseSysCache(tuple);
+		}
+
+		/* database was renamed or dropped concurrently; unlock and retry */
+		if (lockmode != NoLock)
+			UnlockSharedObject(DatabaseRelationId, dbOid, 0, lockmode);
+	}
+
+	table_close(relation, AccessShareLock);
+
+	return result;
+}
+
+/* Does the current user have the privilege to create databases? */
+static bool
+have_createdb_privilege(void)
+{
+	HeapTuple	roletup;
+	bool		has_priv = false;
+
+	/* Superusers bypass all permission checks */
+	if (superuser())
+		return true;
+
+	/* Otherwise, consult the rolcreatedb flag of the current role */
+	roletup = SearchSysCache1(AUTHOID, ObjectIdGetDatum(GetUserId()));
+	if (HeapTupleIsValid(roletup))
+	{
+		has_priv = ((Form_pg_authid) GETSTRUCT(roletup))->rolcreatedb;
+		ReleaseSysCache(roletup);
+	}
+
+	return has_priv;
+}
+
+/*
+ * Remove tablespace directories
+ *
+ * We don't know what tablespaces db_id is using, so iterate through all
+ * tablespaces removing <tablespace>/db_id
+ *
+ * Note that the directories are removed first, and the XLOG_DBASE_DROP
+ * record describing the removals is written afterwards; if nothing was
+ * removed, no WAL record is emitted at all.
+ */
+static void
+remove_dbtablespaces(Oid db_id)
+{
+	Relation	rel;
+	TableScanDesc scan;
+	HeapTuple	tuple;
+	List	   *ltblspc = NIL;
+	ListCell   *cell;
+	int			ntblspc;
+	int			i;
+	Oid		   *tablespace_ids;
+
+	rel = table_open(TableSpaceRelationId, AccessShareLock);
+	scan = table_beginscan_catalog(rel, 0, NULL);
+	while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+	{
+		Form_pg_tablespace spcform = (Form_pg_tablespace) GETSTRUCT(tuple);
+		Oid			dsttablespace = spcform->oid;
+		char	   *dstpath;
+		struct stat st;
+
+		/* Don't mess with the global tablespace */
+		if (dsttablespace == GLOBALTABLESPACE_OID)
+			continue;
+
+		dstpath = GetDatabasePath(db_id, dsttablespace);
+
+		/* Skip tablespaces that hold nothing for this database */
+		if (lstat(dstpath, &st) < 0 || !S_ISDIR(st.st_mode))
+		{
+			/* Assume we can ignore it */
+			pfree(dstpath);
+			continue;
+		}
+
+		if (!rmtree(dstpath, true))
+			ereport(WARNING,
+					(errmsg("some useless files may be left behind in old database directory \"%s\"",
+							dstpath)));
+
+		/* remember this tablespace for the WAL record below */
+		ltblspc = lappend_oid(ltblspc, dsttablespace);
+		pfree(dstpath);
+	}
+
+	ntblspc = list_length(ltblspc);
+	if (ntblspc == 0)
+	{
+		/* nothing was removed, so no WAL record is needed */
+		table_endscan(scan);
+		table_close(rel, AccessShareLock);
+		return;
+	}
+
+	tablespace_ids = (Oid *) palloc(ntblspc * sizeof(Oid));
+	i = 0;
+	foreach(cell, ltblspc)
+		tablespace_ids[i++] = lfirst_oid(cell);
+
+	/* Record the filesystem change in XLOG */
+	{
+		xl_dbase_drop_rec xlrec;
+
+		xlrec.db_id = db_id;
+		xlrec.ntablespaces = ntblspc;
+
+		XLogBeginInsert();
+		XLogRegisterData((char *) &xlrec, MinSizeOfDbaseDropRec);
+		XLogRegisterData((char *) tablespace_ids, ntblspc * sizeof(Oid));
+
+		(void) XLogInsert(RM_DBASE_ID,
+						  XLOG_DBASE_DROP | XLR_SPECIAL_REL_UPDATE);
+	}
+
+	list_free(ltblspc);
+	pfree(tablespace_ids);
+
+	table_endscan(scan);
+	table_close(rel, AccessShareLock);
+}
+
+/*
+ * Check for existing files that conflict with a proposed new DB OID;
+ * return true if there are any.
+ *
+ * If a subdirectory matching the proposed OID already existed in some
+ * tablespace, creating the database would fail on the duplicate name, and
+ * cleanup in remove_dbtablespaces would then delete that pre-existing
+ * directory.  Rather than risk nuking unrelated files, we probe every
+ * tablespace up front before settling on the new database's OID.  This
+ * mirrors what GetNewRelFileNode() does for table relfilenode values.
+ */
+static bool
+check_db_file_conflict(Oid db_id)
+{
+	bool		conflict = false;
+	Relation	spcrel;
+	TableScanDesc spcscan;
+	HeapTuple	spctuple;
+
+	spcrel = table_open(TableSpaceRelationId, AccessShareLock);
+	spcscan = table_beginscan_catalog(spcrel, 0, NULL);
+
+	while (!conflict &&
+		   (spctuple = heap_getnext(spcscan, ForwardScanDirection)) != NULL)
+	{
+		Oid			spcoid = ((Form_pg_tablespace) GETSTRUCT(spctuple))->oid;
+		char	   *probepath;
+		struct stat statbuf;
+
+		/* The global tablespace never holds per-database subdirectories */
+		if (spcoid == GLOBALTABLESPACE_OID)
+			continue;
+
+		probepath = GetDatabasePath(db_id, spcoid);
+
+		/* Any existing entry at that path (file or directory) conflicts */
+		if (lstat(probepath, &statbuf) == 0)
+			conflict = true;
+
+		pfree(probepath);
+	}
+
+	table_endscan(spcscan);
+	table_close(spcrel, AccessShareLock);
+
+	return conflict;
+}
+
+/*
+ * Issue a suitable errdetail message for a busy database
+ *
+ * Always returns 0 so that it may be used directly inside an ereport()
+ * argument list.
+ */
+static int
+errdetail_busy_db(int notherbackends, int npreparedxacts)
+{
+	if (notherbackends > 0 && npreparedxacts > 0)
+
+		/*
+		 * We don't deal with singular versus plural here, since gettext
+		 * doesn't support multiple plurals in one string.
+		 */
+		errdetail("There are %d other session(s) and %d prepared transaction(s) using the database.",
+				  notherbackends, npreparedxacts);
+	else if (notherbackends > 0)
+		errdetail_plural("There is %d other session using the database.",
+						 "There are %d other sessions using the database.",
+						 notherbackends,
+						 notherbackends);
+	else
+		/* also reached when both counts are zero */
+		errdetail_plural("There is %d prepared transaction using the database.",
+						 "There are %d prepared transactions using the database.",
+						 npreparedxacts,
+						 npreparedxacts);
+	return 0;					/* just to keep ereport macro happy */
+}
+
+/*
+ * get_database_oid - given a database name, look up the OID
+ *
+ * If missing_ok is false, throw an error if database name not found.  If
+ * true, just return InvalidOid.
+ */
+Oid
+get_database_oid(const char *dbname, bool missing_ok)
+{
+	Relation	rel;
+	ScanKeyData key;
+	SysScanDesc sscan;
+	HeapTuple	tup;
+	Oid			result = InvalidOid;
+
+	/*
+	 * pg_database has no name-keyed syscache, so scan the name index
+	 * directly.
+	 */
+	rel = table_open(DatabaseRelationId, AccessShareLock);
+	ScanKeyInit(&key,
+				Anum_pg_database_datname,
+				BTEqualStrategyNumber, F_NAMEEQ,
+				CStringGetDatum(dbname));
+	sscan = systable_beginscan(rel, DatabaseNameIndexId, true,
+							   NULL, 1, &key);
+
+	/* names are unique, so at most one tuple can match */
+	tup = systable_getnext(sscan);
+	if (HeapTupleIsValid(tup))
+		result = ((Form_pg_database) GETSTRUCT(tup))->oid;
+
+	systable_endscan(sscan);
+	table_close(rel, AccessShareLock);
+
+	if (!OidIsValid(result) && !missing_ok)
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_DATABASE),
+				 errmsg("database \"%s\" does not exist",
+						dbname)));
+
+	return result;
+}
+
+
+/*
+ * get_database_name - given a database OID, look up the name
+ *
+ * Returns a palloc'd string, or NULL if no such database.
+ */
+char *
+get_database_name(Oid dbid)
+{
+	char	   *dbname = NULL;
+	HeapTuple	tup;
+
+	tup = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(dbid));
+	if (HeapTupleIsValid(tup))
+	{
+		/* copy the name out before releasing the cache entry */
+		dbname = pstrdup(NameStr(((Form_pg_database) GETSTRUCT(tup))->datname));
+		ReleaseSysCache(tup);
+	}
+
+	return dbname;
+}
+
+
+/*
+ * While dropping a database the pg_database row is marked invalid, but the
+ * catalog contents still exist.  Connections to such a database are not
+ * allowed.
+ *
+ * Returns true if the given pg_database row is marked invalid; the marker
+ * is datconnlimit being set to DATCONNLIMIT_INVALID_DB.
+ */
+bool
+database_is_invalid_form(Form_pg_database datform)
+{
+	return datform->datconnlimit == DATCONNLIMIT_INVALID_DB;
+}
+
+
+/*
+ * Convenience wrapper around database_is_invalid_form(): look up the
+ * pg_database row for the given OID and report whether it is marked
+ * invalid.  Errors out if no such database exists.
+ */
+bool
+database_is_invalid_oid(Oid dboid)
+{
+	bool		result;
+	HeapTuple	tup;
+
+	tup = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(dboid));
+	if (!HeapTupleIsValid(tup))
+		elog(ERROR, "cache lookup failed for database %u", dboid);
+
+	result = database_is_invalid_form((Form_pg_database) GETSTRUCT(tup));
+	ReleaseSysCache(tup);
+
+	return result;
+}
+
+
+/*
+ * recovery_create_dbdir()
+ *
+ * During recovery, there's a case where we validly need to recover a missing
+ * tablespace directory so that recovery can continue.  This happens when
+ * recovery wants to create a database but the holding tablespace has been
+ * removed before the server stopped.  Since we expect that the directory will
+ * be gone before reaching recovery consistency, and we have no knowledge about
+ * the tablespace other than its OID here, we create a real directory under
+ * pg_tblspc here instead of restoring the symlink.
+ *
+ * If only_tblspc is true, then the requested directory must be in pg_tblspc/
+ */
+static void
+recovery_create_dbdir(char *path, bool only_tblspc)
+{
+	struct stat st;
+
+	Assert(RecoveryInProgress());
+
+	/* Nothing to do if the directory already exists */
+	if (stat(path, &st) == 0)
+		return;
+
+	/* Guard against callers passing a path outside pg_tblspc/ */
+	if (only_tblspc && strstr(path, "pg_tblspc/") == NULL)
+		elog(PANIC, "requested to create invalid directory: %s", path);
+
+	/*
+	 * After consistency is reached a missing directory is unexpected, unless
+	 * in-place tablespaces are allowed.
+	 */
+	if (reachedConsistency && !allow_in_place_tablespaces)
+		ereport(PANIC,
+				errmsg("missing directory \"%s\"", path));
+
+	elog(reachedConsistency ? WARNING : DEBUG1,
+		 "creating missing directory: %s", path);
+
+	if (pg_mkdir_p(path, pg_dir_create_mode) != 0)
+		ereport(PANIC,
+				errmsg("could not create missing directory \"%s\": %m", path));
+}
+
+
+/*
+ * DATABASE resource manager's routines
+ *
+ * Replays XLOG_DBASE_CREATE_FILE_COPY, XLOG_DBASE_CREATE_WAL_LOG and
+ * XLOG_DBASE_DROP records during crash recovery or standby replay.
+ */
+void
+dbase_redo(XLogReaderState *record)
+{
+	uint8		info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
+
+	/* Backup blocks are not used in dbase records */
+	Assert(!XLogRecHasAnyBlockRefs(record));
+
+	if (info == XLOG_DBASE_CREATE_FILE_COPY)
+	{
+		xl_dbase_create_file_copy_rec *xlrec =
+		(xl_dbase_create_file_copy_rec *) XLogRecGetData(record);
+		char	   *src_path;
+		char	   *dst_path;
+		char	   *parent_path;
+		struct stat st;
+
+		src_path = GetDatabasePath(xlrec->src_db_id, xlrec->src_tablespace_id);
+		dst_path = GetDatabasePath(xlrec->db_id, xlrec->tablespace_id);
+
+		/*
+		 * Our theory for replaying a CREATE is to forcibly drop the target
+		 * subdirectory if present, then re-copy the source data. This may be
+		 * more work than needed, but it is simple to implement.
+		 */
+		if (stat(dst_path, &st) == 0 && S_ISDIR(st.st_mode))
+		{
+			if (!rmtree(dst_path, true))
+				/* If this failed, copydir() below is going to error. */
+				ereport(WARNING,
+						(errmsg("some useless files may be left behind in old database directory \"%s\"",
+								dst_path)));
+		}
+
+		/*
+		 * If the parent of the target path doesn't exist, create it now. This
+		 * enables us to create the target underneath later. Note that if
+		 * the database dir is not in a tablespace, the parent will always
+		 * exist, so this never runs in that case.
+		 */
+		parent_path = pstrdup(dst_path);
+		get_parent_directory(parent_path);
+		if (stat(parent_path, &st) < 0)
+		{
+			/* report the directory we actually stat'd, not its child */
+			if (errno != ENOENT)
+				ereport(FATAL,
+						errmsg("could not stat directory \"%s\": %m",
+							   parent_path));
+
+			recovery_create_dbdir(parent_path, true);
+		}
+		pfree(parent_path);
+
+		/*
+		 * There's a case where the copy source directory is missing for the
+		 * same reason above. Create the empty source directory so that
+		 * copydir below doesn't fail. The directory will be dropped soon by
+		 * recovery.
+		 */
+		if (stat(src_path, &st) < 0 && errno == ENOENT)
+			recovery_create_dbdir(src_path, false);
+
+		/*
+		 * Force dirty buffers out to disk, to ensure source database is
+		 * up-to-date for the copy.
+		 */
+		FlushDatabaseBuffers(xlrec->src_db_id);
+
+		/* Close all smgr fds in all backends. */
+		WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));
+
+		/*
+		 * Copy this subdirectory to the new location
+		 *
+		 * We don't need to copy subdirectories
+		 */
+		copydir(src_path, dst_path, false);
+
+		pfree(src_path);
+		pfree(dst_path);
+	}
+	else if (info == XLOG_DBASE_CREATE_WAL_LOG)
+	{
+		xl_dbase_create_wal_log_rec *xlrec =
+		(xl_dbase_create_wal_log_rec *) XLogRecGetData(record);
+		char	   *dbpath;
+		char	   *parent_path;
+
+		dbpath = GetDatabasePath(xlrec->db_id, xlrec->tablespace_id);
+
+		/* create the parent directory if needed and valid */
+		parent_path = pstrdup(dbpath);
+		get_parent_directory(parent_path);
+		recovery_create_dbdir(parent_path, true);
+
+		/* Create the database directory with the version file. */
+		CreateDirAndVersionFile(dbpath, xlrec->db_id, xlrec->tablespace_id,
+								true);
+		pfree(dbpath);
+	}
+	else if (info == XLOG_DBASE_DROP)
+	{
+		xl_dbase_drop_rec *xlrec = (xl_dbase_drop_rec *) XLogRecGetData(record);
+		char	   *dst_path;
+		int			i;
+
+		if (InHotStandby)
+		{
+			/*
+			 * Lock database while we resolve conflicts to ensure that
+			 * InitPostgres() cannot fully re-execute concurrently. This
+			 * avoids backends re-connecting automatically to same database,
+			 * which can happen in some cases.
+			 *
+			 * This will lock out walsenders trying to connect to db-specific
+			 * slots for logical decoding too, so it's safe for us to drop
+			 * slots.
+			 */
+			LockSharedObjectForSession(DatabaseRelationId, xlrec->db_id, 0, AccessExclusiveLock);
+			ResolveRecoveryConflictWithDatabase(xlrec->db_id);
+		}
+
+		/* Drop any database-specific replication slots */
+		ReplicationSlotsDropDBSlots(xlrec->db_id);
+
+		/* Drop pages for this database that are in the shared buffer cache */
+		DropDatabaseBuffers(xlrec->db_id);
+
+		/* Also, clean out any fsync requests that might be pending in md.c */
+		ForgetDatabaseSyncRequests(xlrec->db_id);
+
+		/* Clean out the xlog relcache too */
+		XLogDropDatabase(xlrec->db_id);
+
+		/* Close all smgr fds in all backends. */
+		WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));
+
+		/* Remove the per-tablespace directories recorded in the WAL record */
+		for (i = 0; i < xlrec->ntablespaces; i++)
+		{
+			dst_path = GetDatabasePath(xlrec->db_id, xlrec->tablespace_ids[i]);
+
+			/* And remove the physical files */
+			if (!rmtree(dst_path, true))
+				ereport(WARNING,
+						(errmsg("some useless files may be left behind in old database directory \"%s\"",
+								dst_path)));
+			pfree(dst_path);
+		}
+
+		if (InHotStandby)
+		{
+			/*
+			 * Release locks prior to commit. XXX There is a race condition
+			 * here that may allow backends to reconnect, but the window for
+			 * this is small because the gap between here and commit is mostly
+			 * fairly small and it is unlikely that people will be dropping
+			 * databases that we are trying to connect to anyway.
+			 */
+			UnlockSharedObjectForSession(DatabaseRelationId, xlrec->db_id, 0, AccessExclusiveLock);
+		}
+	}
+	else
+		elog(PANIC, "dbase_redo: unknown op code %u", info);
+}
diff --git a/src/backend/commands/define.c b/src/backend/commands/define.c
new file mode 100644
index 0000000..1e07fa9
--- /dev/null
+++ b/src/backend/commands/define.c
@@ -0,0 +1,391 @@
+/*-------------------------------------------------------------------------
+ *
+ * define.c
+ * Support routines for various kinds of object creation.
+ *
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/commands/define.c
+ *
+ * DESCRIPTION
+ * The "DefineFoo" routines take the parse tree and pick out the
+ * appropriate arguments/flags, passing the results to the
+ * corresponding "FooDefine" routines (in src/catalog) that do
+ * the actual catalog-munging. These routines also verify permission
+ * of the user to execute the command.
+ *
+ * NOTES
+ * These things must be defined and committed in the following order:
+ * "create function":
+ * input/output, recv/send procedures
+ * "create type":
+ * type
+ * "create operator":
+ * operators
+ *
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <ctype.h>
+#include <math.h>
+
+#include "catalog/namespace.h"
+#include "commands/defrem.h"
+#include "nodes/makefuncs.h"
+#include "parser/parse_type.h"
+#include "parser/scansup.h"
+#include "utils/builtins.h"
+
+/*
+ * Extract a string value (otherwise uninterpreted) from a DefElem.
+ *
+ * Note: the result is not always freshly palloc'd: for T_String it points
+ * into the node itself, and for T_Boolean it is a constant string literal,
+ * so callers must not assume ownership of the returned storage.
+ */
+char *
+defGetString(DefElem *def)
+{
+	if (def->arg == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("%s requires a parameter",
+						def->defname)));
+	switch (nodeTag(def->arg))
+	{
+		case T_Integer:
+			return psprintf("%ld", (long) intVal(def->arg));
+		case T_Float:
+			/* return the original textual form of the float */
+			return castNode(Float, def->arg)->fval;
+		case T_Boolean:
+			return boolVal(def->arg) ? "true" : "false";
+		case T_String:
+			return strVal(def->arg);
+		case T_TypeName:
+			return TypeNameToString((TypeName *) def->arg);
+		case T_List:
+			return NameListToString((List *) def->arg);
+		case T_A_Star:
+			return pstrdup("*");
+		default:
+			elog(ERROR, "unrecognized node type: %d", (int) nodeTag(def->arg));
+	}
+	return NULL;				/* keep compiler quiet */
+}
+
+/*
+ * Extract a numeric value (actually double) from a DefElem.
+ */
+double
+defGetNumeric(DefElem *def)
+{
+	if (def->arg == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("%s requires a numeric value",
+						def->defname)));
+
+	/* both integer and float literals are acceptable */
+	if (IsA(def->arg, Integer))
+		return (double) intVal(def->arg);
+	if (IsA(def->arg, Float))
+		return floatVal(def->arg);
+
+	ereport(ERROR,
+			(errcode(ERRCODE_SYNTAX_ERROR),
+			 errmsg("%s requires a numeric value",
+					def->defname)));
+	return 0;					/* keep compiler quiet */
+}
+
+/*
+ * Extract a boolean value from a DefElem.
+ *
+ * Accepts 0, 1, "true", "false", "on", "off"; a missing argument means true.
+ */
+bool
+defGetBoolean(DefElem *def)
+{
+	/* If no parameter was given, assume "true" is meant. */
+	if (def->arg == NULL)
+		return true;
+
+	if (IsA(def->arg, Integer))
+	{
+		/* only 0 and 1 are valid; any other integer errors out below */
+		int			ival = intVal(def->arg);
+
+		if (ival == 0)
+			return false;
+		if (ival == 1)
+			return true;
+	}
+	else
+	{
+		/*
+		 * The set of strings accepted here should match up with the
+		 * grammar's opt_boolean_or_string production.
+		 */
+		char	   *sval = defGetString(def);
+
+		if (pg_strcasecmp(sval, "true") == 0 ||
+			pg_strcasecmp(sval, "on") == 0)
+			return true;
+		if (pg_strcasecmp(sval, "false") == 0 ||
+			pg_strcasecmp(sval, "off") == 0)
+			return false;
+	}
+
+	ereport(ERROR,
+			(errcode(ERRCODE_SYNTAX_ERROR),
+			 errmsg("%s requires a Boolean value",
+					def->defname)));
+	return false;				/* keep compiler quiet */
+}
+
+/*
+ * Extract an int32 value from a DefElem.
+ */
+int32
+defGetInt32(DefElem *def)
+{
+	/* only an integer literal argument is acceptable */
+	if (def->arg != NULL && IsA(def->arg, Integer))
+		return (int32) intVal(def->arg);
+
+	ereport(ERROR,
+			(errcode(ERRCODE_SYNTAX_ERROR),
+			 errmsg("%s requires an integer value",
+					def->defname)));
+	return 0;					/* keep compiler quiet */
+}
+
+/*
+ * Extract an int64 value from a DefElem.
+ *
+ * Raises a syntax error if the argument is missing or not numeric; values
+ * beyond int4 range arrive as Float nodes and are re-parsed via int8in,
+ * which rejects non-integral or out-of-range strings.
+ */
+int64
+defGetInt64(DefElem *def)
+{
+	if (def->arg == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("%s requires a numeric value",
+						def->defname)));
+	switch (nodeTag(def->arg))
+	{
+		case T_Integer:
+			return (int64) intVal(def->arg);
+		case T_Float:
+
+			/*
+			 * Values too large for int4 will be represented as Float
+			 * constants by the lexer.  Accept these if they are valid int8
+			 * strings.
+			 */
+			return DatumGetInt64(DirectFunctionCall1(int8in,
+													 CStringGetDatum(castNode(Float, def->arg)->fval)));
+		default:
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("%s requires a numeric value",
+							def->defname)));
+	}
+	return 0;					/* keep compiler quiet */
+}
+
+/*
+ * Extract an OID value from a DefElem.
+ *
+ * Raises a syntax error if the argument is missing or not numeric; values
+ * beyond int4 range arrive as Float nodes and are re-parsed via oidin,
+ * which rejects strings that are not valid OIDs.
+ */
+Oid
+defGetObjectId(DefElem *def)
+{
+	if (def->arg == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("%s requires a numeric value",
+						def->defname)));
+	switch (nodeTag(def->arg))
+	{
+		case T_Integer:
+			return (Oid) intVal(def->arg);
+		case T_Float:
+
+			/*
+			 * Values too large for int4 will be represented as Float
+			 * constants by the lexer.  Accept these if they are valid OID
+			 * strings.
+			 */
+			return DatumGetObjectId(DirectFunctionCall1(oidin,
+														CStringGetDatum(castNode(Float, def->arg)->fval)));
+		default:
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("%s requires a numeric value",
+							def->defname)));
+	}
+	return 0;					/* keep compiler quiet */
+}
+
+/*
+ * Extract a possibly-qualified name (as a List of Strings) from a DefElem.
+ */
+List *
+defGetQualifiedName(DefElem *def)
+{
+	if (def->arg == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("%s requires a parameter",
+						def->defname)));
+
+	if (IsA(def->arg, TypeName))
+		return ((TypeName *) def->arg)->names;
+	if (IsA(def->arg, List))
+		return (List *) def->arg;
+	if (IsA(def->arg, String))
+	{
+		/* Allow quoted name for backwards compatibility */
+		return list_make1(def->arg);
+	}
+
+	ereport(ERROR,
+			(errcode(ERRCODE_SYNTAX_ERROR),
+			 errmsg("argument of %s must be a name",
+					def->defname)));
+	return NIL;					/* keep compiler quiet */
+}
+
+/*
+ * Extract a TypeName from a DefElem.
+ *
+ * Note: we do not accept a List arg here, because the parser will only
+ * return a bare List when the name looks like an operator name.
+ *
+ * Raises a syntax error if the argument is missing or is neither a
+ * TypeName nor a quoted string.
+ */
+TypeName *
+defGetTypeName(DefElem *def)
+{
+	if (def->arg == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("%s requires a parameter",
+						def->defname)));
+	switch (nodeTag(def->arg))
+	{
+		case T_TypeName:
+			return (TypeName *) def->arg;
+		case T_String:
+			/* Allow quoted typename for backwards compatibility */
+			return makeTypeNameFromNameList(list_make1(def->arg));
+		default:
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("argument of %s must be a type name",
+							def->defname)));
+	}
+	return NULL;				/* keep compiler quiet */
+}
+
+/*
+ * Extract a type length indicator (either absolute bytes, or
+ * -1 for "variable") from a DefElem.
+ *
+ * Any case that does not return from inside the switch falls through to
+ * the "invalid argument" error at the bottom.
+ */
+int
+defGetTypeLength(DefElem *def)
+{
+	if (def->arg == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("%s requires a parameter",
+						def->defname)));
+	switch (nodeTag(def->arg))
+	{
+		case T_Integer:
+			return intVal(def->arg);
+		case T_Float:
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("%s requires an integer value",
+							def->defname)));
+			break;
+		case T_String:
+			if (pg_strcasecmp(strVal(def->arg), "variable") == 0)
+				return -1;		/* variable length */
+			break;
+		case T_TypeName:
+			/* cope if grammar chooses to believe "variable" is a typename */
+			if (pg_strcasecmp(TypeNameToString((TypeName *) def->arg),
+							  "variable") == 0)
+				return -1;		/* variable length */
+			break;
+		case T_List:
+			/* must be an operator name */
+			break;
+		default:
+			elog(ERROR, "unrecognized node type: %d", (int) nodeTag(def->arg));
+	}
+	ereport(ERROR,
+			(errcode(ERRCODE_SYNTAX_ERROR),
+			 errmsg("invalid argument for %s: \"%s\"",
+					def->defname, defGetString(def))));
+	return 0;					/* keep compiler quiet */
+}
+
+/*
+ * Extract a list of string values (otherwise uninterpreted) from a DefElem.
+ */
+List *
+defGetStringList(DefElem *def)
+{
+	List	   *args;
+	ListCell   *lc;
+
+	if (def->arg == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("%s requires a parameter",
+						def->defname)));
+	if (nodeTag(def->arg) != T_List)
+		elog(ERROR, "unrecognized node type: %d", (int) nodeTag(def->arg));
+
+	args = (List *) def->arg;
+
+	/* Verify that every list member really is a String node */
+	foreach(lc, args)
+	{
+		Node	   *node = (Node *) lfirst(lc);
+
+		if (!IsA(node, String))
+			elog(ERROR, "unexpected node type in name list: %d",
+				 (int) nodeTag(node));
+	}
+
+	return args;
+}
+
+/*
+ * Raise an error about a conflicting DefElem.
+ *
+ * Reports ERRCODE_SYNTAX_ERROR, pointing at the offending option's
+ * location within the command text.  Never returns.
+ */
+void
+errorConflictingDefElem(DefElem *defel, ParseState *pstate)
+{
+	ereport(ERROR,
+			errcode(ERRCODE_SYNTAX_ERROR),
+			errmsg("conflicting or redundant options"),
+			parser_errposition(pstate, defel->location));
+}
diff --git a/src/backend/commands/discard.c b/src/backend/commands/discard.c
new file mode 100644
index 0000000..c583539
--- /dev/null
+++ b/src/backend/commands/discard.c
@@ -0,0 +1,78 @@
+/*-------------------------------------------------------------------------
+ *
+ * discard.c
+ * The implementation of the DISCARD command
+ *
+ * Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/commands/discard.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/xact.h"
+#include "catalog/namespace.h"
+#include "commands/async.h"
+#include "commands/discard.h"
+#include "commands/prepare.h"
+#include "commands/sequence.h"
+#include "utils/guc.h"
+#include "utils/portal.h"
+
+static void DiscardAll(bool isTopLevel);
+
+/*
+ * DISCARD { ALL | SEQUENCES | TEMP | PLANS }
+ *
+ * Dispatch a DISCARD statement to the matching reset routine.
+ */
+void
+DiscardCommand(DiscardStmt *stmt, bool isTopLevel)
+{
+	switch (stmt->target)
+	{
+		case DISCARD_PLANS:
+			ResetPlanCache();
+			break;
+
+		case DISCARD_SEQUENCES:
+			ResetSequenceCaches();
+			break;
+
+		case DISCARD_TEMP:
+			ResetTempTableNamespace();
+			break;
+
+		case DISCARD_ALL:
+			DiscardAll(isTopLevel);
+			break;
+
+		default:
+			elog(ERROR, "unrecognized DISCARD target: %d", stmt->target);
+	}
+}
+
+/*
+ * Implements DISCARD ALL: reset essentially all changeable session state.
+ */
+static void
+DiscardAll(bool isTopLevel)
+{
+	/*
+	 * Disallow DISCARD ALL in a transaction block. This is arguably
+	 * inconsistent (we don't make a similar check in the command sequence
+	 * that DISCARD ALL is equivalent to), but the idea is to catch mistakes:
+	 * DISCARD ALL inside a transaction block would leave the transaction
+	 * still uncommitted.
+	 */
+	PreventInTransactionBlock(isTopLevel, "DISCARD ALL");
+
+	/* Closing portals might run user-defined code, so do that first. */
+	PortalHashTableDeleteAll();
+	/* Reset authorization before GUCs so ResetAllOptions runs as ourselves */
+	SetPGVariable("session_authorization", NIL, false);
+	ResetAllOptions();
+	DropAllPreparedStatements();
+	Async_UnlistenAll();
+	/* Release only user-level (advisory) locks, not regular object locks */
+	LockReleaseAll(USER_LOCKMETHOD, true);
+	ResetPlanCache();
+	ResetTempTableNamespace();
+	ResetSequenceCaches();
+}
diff --git a/src/backend/commands/dropcmds.c b/src/backend/commands/dropcmds.c
new file mode 100644
index 0000000..c9b5732
--- /dev/null
+++ b/src/backend/commands/dropcmds.c
@@ -0,0 +1,493 @@
+/*-------------------------------------------------------------------------
+ *
+ * dropcmds.c
+ * handle various "DROP" operations
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/commands/dropcmds.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/htup_details.h"
+#include "access/table.h"
+#include "access/xact.h"
+#include "catalog/dependency.h"
+#include "catalog/namespace.h"
+#include "catalog/objectaddress.h"
+#include "catalog/pg_class.h"
+#include "catalog/pg_proc.h"
+#include "commands/defrem.h"
+#include "miscadmin.h"
+#include "nodes/makefuncs.h"
+#include "parser/parse_type.h"
+#include "utils/acl.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/syscache.h"
+
+
+static void does_not_exist_skipping(ObjectType objtype,
+ Node *object);
+static bool owningrel_does_not_exist_skipping(List *object,
+ const char **msg, char **name);
+static bool schema_does_not_exist_skipping(List *object,
+ const char **msg, char **name);
+static bool type_in_list_does_not_exist_skipping(List *typenames,
+ const char **msg, char **name);
+
+
+/*
+ * Drop one or more objects.
+ *
+ * We don't currently handle all object types here. Relations, for example,
+ * require special handling, because (for example) indexes have additional
+ * locking requirements.
+ *
+ * We look up all the objects first, and then delete them in a single
+ * performMultipleDeletions() call. This avoids unnecessary DROP RESTRICT
+ * errors if there are dependencies between them.
+ */
+void
+RemoveObjects(DropStmt *stmt)
+{
+	ObjectAddresses *objects;	/* accumulated targets for one bulk deletion */
+	ListCell   *cell1;
+
+	objects = new_object_addresses();
+
+	foreach(cell1, stmt->objects)
+	{
+		ObjectAddress address;
+		Node	   *object = lfirst(cell1);
+		Relation	relation = NULL;
+		Oid			namespaceId;
+
+		/* Get an ObjectAddress for the object (also locks it). */
+		address = get_object_address(stmt->removeType,
+									 object,
+									 &relation,
+									 AccessExclusiveLock,
+									 stmt->missing_ok);
+
+		/*
+		 * Issue NOTICE if supplied object was not found.  Note this is only
+		 * relevant in the missing_ok case, because otherwise
+		 * get_object_address would have thrown an error.
+		 */
+		if (!OidIsValid(address.objectId))
+		{
+			Assert(stmt->missing_ok);
+			does_not_exist_skipping(stmt->removeType, object);
+			continue;
+		}
+
+		/*
+		 * Although COMMENT ON FUNCTION, SECURITY LABEL ON FUNCTION, etc. are
+		 * happy to operate on an aggregate as on any other function, we have
+		 * historically not allowed this for DROP FUNCTION.
+		 */
+		if (stmt->removeType == OBJECT_FUNCTION)
+		{
+			if (get_func_prokind(address.objectId) == PROKIND_AGGREGATE)
+				ereport(ERROR,
+						(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+						 errmsg("\"%s\" is an aggregate function",
+								NameListToString(castNode(ObjectWithArgs, object)->objname)),
+						 errhint("Use DROP AGGREGATE to drop aggregate functions.")));
+		}
+
+		/*
+		 * Check permissions.  Owning the containing schema is sufficient;
+		 * otherwise (or for non-schema-contained objects) do the full
+		 * per-object ownership check.
+		 */
+		namespaceId = get_object_namespace(&address);
+		if (!OidIsValid(namespaceId) ||
+			!pg_namespace_ownercheck(namespaceId, GetUserId()))
+			check_object_ownership(GetUserId(), stmt->removeType, address,
+								   object, relation);
+
+		/*
+		 * Make note if a temporary namespace has been accessed in this
+		 * transaction.
+		 */
+		if (OidIsValid(namespaceId) && isTempNamespace(namespaceId))
+			MyXactFlags |= XACT_FLAGS_ACCESSEDTEMPNAMESPACE;
+
+		/* Release any relcache reference count, but keep lock until commit. */
+		if (relation)
+			table_close(relation, NoLock);
+
+		add_exact_object_address(&address, objects);
+	}
+
+	/* Here we really delete them. */
+	performMultipleDeletions(objects, stmt->behavior, 0);
+
+	free_object_addresses(objects);
+}
+
+/*
+ * owningrel_does_not_exist_skipping
+ *		Subroutine for RemoveObjects
+ *
+ * A rule or trigger name is qualified by its owning relation.  When the
+ * rule/trigger itself was not found, check whether the owning relation (or
+ * that relation's schema) is what is actually missing.  If so, fill in
+ * *msg and *name accordingly and return true; otherwise return false,
+ * meaning the trigger or rule itself is the missing object.
+ */
+static bool
+owningrel_does_not_exist_skipping(List *object, const char **msg, char **name)
+{
+	List	   *relname;
+	RangeVar   *rv;
+
+	/* Drop the trailing rule/trigger name, leaving the relation's name. */
+	relname = list_truncate(list_copy(object),
+							list_length(object) - 1);
+
+	/* Maybe the relation's schema is what's absent. */
+	if (schema_does_not_exist_skipping(relname, msg, name))
+		return true;
+
+	rv = makeRangeVarFromNameList(relname);
+	if (OidIsValid(RangeVarGetRelid(rv, NoLock, true)))
+		return false;			/* relation exists; blame the object itself */
+
+	*msg = gettext_noop("relation \"%s\" does not exist, skipping");
+	*name = NameListToString(relname);
+	return true;
+}
+
+/*
+ * schema_does_not_exist_skipping
+ *		Subroutine for RemoveObjects
+ *
+ * Given the possibly-qualified name of an object that was not found,
+ * determine whether the blame lies with a nonexistent schema.  Returns
+ * false when the name is unqualified or its schema exists (the object
+ * itself is missing); returns true, filling *msg and *name with the
+ * NOTICE format and schema name, when the schema does not exist.
+ */
+static bool
+schema_does_not_exist_skipping(List *object, const char **msg, char **name)
+{
+	RangeVar   *rv = makeRangeVarFromNameList(object);
+
+	/* Unqualified name: no schema to blame. */
+	if (rv->schemaname == NULL)
+		return false;
+
+	/* Schema is present: the object itself must be what's missing. */
+	if (OidIsValid(LookupNamespaceNoError(rv->schemaname)))
+		return false;
+
+	*msg = gettext_noop("schema \"%s\" does not exist, skipping");
+	*name = rv->schemaname;
+	return true;
+}
+
+/*
+ * type_in_list_does_not_exist_skipping
+ *		Subroutine for RemoveObjects
+ *
+ * For a missing function, cast, aggregate or operator, check whether one of
+ * the argument datatypes (or its schema) is what actually doesn't exist.
+ * If so, fill *msg and *name and return true; if every listed type resolves,
+ * return false — the original object itself is the missing one.
+ *
+ * typenames is a list of TypeName nodes (entries may be NULL).
+ */
+static bool
+type_in_list_does_not_exist_skipping(List *typenames, const char **msg,
+									 char **name)
+{
+	ListCell   *cell;
+
+	foreach(cell, typenames)
+	{
+		TypeName   *tname = lfirst_node(TypeName, cell);
+
+		if (tname == NULL)
+			continue;
+		if (OidIsValid(LookupTypeNameOid(NULL, tname, true)))
+			continue;			/* this type exists; keep scanning */
+
+		/* Type is missing: point at its schema instead if that's absent. */
+		if (!schema_does_not_exist_skipping(tname->names, msg, name))
+		{
+			*msg = gettext_noop("type \"%s\" does not exist, skipping");
+			*name = TypeNameToString(tname);
+		}
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * does_not_exist_skipping
+ *		Subroutine for RemoveObjects
+ *
+ * Generate a NOTICE stating that the named object was not found, and is
+ * being skipped.  This is only relevant when "IF EXISTS" is used; otherwise,
+ * get_object_address() in RemoveObjects would have thrown an ERROR.
+ */
+static void
+does_not_exist_skipping(ObjectType objtype, Node *object)
+{
+	const char *msg = NULL;		/* NOTICE format string (marked gettext_noop) */
+	char	   *name = NULL;	/* first %s argument */
+	char	   *args = NULL;	/* second %s argument, if the format has one */
+
+	switch (objtype)
+	{
+			/* Objects identified by a bare name (String node). */
+		case OBJECT_ACCESS_METHOD:
+			msg = gettext_noop("access method \"%s\" does not exist, skipping");
+			name = strVal(object);
+			break;
+		case OBJECT_TYPE:
+		case OBJECT_DOMAIN:
+			{
+				TypeName   *typ = castNode(TypeName, object);
+
+				if (!schema_does_not_exist_skipping(typ->names, &msg, &name))
+				{
+					msg = gettext_noop("type \"%s\" does not exist, skipping");
+					name = TypeNameToString(typ);
+				}
+			}
+			break;
+			/* Schema-qualifiable objects identified by a name List. */
+		case OBJECT_COLLATION:
+			if (!schema_does_not_exist_skipping(castNode(List, object), &msg, &name))
+			{
+				msg = gettext_noop("collation \"%s\" does not exist, skipping");
+				name = NameListToString(castNode(List, object));
+			}
+			break;
+		case OBJECT_CONVERSION:
+			if (!schema_does_not_exist_skipping(castNode(List, object), &msg, &name))
+			{
+				msg = gettext_noop("conversion \"%s\" does not exist, skipping");
+				name = NameListToString(castNode(List, object));
+			}
+			break;
+		case OBJECT_SCHEMA:
+			msg = gettext_noop("schema \"%s\" does not exist, skipping");
+			name = strVal(object);
+			break;
+		case OBJECT_STATISTIC_EXT:
+			if (!schema_does_not_exist_skipping(castNode(List, object), &msg, &name))
+			{
+				msg = gettext_noop("statistics object \"%s\" does not exist, skipping");
+				name = NameListToString(castNode(List, object));
+			}
+			break;
+		case OBJECT_TSPARSER:
+			if (!schema_does_not_exist_skipping(castNode(List, object), &msg, &name))
+			{
+				msg = gettext_noop("text search parser \"%s\" does not exist, skipping");
+				name = NameListToString(castNode(List, object));
+			}
+			break;
+		case OBJECT_TSDICTIONARY:
+			if (!schema_does_not_exist_skipping(castNode(List, object), &msg, &name))
+			{
+				msg = gettext_noop("text search dictionary \"%s\" does not exist, skipping");
+				name = NameListToString(castNode(List, object));
+			}
+			break;
+		case OBJECT_TSTEMPLATE:
+			if (!schema_does_not_exist_skipping(castNode(List, object), &msg, &name))
+			{
+				msg = gettext_noop("text search template \"%s\" does not exist, skipping");
+				name = NameListToString(castNode(List, object));
+			}
+			break;
+		case OBJECT_TSCONFIGURATION:
+			if (!schema_does_not_exist_skipping(castNode(List, object), &msg, &name))
+			{
+				msg = gettext_noop("text search configuration \"%s\" does not exist, skipping");
+				name = NameListToString(castNode(List, object));
+			}
+			break;
+		case OBJECT_EXTENSION:
+			msg = gettext_noop("extension \"%s\" does not exist, skipping");
+			name = strVal(object);
+			break;
+			/* Objects identified by name plus argument types. */
+		case OBJECT_FUNCTION:
+			{
+				ObjectWithArgs *owa = castNode(ObjectWithArgs, object);
+
+				if (!schema_does_not_exist_skipping(owa->objname, &msg, &name) &&
+					!type_in_list_does_not_exist_skipping(owa->objargs, &msg, &name))
+				{
+					msg = gettext_noop("function %s(%s) does not exist, skipping");
+					name = NameListToString(owa->objname);
+					args = TypeNameListToString(owa->objargs);
+				}
+				break;
+			}
+		case OBJECT_PROCEDURE:
+			{
+				ObjectWithArgs *owa = castNode(ObjectWithArgs, object);
+
+				if (!schema_does_not_exist_skipping(owa->objname, &msg, &name) &&
+					!type_in_list_does_not_exist_skipping(owa->objargs, &msg, &name))
+				{
+					msg = gettext_noop("procedure %s(%s) does not exist, skipping");
+					name = NameListToString(owa->objname);
+					args = TypeNameListToString(owa->objargs);
+				}
+				break;
+			}
+		case OBJECT_ROUTINE:
+			{
+				ObjectWithArgs *owa = castNode(ObjectWithArgs, object);
+
+				if (!schema_does_not_exist_skipping(owa->objname, &msg, &name) &&
+					!type_in_list_does_not_exist_skipping(owa->objargs, &msg, &name))
+				{
+					msg = gettext_noop("routine %s(%s) does not exist, skipping");
+					name = NameListToString(owa->objname);
+					args = TypeNameListToString(owa->objargs);
+				}
+				break;
+			}
+		case OBJECT_AGGREGATE:
+			{
+				ObjectWithArgs *owa = castNode(ObjectWithArgs, object);
+
+				if (!schema_does_not_exist_skipping(owa->objname, &msg, &name) &&
+					!type_in_list_does_not_exist_skipping(owa->objargs, &msg, &name))
+				{
+					msg = gettext_noop("aggregate %s(%s) does not exist, skipping");
+					name = NameListToString(owa->objname);
+					args = TypeNameListToString(owa->objargs);
+				}
+				break;
+			}
+		case OBJECT_OPERATOR:
+			{
+				ObjectWithArgs *owa = castNode(ObjectWithArgs, object);
+
+				if (!schema_does_not_exist_skipping(owa->objname, &msg, &name) &&
+					!type_in_list_does_not_exist_skipping(owa->objargs, &msg, &name))
+				{
+					msg = gettext_noop("operator %s does not exist, skipping");
+					name = NameListToString(owa->objname);
+				}
+				break;
+			}
+		case OBJECT_LANGUAGE:
+			msg = gettext_noop("language \"%s\" does not exist, skipping");
+			name = strVal(object);
+			break;
+			/* Objects identified by a two-element List. */
+		case OBJECT_CAST:
+			{
+				if (!type_in_list_does_not_exist_skipping(list_make1(linitial(castNode(List, object))), &msg, &name) &&
+					!type_in_list_does_not_exist_skipping(list_make1(lsecond(castNode(List, object))), &msg, &name))
+				{
+					/* XXX quote or no quote? */
+					msg = gettext_noop("cast from type %s to type %s does not exist, skipping");
+					name = TypeNameToString(linitial_node(TypeName, castNode(List, object)));
+					args = TypeNameToString(lsecond_node(TypeName, castNode(List, object)));
+				}
+			}
+			break;
+		case OBJECT_TRANSFORM:
+			if (!type_in_list_does_not_exist_skipping(list_make1(linitial(castNode(List, object))), &msg, &name))
+			{
+				msg = gettext_noop("transform for type %s language \"%s\" does not exist, skipping");
+				name = TypeNameToString(linitial_node(TypeName, castNode(List, object)));
+				args = strVal(lsecond(castNode(List, object)));
+			}
+			break;
+			/* Objects qualified by their owning relation's name. */
+		case OBJECT_TRIGGER:
+			if (!owningrel_does_not_exist_skipping(castNode(List, object), &msg, &name))
+			{
+				msg = gettext_noop("trigger \"%s\" for relation \"%s\" does not exist, skipping");
+				name = strVal(llast(castNode(List, object)));
+				args = NameListToString(list_truncate(list_copy(castNode(List, object)),
+													  list_length(castNode(List, object)) - 1));
+			}
+			break;
+		case OBJECT_POLICY:
+			if (!owningrel_does_not_exist_skipping(castNode(List, object), &msg, &name))
+			{
+				msg = gettext_noop("policy \"%s\" for relation \"%s\" does not exist, skipping");
+				name = strVal(llast(castNode(List, object)));
+				args = NameListToString(list_truncate(list_copy(castNode(List, object)),
+													  list_length(castNode(List, object)) - 1));
+			}
+			break;
+		case OBJECT_EVENT_TRIGGER:
+			msg = gettext_noop("event trigger \"%s\" does not exist, skipping");
+			name = strVal(object);
+			break;
+		case OBJECT_RULE:
+			if (!owningrel_does_not_exist_skipping(castNode(List, object), &msg, &name))
+			{
+				msg = gettext_noop("rule \"%s\" for relation \"%s\" does not exist, skipping");
+				name = strVal(llast(castNode(List, object)));
+				args = NameListToString(list_truncate(list_copy(castNode(List, object)),
+													  list_length(castNode(List, object)) - 1));
+			}
+			break;
+		case OBJECT_FDW:
+			msg = gettext_noop("foreign-data wrapper \"%s\" does not exist, skipping");
+			name = strVal(object);
+			break;
+		case OBJECT_FOREIGN_SERVER:
+			msg = gettext_noop("server \"%s\" does not exist, skipping");
+			name = strVal(object);
+			break;
+			/* Objects qualified by an access method name (first list item). */
+		case OBJECT_OPCLASS:
+			{
+				List	   *opcname = list_copy_tail(castNode(List, object), 1);
+
+				if (!schema_does_not_exist_skipping(opcname, &msg, &name))
+				{
+					msg = gettext_noop("operator class \"%s\" does not exist for access method \"%s\", skipping");
+					name = NameListToString(opcname);
+					args = strVal(linitial(castNode(List, object)));
+				}
+			}
+			break;
+		case OBJECT_OPFAMILY:
+			{
+				List	   *opfname = list_copy_tail(castNode(List, object), 1);
+
+				if (!schema_does_not_exist_skipping(opfname, &msg, &name))
+				{
+					msg = gettext_noop("operator family \"%s\" does not exist for access method \"%s\", skipping");
+					name = NameListToString(opfname);
+					args = strVal(linitial(castNode(List, object)));
+				}
+			}
+			break;
+		case OBJECT_PUBLICATION:
+			msg = gettext_noop("publication \"%s\" does not exist, skipping");
+			name = strVal(object);
+			break;
+		default:
+			elog(ERROR, "unrecognized object type: %d", (int) objtype);
+			break;
+	}
+
+	/* Emit the NOTICE with one or two name arguments, as filled in above. */
+	if (!args)
+		ereport(NOTICE, (errmsg(msg, name)));
+	else
+		ereport(NOTICE, (errmsg(msg, name, args)));
+}
diff --git a/src/backend/commands/event_trigger.c b/src/backend/commands/event_trigger.c
new file mode 100644
index 0000000..356aac4
--- /dev/null
+++ b/src/backend/commands/event_trigger.c
@@ -0,0 +1,2182 @@
+/*-------------------------------------------------------------------------
+ *
+ * event_trigger.c
+ * PostgreSQL EVENT TRIGGER support code.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/commands/event_trigger.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/htup_details.h"
+#include "access/table.h"
+#include "access/xact.h"
+#include "catalog/catalog.h"
+#include "catalog/dependency.h"
+#include "catalog/indexing.h"
+#include "catalog/objectaccess.h"
+#include "catalog/pg_event_trigger.h"
+#include "catalog/pg_namespace.h"
+#include "catalog/pg_opclass.h"
+#include "catalog/pg_opfamily.h"
+#include "catalog/pg_proc.h"
+#include "catalog/pg_trigger.h"
+#include "catalog/pg_ts_config.h"
+#include "catalog/pg_type.h"
+#include "commands/dbcommands.h"
+#include "commands/event_trigger.h"
+#include "commands/extension.h"
+#include "commands/trigger.h"
+#include "funcapi.h"
+#include "lib/ilist.h"
+#include "miscadmin.h"
+#include "parser/parse_func.h"
+#include "pgstat.h"
+#include "tcop/deparse_utility.h"
+#include "tcop/utility.h"
+#include "utils/acl.h"
+#include "utils/builtins.h"
+#include "utils/evtcache.h"
+#include "utils/fmgroids.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
+
+/*
+ * Per-nesting-level state for event trigger support.  One of these is
+ * created for each command that may fire event triggers; instances are
+ * stacked via the 'previous' link (top of stack is
+ * currentEventTriggerState).
+ */
+typedef struct EventTriggerQueryState
+{
+	/* memory context for this state's objects */
+	MemoryContext cxt;
+
+	/* sql_drop */
+	slist_head	SQLDropList;	/* list of SQLDropObject */
+	bool		in_sql_drop;
+
+	/* table_rewrite */
+	Oid			table_rewrite_oid;	/* InvalidOid, or set for table_rewrite
+									 * event */
+	int			table_rewrite_reason;	/* AT_REWRITE reason */
+
+	/* Support for command collection */
+	bool		commandCollectionInhibited;
+	CollectedCommand *currentCommand;
+	List	   *commandList;	/* list of CollectedCommand; see
+								 * deparse_utility.h */
+	struct EventTriggerQueryState *previous;	/* next-outer nesting level */
+} EventTriggerQueryState;
+
+/* Top of the EventTriggerQueryState stack; NULL when none is active. */
+static EventTriggerQueryState *currentEventTriggerState = NULL;
+
+/*
+ * Support for dropped objects: one entry per object reported to sql_drop
+ * triggers, collected in EventTriggerQueryState.SQLDropList.
+ */
+typedef struct SQLDropObject
+{
+	ObjectAddress address;		/* catalog identity of the dropped object */
+	const char *schemaname;		/* containing schema, if any */
+	const char *objname;
+	const char *objidentity;
+	const char *objecttype;
+	List	   *addrnames;
+	List	   *addrargs;
+	bool		original;		/* directly named in the DROP command? */
+	bool		normal;			/* NOTE(review): presumably reached via a
+								 * normal dependency — confirm against
+								 * dependency.c usage */
+	bool		istemp;			/* object lived in a temp namespace? */
+	slist_node	next;			/* link in SQLDropList */
+} SQLDropObject;
+
+static void AlterEventTriggerOwner_internal(Relation rel,
+ HeapTuple tup,
+ Oid newOwnerId);
+static void error_duplicate_filter_variable(const char *defname);
+static Datum filter_list_to_array(List *filterlist);
+static Oid insert_event_trigger_tuple(const char *trigname, const char *eventname,
+ Oid evtOwner, Oid funcoid, List *tags);
+static void validate_ddl_tags(const char *filtervar, List *taglist);
+static void validate_table_rewrite_tags(const char *filtervar, List *taglist);
+static void EventTriggerInvoke(List *fn_oid_list, EventTriggerData *trigdata);
+static const char *stringify_grant_objtype(ObjectType objtype);
+static const char *stringify_adefprivs_objtype(ObjectType objtype);
+
+/*
+ * Create an event trigger.
+ *
+ * Validates the event name, WHEN-clause filters, and trigger function,
+ * then inserts the pg_event_trigger row.  Returns the OID of the new
+ * event trigger.  Requires superuser.
+ */
+Oid
+CreateEventTrigger(CreateEventTrigStmt *stmt)
+{
+	HeapTuple	tuple;
+	Oid			funcoid;
+	Oid			funcrettype;
+	Oid			evtowner = GetUserId();
+	ListCell   *lc;
+	List	   *tags = NULL;	/* list of String command tags, or NULL */
+
+	/*
+	 * It would be nice to allow database owners or even regular users to do
+	 * this, but there are obvious privilege escalation risks which would have
+	 * to somehow be plugged first.
+	 */
+	if (!superuser())
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 errmsg("permission denied to create event trigger \"%s\"",
+						stmt->trigname),
+				 errhint("Must be superuser to create an event trigger.")));
+
+	/* Validate event name. */
+	if (strcmp(stmt->eventname, "ddl_command_start") != 0 &&
+		strcmp(stmt->eventname, "ddl_command_end") != 0 &&
+		strcmp(stmt->eventname, "sql_drop") != 0 &&
+		strcmp(stmt->eventname, "table_rewrite") != 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("unrecognized event name \"%s\"",
+						stmt->eventname)));
+
+	/* Validate filter conditions ("tag" is the only known variable). */
+	foreach(lc, stmt->whenclause)
+	{
+		DefElem    *def = (DefElem *) lfirst(lc);
+
+		if (strcmp(def->defname, "tag") == 0)
+		{
+			if (tags != NULL)
+				error_duplicate_filter_variable(def->defname);
+			tags = (List *) def->arg;
+		}
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("unrecognized filter variable \"%s\"", def->defname)));
+	}
+
+	/* Validate tag list, if any; the allowed set depends on the event. */
+	if ((strcmp(stmt->eventname, "ddl_command_start") == 0 ||
+		 strcmp(stmt->eventname, "ddl_command_end") == 0 ||
+		 strcmp(stmt->eventname, "sql_drop") == 0)
+		&& tags != NULL)
+		validate_ddl_tags("tag", tags);
+	else if (strcmp(stmt->eventname, "table_rewrite") == 0
+			 && tags != NULL)
+		validate_table_rewrite_tags("tag", tags);
+
+	/*
+	 * Give user a nice error message if an event trigger of the same name
+	 * already exists.
+	 */
+	tuple = SearchSysCache1(EVENTTRIGGERNAME, CStringGetDatum(stmt->trigname));
+	if (HeapTupleIsValid(tuple))
+		ereport(ERROR,
+				(errcode(ERRCODE_DUPLICATE_OBJECT),
+				 errmsg("event trigger \"%s\" already exists",
+						stmt->trigname)));
+
+	/* Find and validate the trigger function: no args, returns event_trigger. */
+	funcoid = LookupFuncName(stmt->funcname, 0, NULL, false);
+	funcrettype = get_func_rettype(funcoid);
+	if (funcrettype != EVENT_TRIGGEROID)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+				 errmsg("function %s must return type %s",
+						NameListToString(stmt->funcname), "event_trigger")));
+
+	/* Insert catalog entries. */
+	return insert_event_trigger_tuple(stmt->trigname, stmt->eventname,
+									  evtowner, funcoid, tags);
+}
+
+/*
+ * validate_ddl_tags
+ *		Check a WHEN tag filter list for a DDL-command event.
+ *
+ * Each entry must be a known command tag that supports event triggers;
+ * otherwise an ERROR is raised naming the bad value and filter variable.
+ */
+static void
+validate_ddl_tags(const char *filtervar, List *taglist)
+{
+	ListCell   *cell;
+
+	foreach(cell, taglist)
+	{
+		const char *tagstr = strVal(lfirst(cell));
+		CommandTag	tag = GetCommandTagEnum(tagstr);
+
+		if (tag == CMDTAG_UNKNOWN)
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("filter value \"%s\" not recognized for filter variable \"%s\"",
+							tagstr, filtervar)));
+		if (!command_tag_event_trigger_ok(tag))
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+			/* translator: %s represents an SQL statement name */
+					 errmsg("event triggers are not supported for %s",
+							tagstr)));
+	}
+}
+
+/*
+ * validate_table_rewrite_tags
+ *		Check a WHEN tag filter list for the table_rewrite event.
+ *
+ * Raises an ERROR for any tag that can't fire a table_rewrite trigger.
+ */
+static void
+validate_table_rewrite_tags(const char *filtervar, List *taglist)
+{
+	ListCell   *cell;
+
+	foreach(cell, taglist)
+	{
+		const char *tagstr = strVal(lfirst(cell));
+		CommandTag	tag = GetCommandTagEnum(tagstr);
+
+		if (!command_tag_table_rewrite_ok(tag))
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+			/* translator: %s represents an SQL statement name */
+					 errmsg("event triggers are not supported for %s",
+							tagstr)));
+	}
+}
+
+/*
+ * error_duplicate_filter_variable
+ *		Raise a syntax error for a WHEN-clause variable given twice.
+ */
+static void
+error_duplicate_filter_variable(const char *defname)
+{
+	ereport(ERROR,
+			(errcode(ERRCODE_SYNTAX_ERROR),
+			 errmsg("filter variable \"%s\" specified more than once", defname)));
+}
+
+/*
+ * Insert the new pg_event_trigger row and record dependencies.
+ *
+ * trigname/eventname: trigger name and the event it fires on
+ * evtOwner: OID of the owning role
+ * funcoid: OID of the (already validated) trigger function
+ * taglist: list of String command tags to filter on, or NIL for none
+ *
+ * Returns the OID assigned to the new event trigger.
+ */
+static Oid
+insert_event_trigger_tuple(const char *trigname, const char *eventname, Oid evtOwner,
+						   Oid funcoid, List *taglist)
+{
+	Relation	tgrel;
+	Oid			trigoid;
+	HeapTuple	tuple;
+
+	/*
+	 * Size these by pg_event_trigger's column count.  (Previously they were
+	 * sized with Natts_pg_trigger, an apparent copy-and-paste from trigger
+	 * code; that was harmless only because pg_trigger has at least as many
+	 * columns, but it was fragile and misleading.)
+	 */
+	Datum		values[Natts_pg_event_trigger];
+	bool		nulls[Natts_pg_event_trigger];
+	NameData	evtnamedata,
+				evteventdata;
+	ObjectAddress myself,
+				referenced;
+
+	/* Open pg_event_trigger. */
+	tgrel = table_open(EventTriggerRelationId, RowExclusiveLock);
+
+	/* Build the new pg_event_trigger tuple. */
+	trigoid = GetNewOidWithIndex(tgrel, EventTriggerOidIndexId,
+								 Anum_pg_event_trigger_oid);
+	values[Anum_pg_event_trigger_oid - 1] = ObjectIdGetDatum(trigoid);
+	memset(nulls, false, sizeof(nulls));
+	namestrcpy(&evtnamedata, trigname);
+	values[Anum_pg_event_trigger_evtname - 1] = NameGetDatum(&evtnamedata);
+	namestrcpy(&evteventdata, eventname);
+	values[Anum_pg_event_trigger_evtevent - 1] = NameGetDatum(&evteventdata);
+	values[Anum_pg_event_trigger_evtowner - 1] = ObjectIdGetDatum(evtOwner);
+	values[Anum_pg_event_trigger_evtfoid - 1] = ObjectIdGetDatum(funcoid);
+	values[Anum_pg_event_trigger_evtenabled - 1] =
+		CharGetDatum(TRIGGER_FIRES_ON_ORIGIN);
+	if (taglist == NIL)
+		nulls[Anum_pg_event_trigger_evttags - 1] = true;
+	else
+		values[Anum_pg_event_trigger_evttags - 1] =
+			filter_list_to_array(taglist);
+
+	/* Insert heap tuple. */
+	tuple = heap_form_tuple(tgrel->rd_att, values, nulls);
+	CatalogTupleInsert(tgrel, tuple);
+	heap_freetuple(tuple);
+
+	/* Depend on owner. */
+	recordDependencyOnOwner(EventTriggerRelationId, trigoid, evtOwner);
+
+	/* Depend on event trigger function. */
+	myself.classId = EventTriggerRelationId;
+	myself.objectId = trigoid;
+	myself.objectSubId = 0;
+	referenced.classId = ProcedureRelationId;
+	referenced.objectId = funcoid;
+	referenced.objectSubId = 0;
+	recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
+
+	/* Depend on extension, if any. */
+	recordDependencyOnCurrentExtension(&myself, false);
+
+	/* Post creation hook for new event trigger */
+	InvokeObjectPostCreateHook(EventTriggerRelationId, trigoid, 0);
+
+	/* Close pg_event_trigger. */
+	table_close(tgrel, RowExclusiveLock);
+
+	return trigoid;
+}
+
+/*
+ * filter_list_to_array
+ *		Convert a WHEN-clause tag list into a text[] Datum.
+ *
+ * The parser represents WHEN tag IN ('cmd1', 'cmd2') as a List of String
+ * nodes; the catalog stores the tags as a text array.  Tags are upper-cased
+ * (ASCII-only) on the way in.  If a case-sensitive filter variable is ever
+ * added, this normalization would need revisiting.
+ */
+static Datum
+filter_list_to_array(List *filterlist)
+{
+	int			nelems = list_length(filterlist);
+	Datum	   *elems = (Datum *) palloc(nelems * sizeof(Datum));
+	int			ndone = 0;
+	ListCell   *cell;
+
+	foreach(cell, filterlist)
+	{
+		char	   *upcased = pstrdup(strVal(lfirst(cell)));
+		char	   *p;
+
+		for (p = upcased; *p; p++)
+			*p = pg_ascii_toupper((unsigned char) *p);
+		elems[ndone++] = PointerGetDatum(cstring_to_text(upcased));
+		pfree(upcased);
+	}
+
+	return PointerGetDatum(construct_array(elems, nelems, TEXTOID,
+										   -1, false, TYPALIGN_INT));
+}
+
+/*
+ * ALTER EVENT TRIGGER foo ENABLE|DISABLE|ENABLE ALWAYS|REPLICA
+ *
+ * Updates evtenabled for the named trigger; caller must own it.
+ * Returns the OID of the altered event trigger.
+ */
+Oid
+AlterEventTrigger(AlterEventTrigStmt *stmt)
+{
+	Relation	tgrel;
+	HeapTuple	tup;
+	Oid			trigoid;
+	Form_pg_event_trigger evtForm;
+	char		tgenabled = stmt->tgenabled;	/* new evtenabled value */
+
+	tgrel = table_open(EventTriggerRelationId, RowExclusiveLock);
+
+	tup = SearchSysCacheCopy1(EVENTTRIGGERNAME,
+							  CStringGetDatum(stmt->trigname));
+	if (!HeapTupleIsValid(tup))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("event trigger \"%s\" does not exist",
+						stmt->trigname)));
+
+	evtForm = (Form_pg_event_trigger) GETSTRUCT(tup);
+	trigoid = evtForm->oid;
+
+	if (!pg_event_trigger_ownercheck(trigoid, GetUserId()))
+		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_EVENT_TRIGGER,
+					   stmt->trigname);
+
+	/* tuple is a copy, so we can modify it below */
+	evtForm->evtenabled = tgenabled;
+
+	CatalogTupleUpdate(tgrel, &tup->t_self, tup);
+
+	InvokeObjectPostAlterHook(EventTriggerRelationId,
+							  trigoid, 0);
+
+	/* clean up */
+	heap_freetuple(tup);
+	table_close(tgrel, RowExclusiveLock);
+
+	return trigoid;
+}
+
+/*
+ * Change event trigger's owner -- by name
+ *
+ * Returns the ObjectAddress of the (possibly unchanged) event trigger.
+ * Errors if no trigger of that name exists.
+ */
+ObjectAddress
+AlterEventTriggerOwner(const char *name, Oid newOwnerId)
+{
+	Oid			evtOid;
+	HeapTuple	tup;
+	Form_pg_event_trigger evtForm;
+	Relation	rel;
+	ObjectAddress address;
+
+	rel = table_open(EventTriggerRelationId, RowExclusiveLock);
+
+	/* copy, so the shared worker below can scribble on it */
+	tup = SearchSysCacheCopy1(EVENTTRIGGERNAME, CStringGetDatum(name));
+
+	if (!HeapTupleIsValid(tup))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("event trigger \"%s\" does not exist", name)));
+
+	evtForm = (Form_pg_event_trigger) GETSTRUCT(tup);
+	evtOid = evtForm->oid;
+
+	AlterEventTriggerOwner_internal(rel, tup, newOwnerId);
+
+	ObjectAddressSet(address, EventTriggerRelationId, evtOid);
+
+	heap_freetuple(tup);
+
+	table_close(rel, RowExclusiveLock);
+
+	return address;
+}
+
+/*
+ * Change event trigger owner, by OID
+ *
+ * Errors if no event trigger with that OID exists.
+ */
+void
+AlterEventTriggerOwner_oid(Oid trigOid, Oid newOwnerId)
+{
+	HeapTuple	tup;
+	Relation	rel;
+
+	rel = table_open(EventTriggerRelationId, RowExclusiveLock);
+
+	/* copy, so the shared worker below can scribble on it */
+	tup = SearchSysCacheCopy1(EVENTTRIGGEROID, ObjectIdGetDatum(trigOid));
+
+	if (!HeapTupleIsValid(tup))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("event trigger with OID %u does not exist", trigOid)));
+
+	AlterEventTriggerOwner_internal(rel, tup, newOwnerId);
+
+	heap_freetuple(tup);
+
+	table_close(rel, RowExclusiveLock);
+}
+
+/*
+ * Internal workhorse for changing an event trigger's owner
+ *
+ * 'tup' must be a modifiable copy of the pg_event_trigger tuple; it is
+ * updated in place and written back.  Current user must own the trigger,
+ * and the new owner must be a superuser.  No-op if ownership is unchanged.
+ */
+static void
+AlterEventTriggerOwner_internal(Relation rel, HeapTuple tup, Oid newOwnerId)
+{
+	Form_pg_event_trigger form;
+
+	form = (Form_pg_event_trigger) GETSTRUCT(tup);
+
+	/* Nothing to do if ownership wouldn't change. */
+	if (form->evtowner == newOwnerId)
+		return;
+
+	if (!pg_event_trigger_ownercheck(form->oid, GetUserId()))
+		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_EVENT_TRIGGER,
+					   NameStr(form->evtname));
+
+	/* New owner must be a superuser */
+	if (!superuser_arg(newOwnerId))
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 errmsg("permission denied to change owner of event trigger \"%s\"",
+						NameStr(form->evtname)),
+				 errhint("The owner of an event trigger must be a superuser.")));
+
+	form->evtowner = newOwnerId;
+	CatalogTupleUpdate(rel, &tup->t_self, tup);
+
+	/* Update owner dependency reference */
+	changeDependencyOnOwner(EventTriggerRelationId,
+							form->oid,
+							newOwnerId);
+
+	InvokeObjectPostAlterHook(EventTriggerRelationId,
+							  form->oid, 0);
+}
+
+/*
+ * get_event_trigger_oid - Look up an event trigger by name to find its OID.
+ *
+ * If missing_ok is false, throw an error if trigger not found.  If
+ * true, just return InvalidOid.
+ */
+Oid
+get_event_trigger_oid(const char *trigname, bool missing_ok)
+{
+	Oid			oid = GetSysCacheOid1(EVENTTRIGGERNAME,
+									  Anum_pg_event_trigger_oid,
+									  CStringGetDatum(trigname));
+
+	if (OidIsValid(oid) || missing_ok)
+		return oid;
+
+	ereport(ERROR,
+			(errcode(ERRCODE_UNDEFINED_OBJECT),
+			 errmsg("event trigger \"%s\" does not exist", trigname)));
+	return InvalidOid;			/* keep compiler quiet */
+}
+
+/*
+ * filter_event_trigger
+ *		Decide whether the given event trigger should fire for this command.
+ *
+ * Returns true to fire, false to skip, based on the session replication
+ * role and the trigger's registered tag filter.
+ */
+static bool
+filter_event_trigger(CommandTag tag, EventTriggerCacheItem *item)
+{
+	char		suppressed;
+
+	/*
+	 * Which enablement setting is suppressed under the current session
+	 * replication role?  (Fully disabled triggers never reach this point.)
+	 */
+	if (SessionReplicationRole == SESSION_REPLICATION_ROLE_REPLICA)
+		suppressed = TRIGGER_FIRES_ON_ORIGIN;
+	else
+		suppressed = TRIGGER_FIRES_ON_REPLICA;
+
+	if (item->enabled == suppressed)
+		return false;
+
+	/* If a tag filter was registered, the command's tag must be in it. */
+	if (!bms_is_empty(item->tagset) && !bms_is_member(tag, item->tagset))
+		return false;
+
+	return true;
+}
+
+/*
+ * Setup for running triggers for the given event. Return value is an OID list
+ * of functions to run; if there are any, trigdata is filled with an
+ * appropriate EventTriggerData for them to receive.
+ *
+ * Returns NIL (without touching trigdata) when no trigger qualifies.
+ */
+static List *
+EventTriggerCommonSetup(Node *parsetree,
+						EventTriggerEvent event, const char *eventstr,
+						EventTriggerData *trigdata)
+{
+	CommandTag	tag;
+	List	   *cachelist;
+	ListCell   *lc;
+	List	   *runlist = NIL;	/* result: OIDs of trigger functions to run */
+
+	/*
+	 * We want the list of command tags for which this procedure is actually
+	 * invoked to match up exactly with the list that CREATE EVENT TRIGGER
+	 * accepts.  This debugging cross-check will throw an error if this
+	 * function is invoked for a command tag that CREATE EVENT TRIGGER won't
+	 * accept.  (Unfortunately, there doesn't seem to be any simple, automated
+	 * way to verify that CREATE EVENT TRIGGER doesn't accept extra stuff that
+	 * never reaches this control point.)
+	 *
+	 * If this cross-check fails for you, you probably need to either adjust
+	 * standard_ProcessUtility() not to invoke event triggers for the command
+	 * type in question, or you need to adjust event_trigger_ok to accept the
+	 * relevant command tag.
+	 */
+#ifdef USE_ASSERT_CHECKING
+	{
+		CommandTag	dbgtag;
+
+		dbgtag = CreateCommandTag(parsetree);
+		if (event == EVT_DDLCommandStart ||
+			event == EVT_DDLCommandEnd ||
+			event == EVT_SQLDrop)
+		{
+			if (!command_tag_event_trigger_ok(dbgtag))
+				elog(ERROR, "unexpected command tag \"%s\"", GetCommandTagName(dbgtag));
+		}
+		else if (event == EVT_TableRewrite)
+		{
+			if (!command_tag_table_rewrite_ok(dbgtag))
+				elog(ERROR, "unexpected command tag \"%s\"", GetCommandTagName(dbgtag));
+		}
+	}
+#endif
+
+	/* Use cache to find triggers for this event; fast exit if none. */
+	cachelist = EventCacheLookup(event);
+	if (cachelist == NIL)
+		return NIL;
+
+	/* Get the command tag. */
+	tag = CreateCommandTag(parsetree);
+
+	/*
+	 * Filter list of event triggers by command tag, and copy them into our
+	 * memory context.  Once we start running the command triggers, or indeed
+	 * once we do anything at all that touches the catalogs, an invalidation
+	 * might leave cachelist pointing at garbage, so we must do this before we
+	 * can do much else.
+	 */
+	foreach(lc, cachelist)
+	{
+		EventTriggerCacheItem *item = lfirst(lc);
+
+		if (filter_event_trigger(tag, item))
+		{
+			/* We must plan to fire this trigger. */
+			runlist = lappend_oid(runlist, item->fnoid);
+		}
+	}
+
+	/* don't spend any more time on this if no functions to run */
+	if (runlist == NIL)
+		return NIL;
+
+	/* Fill in the trigger data the functions will receive. */
+	trigdata->type = T_EventTriggerData;
+	trigdata->event = eventstr;
+	trigdata->parsetree = parsetree;
+	trigdata->tag = tag;
+
+	return runlist;
+}
+
+/*
+ * Fire ddl_command_start triggers.
+ */
+void
+EventTriggerDDLCommandStart(Node *parsetree)
+{
+ List *runlist;
+ EventTriggerData trigdata;
+
+ /*
+ * Event Triggers are completely disabled in standalone mode. There are
+ * (at least) two reasons for this:
+ *
+ * 1. A sufficiently broken event trigger might not only render the
+ * database unusable, but prevent disabling itself to fix the situation.
+ * In this scenario, restarting in standalone mode provides an escape
+ * hatch.
+ *
+ * 2. BuildEventTriggerCache relies on systable_beginscan_ordered, and
+ * therefore will malfunction if pg_event_trigger's indexes are damaged.
+ * To allow recovery from a damaged index, we need some operating mode
+ * wherein event triggers are disabled. (Or we could implement
+ * heapscan-and-sort logic for that case, but having disaster recovery
+ * scenarios depend on code that's otherwise untested isn't appetizing.)
+ */
+ if (!IsUnderPostmaster)
+ return;
+
+ runlist = EventTriggerCommonSetup(parsetree,
+ EVT_DDLCommandStart,
+ "ddl_command_start",
+ &trigdata);
+ if (runlist == NIL)
+ return;
+
+ /* Run the triggers. */
+ EventTriggerInvoke(runlist, &trigdata);
+
+ /* Cleanup. */
+ list_free(runlist);
+
+ /*
+ * Make sure anything the event triggers did will be visible to the main
+ * command.
+ */
+ CommandCounterIncrement();
+}
+
+/*
+ * Fire ddl_command_end triggers.
+ */
+void
+EventTriggerDDLCommandEnd(Node *parsetree)
+{
+ List *runlist;
+ EventTriggerData trigdata;
+
+ /*
+ * See EventTriggerDDLCommandStart for a discussion about why event
+ * triggers are disabled in single user mode.
+ */
+ if (!IsUnderPostmaster)
+ return;
+
+ /*
+ * Also do nothing if our state isn't set up, which it won't be if there
+ * weren't any relevant event triggers at the start of the current DDL
+ * command. This test might therefore seem optional, but it's important
+ * because EventTriggerCommonSetup might find triggers that didn't exist
+ * at the time the command started. Although this function itself
+ * wouldn't crash, the event trigger functions would presumably call
+ * pg_event_trigger_ddl_commands which would fail. Better to do nothing
+ * until the next command.
+ */
+ if (!currentEventTriggerState)
+ return;
+
+ runlist = EventTriggerCommonSetup(parsetree,
+ EVT_DDLCommandEnd, "ddl_command_end",
+ &trigdata);
+ if (runlist == NIL)
+ return;
+
+ /*
+ * Make sure anything the main command did will be visible to the event
+ * triggers.
+ */
+ CommandCounterIncrement();
+
+ /* Run the triggers. */
+ EventTriggerInvoke(runlist, &trigdata);
+
+ /* Cleanup. */
+ list_free(runlist);
+}
+
/*
 * Fire sql_drop triggers.
 *
 * Runs after the drop has been performed; the list of dropped objects has
 * been accumulated in currentEventTriggerState->SQLDropList by
 * EventTriggerSQLDropAddObject.
 */
void
EventTriggerSQLDrop(Node *parsetree)
{
    List       *runlist;
    EventTriggerData trigdata;

    /*
     * See EventTriggerDDLCommandStart for a discussion about why event
     * triggers are disabled in single user mode.
     */
    if (!IsUnderPostmaster)
        return;

    /*
     * Use current state to determine whether this event fires at all.  If
     * there are no triggers for the sql_drop event, then we don't have
     * anything to do here.  Note that dropped object collection is disabled
     * if this is the case, so even if we were to try to run, the list would
     * be empty.
     */
    if (!currentEventTriggerState ||
        slist_is_empty(&currentEventTriggerState->SQLDropList))
        return;

    runlist = EventTriggerCommonSetup(parsetree,
                                      EVT_SQLDrop, "sql_drop",
                                      &trigdata);

    /*
     * Nothing to do if run list is empty.  Note this typically can't happen,
     * because if there are no sql_drop events, then objects-to-drop wouldn't
     * have been collected in the first place and we would have quit above.
     * But it could occur if event triggers were dropped partway through.
     */
    if (runlist == NIL)
        return;

    /*
     * Make sure anything the main command did will be visible to the event
     * triggers.
     */
    CommandCounterIncrement();

    /*
     * Make sure pg_event_trigger_dropped_objects only works when running
     * these triggers.  Use PG_TRY to ensure in_sql_drop is reset even when
     * one trigger fails.  (This is perhaps not necessary, as the currentState
     * variable will be removed shortly by our caller, but it seems better to
     * play safe.)
     */
    currentEventTriggerState->in_sql_drop = true;

    /* Run the triggers. */
    PG_TRY();
    {
        EventTriggerInvoke(runlist, &trigdata);
    }
    PG_FINALLY();
    {
        /* Reset even on error, so the flag can't leak into later commands. */
        currentEventTriggerState->in_sql_drop = false;
    }
    PG_END_TRY();

    /* Cleanup. */
    list_free(runlist);
}
+
+
/*
 * Fire table_rewrite triggers.
 *
 * tableOid identifies the table about to be rewritten; reason is the
 * AT_REWRITE_* reason code, which pg_event_trigger_table_rewrite_reason()
 * exposes to the trigger function.
 */
void
EventTriggerTableRewrite(Node *parsetree, Oid tableOid, int reason)
{
    List       *runlist;
    EventTriggerData trigdata;

    /*
     * See EventTriggerDDLCommandStart for a discussion about why event
     * triggers are disabled in single user mode.
     */
    if (!IsUnderPostmaster)
        return;

    /*
     * Also do nothing if our state isn't set up, which it won't be if there
     * weren't any relevant event triggers at the start of the current DDL
     * command.  This test might therefore seem optional, but it's
     * *necessary*, because EventTriggerCommonSetup might find triggers that
     * didn't exist at the time the command started.
     */
    if (!currentEventTriggerState)
        return;

    runlist = EventTriggerCommonSetup(parsetree,
                                      EVT_TableRewrite,
                                      "table_rewrite",
                                      &trigdata);
    if (runlist == NIL)
        return;

    /*
     * Make sure pg_event_trigger_table_rewrite_oid only works when running
     * these triggers.  Use PG_TRY to ensure table_rewrite_oid is reset even
     * when one trigger fails.  (This is perhaps not necessary, as the
     * currentState variable will be removed shortly by our caller, but it
     * seems better to play safe.)
     */
    currentEventTriggerState->table_rewrite_oid = tableOid;
    currentEventTriggerState->table_rewrite_reason = reason;

    /* Run the triggers. */
    PG_TRY();
    {
        EventTriggerInvoke(runlist, &trigdata);
    }
    PG_FINALLY();
    {
        /* Reset even on error, so stale values can't leak out. */
        currentEventTriggerState->table_rewrite_oid = InvalidOid;
        currentEventTriggerState->table_rewrite_reason = 0;
    }
    PG_END_TRY();

    /* Cleanup. */
    list_free(runlist);

    /*
     * Make sure anything the event triggers did will be visible to the main
     * command.
     */
    CommandCounterIncrement();
}
+
/*
 * Invoke each event trigger in a list of event triggers.
 *
 * fn_oid_list is a list of pg_proc OIDs; trigdata is passed to each function
 * as its "context" node so the PL can recognize the event-trigger call
 * convention.  Each function is called with zero arguments.
 */
static void
EventTriggerInvoke(List *fn_oid_list, EventTriggerData *trigdata)
{
    MemoryContext context;
    MemoryContext oldcontext;
    ListCell   *lc;
    bool        first = true;

    /* Guard against stack overflow due to recursive event trigger */
    check_stack_depth();

    /*
     * Let's evaluate event triggers in their own memory context, so that any
     * leaks get cleaned up promptly.
     */
    context = AllocSetContextCreate(CurrentMemoryContext,
                                    "event trigger context",
                                    ALLOCSET_DEFAULT_SIZES);
    oldcontext = MemoryContextSwitchTo(context);

    /* Call each event trigger. */
    foreach(lc, fn_oid_list)
    {
        LOCAL_FCINFO(fcinfo, 0);
        Oid         fnoid = lfirst_oid(lc);
        FmgrInfo    flinfo;
        PgStat_FunctionCallUsage fcusage;

        elog(DEBUG1, "EventTriggerInvoke %u", fnoid);

        /*
         * We want each event trigger to be able to see the results of the
         * previous event trigger's action.  Caller is responsible for any
         * command-counter increment that is needed between the event trigger
         * and anything else in the transaction.
         */
        if (first)
            first = false;
        else
            CommandCounterIncrement();

        /* Look up the function */
        fmgr_info(fnoid, &flinfo);

        /* Call the function, passing no arguments but setting a context. */
        InitFunctionCallInfoData(*fcinfo, &flinfo, 0,
                                 InvalidOid, (Node *) trigdata, NULL);
        pgstat_init_function_usage(fcinfo, &fcusage);
        FunctionCallInvoke(fcinfo);
        pgstat_end_function_usage(&fcusage, true);

        /* Reclaim memory. */
        MemoryContextReset(context);
    }

    /* Restore old memory context and delete the temporary one. */
    MemoryContextSwitchTo(oldcontext);
    MemoryContextDelete(context);
}
+
/*
 * Do event triggers support this object type?
 *
 * This list must be kept in sync with the ObjectType enum: the deliberate
 * absence of a default: case below makes the compiler warn when a new
 * ObjectType value is added without being classified here.
 */
bool
EventTriggerSupportsObjectType(ObjectType obtype)
{
    switch (obtype)
    {
        case OBJECT_DATABASE:
        case OBJECT_TABLESPACE:
        case OBJECT_ROLE:
        case OBJECT_PARAMETER_ACL:
            /* no support for global objects */
            return false;
        case OBJECT_EVENT_TRIGGER:
            /* no support for event triggers on event triggers */
            return false;
        case OBJECT_ACCESS_METHOD:
        case OBJECT_AGGREGATE:
        case OBJECT_AMOP:
        case OBJECT_AMPROC:
        case OBJECT_ATTRIBUTE:
        case OBJECT_CAST:
        case OBJECT_COLUMN:
        case OBJECT_COLLATION:
        case OBJECT_CONVERSION:
        case OBJECT_DEFACL:
        case OBJECT_DEFAULT:
        case OBJECT_DOMAIN:
        case OBJECT_DOMCONSTRAINT:
        case OBJECT_EXTENSION:
        case OBJECT_FDW:
        case OBJECT_FOREIGN_SERVER:
        case OBJECT_FOREIGN_TABLE:
        case OBJECT_FUNCTION:
        case OBJECT_INDEX:
        case OBJECT_LANGUAGE:
        case OBJECT_LARGEOBJECT:
        case OBJECT_MATVIEW:
        case OBJECT_OPCLASS:
        case OBJECT_OPERATOR:
        case OBJECT_OPFAMILY:
        case OBJECT_POLICY:
        case OBJECT_PROCEDURE:
        case OBJECT_PUBLICATION:
        case OBJECT_PUBLICATION_NAMESPACE:
        case OBJECT_PUBLICATION_REL:
        case OBJECT_ROUTINE:
        case OBJECT_RULE:
        case OBJECT_SCHEMA:
        case OBJECT_SEQUENCE:
        case OBJECT_SUBSCRIPTION:
        case OBJECT_STATISTIC_EXT:
        case OBJECT_TABCONSTRAINT:
        case OBJECT_TABLE:
        case OBJECT_TRANSFORM:
        case OBJECT_TRIGGER:
        case OBJECT_TSCONFIGURATION:
        case OBJECT_TSDICTIONARY:
        case OBJECT_TSPARSER:
        case OBJECT_TSTEMPLATE:
        case OBJECT_TYPE:
        case OBJECT_USER_MAPPING:
        case OBJECT_VIEW:
            return true;

            /*
             * There's intentionally no default: case here; we want the
             * compiler to warn if a new ObjectType hasn't been handled above.
             */
    }

    /* Shouldn't get here, but if we do, say "no support" */
    return false;
}
+
/*
 * Do event triggers support this object class?
 *
 * Parallels EventTriggerSupportsObjectType, but classifies ObjectClass
 * (catalog) values rather than parser-level ObjectType values.  As there,
 * the missing default: case is intentional so the compiler warns when a
 * new OCLASS value appears.
 */
bool
EventTriggerSupportsObjectClass(ObjectClass objclass)
{
    switch (objclass)
    {
        case OCLASS_DATABASE:
        case OCLASS_TBLSPACE:
        case OCLASS_ROLE:
        case OCLASS_PARAMETER_ACL:
            /* no support for global objects */
            return false;
        case OCLASS_EVENT_TRIGGER:
            /* no support for event triggers on event triggers */
            return false;
        case OCLASS_CLASS:
        case OCLASS_PROC:
        case OCLASS_TYPE:
        case OCLASS_CAST:
        case OCLASS_COLLATION:
        case OCLASS_CONSTRAINT:
        case OCLASS_CONVERSION:
        case OCLASS_DEFAULT:
        case OCLASS_LANGUAGE:
        case OCLASS_LARGEOBJECT:
        case OCLASS_OPERATOR:
        case OCLASS_OPCLASS:
        case OCLASS_OPFAMILY:
        case OCLASS_AM:
        case OCLASS_AMOP:
        case OCLASS_AMPROC:
        case OCLASS_REWRITE:
        case OCLASS_TRIGGER:
        case OCLASS_SCHEMA:
        case OCLASS_STATISTIC_EXT:
        case OCLASS_TSPARSER:
        case OCLASS_TSDICT:
        case OCLASS_TSTEMPLATE:
        case OCLASS_TSCONFIG:
        case OCLASS_FDW:
        case OCLASS_FOREIGN_SERVER:
        case OCLASS_USER_MAPPING:
        case OCLASS_DEFACL:
        case OCLASS_EXTENSION:
        case OCLASS_POLICY:
        case OCLASS_PUBLICATION:
        case OCLASS_PUBLICATION_NAMESPACE:
        case OCLASS_PUBLICATION_REL:
        case OCLASS_SUBSCRIPTION:
        case OCLASS_TRANSFORM:
            return true;

            /*
             * There's intentionally no default: case here; we want the
             * compiler to warn if a new OCLASS hasn't been handled above.
             */
    }

    /* Shouldn't get here, but if we do, say "no support" */
    return false;
}
+
+/*
+ * Prepare event trigger state for a new complete query to run, if necessary;
+ * returns whether this was done. If it was, EventTriggerEndCompleteQuery must
+ * be called when the query is done, regardless of whether it succeeds or fails
+ * -- so use of a PG_TRY block is mandatory.
+ */
+bool
+EventTriggerBeginCompleteQuery(void)
+{
+ EventTriggerQueryState *state;
+ MemoryContext cxt;
+
+ /*
+ * Currently, sql_drop, table_rewrite, ddl_command_end events are the only
+ * reason to have event trigger state at all; so if there are none, don't
+ * install one.
+ */
+ if (!trackDroppedObjectsNeeded())
+ return false;
+
+ cxt = AllocSetContextCreate(TopMemoryContext,
+ "event trigger state",
+ ALLOCSET_DEFAULT_SIZES);
+ state = MemoryContextAlloc(cxt, sizeof(EventTriggerQueryState));
+ state->cxt = cxt;
+ slist_init(&(state->SQLDropList));
+ state->in_sql_drop = false;
+ state->table_rewrite_oid = InvalidOid;
+
+ state->commandCollectionInhibited = currentEventTriggerState ?
+ currentEventTriggerState->commandCollectionInhibited : false;
+ state->currentCommand = NULL;
+ state->commandList = NIL;
+ state->previous = currentEventTriggerState;
+ currentEventTriggerState = state;
+
+ return true;
+}
+
+/*
+ * Query completed (or errored out) -- clean up local state, return to previous
+ * one.
+ *
+ * Note: it's an error to call this routine if EventTriggerBeginCompleteQuery
+ * returned false previously.
+ *
+ * Note: this might be called in the PG_CATCH block of a failing transaction,
+ * so be wary of running anything unnecessary. (In particular, it's probably
+ * unwise to try to allocate memory.)
+ */
+void
+EventTriggerEndCompleteQuery(void)
+{
+ EventTriggerQueryState *prevstate;
+
+ prevstate = currentEventTriggerState->previous;
+
+ /* this avoids the need for retail pfree of SQLDropList items: */
+ MemoryContextDelete(currentEventTriggerState->cxt);
+
+ currentEventTriggerState = prevstate;
+}
+
+/*
+ * Do we need to keep close track of objects being dropped?
+ *
+ * This is useful because there is a cost to running with them enabled.
+ */
+bool
+trackDroppedObjectsNeeded(void)
+{
+ /*
+ * true if any sql_drop, table_rewrite, ddl_command_end event trigger
+ * exists
+ */
+ return list_length(EventCacheLookup(EVT_SQLDrop)) > 0 ||
+ list_length(EventCacheLookup(EVT_TableRewrite)) > 0 ||
+ list_length(EventCacheLookup(EVT_DDLCommandEnd)) > 0;
+}
+
+/*
+ * Support for dropped objects information on event trigger functions.
+ *
+ * We keep the list of objects dropped by the current command in current
+ * state's SQLDropList (comprising SQLDropObject items). Each time a new
+ * command is to start, a clean EventTriggerQueryState is created; commands
+ * that drop objects do the dependency.c dance to drop objects, which
+ * populates the current state's SQLDropList; when the event triggers are
+ * invoked they can consume the list via pg_event_trigger_dropped_objects().
+ * When the command finishes, the EventTriggerQueryState is cleared, and
+ * the one from the previous command is restored (when no command is in
+ * execution, the current state is NULL).
+ *
+ * All this lets us support the case that an event trigger function drops
+ * objects "reentrantly".
+ */
+
/*
 * Register one object as being dropped by the current command.
 *
 * object is the address of the object being dropped; original indicates
 * whether it was directly named in the command, and normal whether it was
 * reached via a normal dependency.  The entry is recorded in the current
 * event trigger state's SQLDropList, allocated in that state's memory
 * context so it survives until the state is torn down.  Temporary objects
 * belonging to other sessions are deliberately not recorded.
 */
void
EventTriggerSQLDropAddObject(const ObjectAddress *object, bool original, bool normal)
{
    SQLDropObject *obj;
    MemoryContext oldcxt;

    /* No-op unless dropped-object collection is active for this query. */
    if (!currentEventTriggerState)
        return;

    Assert(EventTriggerSupportsObjectClass(getObjectClass(object)));

    /* don't report temp schemas except my own */
    if (object->classId == NamespaceRelationId &&
        (isAnyTempNamespace(object->objectId) &&
         !isTempNamespace(object->objectId)))
        return;

    /* Allocate in the event trigger state's context, not the caller's. */
    oldcxt = MemoryContextSwitchTo(currentEventTriggerState->cxt);

    obj = palloc0(sizeof(SQLDropObject));
    obj->address = *object;
    obj->original = original;
    obj->normal = normal;

    /*
     * Obtain schema names from the object's catalog tuple, if one exists;
     * this lets us skip objects in temp schemas.  We trust that
     * ObjectProperty contains all object classes that can be
     * schema-qualified.
     */
    if (is_objectclass_supported(object->classId))
    {
        Relation    catalog;
        HeapTuple   tuple;

        catalog = table_open(obj->address.classId, AccessShareLock);
        tuple = get_catalog_object_by_oid(catalog,
                                          get_object_attnum_oid(object->classId),
                                          obj->address.objectId);

        if (tuple)
        {
            AttrNumber  attnum;
            Datum       datum;
            bool        isnull;

            /* Extract the namespace column, if this catalog has one. */
            attnum = get_object_attnum_namespace(obj->address.classId);
            if (attnum != InvalidAttrNumber)
            {
                datum = heap_getattr(tuple, attnum,
                                     RelationGetDescr(catalog), &isnull);
                if (!isnull)
                {
                    Oid         namespaceId;

                    namespaceId = DatumGetObjectId(datum);
                    /* temp objects are only reported if they are my own */
                    if (isTempNamespace(namespaceId))
                    {
                        obj->schemaname = "pg_temp";
                        obj->istemp = true;
                    }
                    else if (isAnyTempNamespace(namespaceId))
                    {
                        /* Someone else's temp object: discard and bail out. */
                        pfree(obj);
                        table_close(catalog, AccessShareLock);
                        MemoryContextSwitchTo(oldcxt);
                        return;
                    }
                    else
                    {
                        obj->schemaname = get_namespace_name(namespaceId);
                        obj->istemp = false;
                    }
                }
            }

            /* Record the bare object name only when it's unique per schema. */
            if (get_object_namensp_unique(obj->address.classId) &&
                obj->address.objectSubId == 0)
            {
                attnum = get_object_attnum_name(obj->address.classId);
                if (attnum != InvalidAttrNumber)
                {
                    datum = heap_getattr(tuple, attnum,
                                         RelationGetDescr(catalog), &isnull);
                    if (!isnull)
                        obj->objname = pstrdup(NameStr(*DatumGetName(datum)));
                }
            }
        }

        table_close(catalog, AccessShareLock);
    }
    else
    {
        /* No catalog metadata; still flag my own temp schema itself. */
        if (object->classId == NamespaceRelationId &&
            isTempNamespace(object->objectId))
            obj->istemp = true;
    }

    /* object identity, objname and objargs */
    obj->objidentity =
        getObjectIdentityParts(&obj->address, &obj->addrnames, &obj->addrargs,
                               false);

    /* object type */
    obj->objecttype = getObjectTypeDescription(&obj->address, false);

    slist_push_head(&(currentEventTriggerState->SQLDropList), &obj->next);

    MemoryContextSwitchTo(oldcxt);
}
+
/*
 * pg_event_trigger_dropped_objects
 *
 * Make the list of dropped objects available to the user function run by the
 * Event Trigger.
 *
 * Returns a set of rows, one per dropped object; callable only while a
 * sql_drop event trigger is running (enforced via in_sql_drop).  The
 * column order below must match the function's declared result row type.
 */
Datum
pg_event_trigger_dropped_objects(PG_FUNCTION_ARGS)
{
    ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
    slist_iter  iter;

    /*
     * Protect this function from being called out of context
     */
    if (!currentEventTriggerState ||
        !currentEventTriggerState->in_sql_drop)
        ereport(ERROR,
                (errcode(ERRCODE_E_R_I_E_EVENT_TRIGGER_PROTOCOL_VIOLATED),
                 errmsg("%s can only be called in a sql_drop event trigger function",
                        "pg_event_trigger_dropped_objects()")));

    /* Build tuplestore to hold the result rows */
    InitMaterializedSRF(fcinfo, 0);

    /* Emit one row per collected SQLDropObject. */
    slist_foreach(iter, &(currentEventTriggerState->SQLDropList))
    {
        SQLDropObject *obj;
        int         i = 0;
        Datum       values[12];
        bool        nulls[12];

        obj = slist_container(SQLDropObject, next, iter.cur);

        MemSet(values, 0, sizeof(values));
        MemSet(nulls, 0, sizeof(nulls));

        /* classid */
        values[i++] = ObjectIdGetDatum(obj->address.classId);

        /* objid */
        values[i++] = ObjectIdGetDatum(obj->address.objectId);

        /* objsubid */
        values[i++] = Int32GetDatum(obj->address.objectSubId);

        /* original */
        values[i++] = BoolGetDatum(obj->original);

        /* normal */
        values[i++] = BoolGetDatum(obj->normal);

        /* is_temporary */
        values[i++] = BoolGetDatum(obj->istemp);

        /* object_type */
        values[i++] = CStringGetTextDatum(obj->objecttype);

        /* schema_name */
        if (obj->schemaname)
            values[i++] = CStringGetTextDatum(obj->schemaname);
        else
            nulls[i++] = true;

        /* object_name */
        if (obj->objname)
            values[i++] = CStringGetTextDatum(obj->objname);
        else
            nulls[i++] = true;

        /* object_identity */
        if (obj->objidentity)
            values[i++] = CStringGetTextDatum(obj->objidentity);
        else
            nulls[i++] = true;

        /* address_names and address_args */
        if (obj->addrnames)
        {
            values[i++] = PointerGetDatum(strlist_to_textarray(obj->addrnames));

            /* address_args may be absent even when names are present */
            if (obj->addrargs)
                values[i++] = PointerGetDatum(strlist_to_textarray(obj->addrargs));
            else
                values[i++] = PointerGetDatum(construct_empty_array(TEXTOID));
        }
        else
        {
            nulls[i++] = true;
            nulls[i++] = true;
        }

        tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
                             values, nulls);
    }

    return (Datum) 0;
}
+
+/*
+ * pg_event_trigger_table_rewrite_oid
+ *
+ * Make the Oid of the table going to be rewritten available to the user
+ * function run by the Event Trigger.
+ */
+Datum
+pg_event_trigger_table_rewrite_oid(PG_FUNCTION_ARGS)
+{
+ /*
+ * Protect this function from being called out of context
+ */
+ if (!currentEventTriggerState ||
+ currentEventTriggerState->table_rewrite_oid == InvalidOid)
+ ereport(ERROR,
+ (errcode(ERRCODE_E_R_I_E_EVENT_TRIGGER_PROTOCOL_VIOLATED),
+ errmsg("%s can only be called in a table_rewrite event trigger function",
+ "pg_event_trigger_table_rewrite_oid()")));
+
+ PG_RETURN_OID(currentEventTriggerState->table_rewrite_oid);
+}
+
/*
 * pg_event_trigger_table_rewrite_reason
 *
 * Make the rewrite reason available to the user.
 *
 * NOTE(review): the guard below assumes table_rewrite_reason is 0 whenever
 * no table_rewrite trigger is running; confirm that the field is initialized
 * in EventTriggerBeginCompleteQuery, since it is only assigned in
 * EventTriggerTableRewrite.
 */
Datum
pg_event_trigger_table_rewrite_reason(PG_FUNCTION_ARGS)
{
    /*
     * Protect this function from being called out of context
     */
    if (!currentEventTriggerState ||
        currentEventTriggerState->table_rewrite_reason == 0)
        ereport(ERROR,
                (errcode(ERRCODE_E_R_I_E_EVENT_TRIGGER_PROTOCOL_VIOLATED),
                 errmsg("%s can only be called in a table_rewrite event trigger function",
                        "pg_event_trigger_table_rewrite_reason()")));

    PG_RETURN_INT32(currentEventTriggerState->table_rewrite_reason);
}
+
+/*-------------------------------------------------------------------------
+ * Support for DDL command deparsing
+ *
+ * The routines below enable an event trigger function to obtain a list of
+ * DDL commands as they are executed. There are three main pieces to this
+ * feature:
+ *
+ * 1) Within ProcessUtilitySlow, or some sub-routine thereof, each DDL command
+ * adds a struct CollectedCommand representation of itself to the command list,
+ * using the routines below.
+ *
+ * 2) Some time after that, ddl_command_end fires and the command list is made
+ * available to the event trigger function via pg_event_trigger_ddl_commands();
+ * the complete command details are exposed as a column of type pg_ddl_command.
+ *
+ * 3) An extension can install a function capable of taking a value of type
+ * pg_ddl_command and transform it into some external, user-visible and/or
+ * -modifiable representation.
+ *-------------------------------------------------------------------------
+ */
+
+/*
+ * Inhibit DDL command collection.
+ */
+void
+EventTriggerInhibitCommandCollection(void)
+{
+ if (!currentEventTriggerState)
+ return;
+
+ currentEventTriggerState->commandCollectionInhibited = true;
+}
+
+/*
+ * Re-establish DDL command collection.
+ */
+void
+EventTriggerUndoInhibitCommandCollection(void)
+{
+ if (!currentEventTriggerState)
+ return;
+
+ currentEventTriggerState->commandCollectionInhibited = false;
+}
+
+/*
+ * EventTriggerCollectSimpleCommand
+ * Save data about a simple DDL command that was just executed
+ *
+ * address identifies the object being operated on. secondaryObject is an
+ * object address that was related in some way to the executed command; its
+ * meaning is command-specific.
+ *
+ * For instance, for an ALTER obj SET SCHEMA command, objtype is the type of
+ * object being moved, objectId is its OID, and secondaryOid is the OID of the
+ * old schema. (The destination schema OID can be obtained by catalog lookup
+ * of the object.)
+ */
+void
+EventTriggerCollectSimpleCommand(ObjectAddress address,
+ ObjectAddress secondaryObject,
+ Node *parsetree)
+{
+ MemoryContext oldcxt;
+ CollectedCommand *command;
+
+ /* ignore if event trigger context not set, or collection disabled */
+ if (!currentEventTriggerState ||
+ currentEventTriggerState->commandCollectionInhibited)
+ return;
+
+ oldcxt = MemoryContextSwitchTo(currentEventTriggerState->cxt);
+
+ command = palloc(sizeof(CollectedCommand));
+
+ command->type = SCT_Simple;
+ command->in_extension = creating_extension;
+
+ command->d.simple.address = address;
+ command->d.simple.secondaryObject = secondaryObject;
+ command->parsetree = copyObject(parsetree);
+
+ currentEventTriggerState->commandList = lappend(currentEventTriggerState->commandList,
+ command);
+
+ MemoryContextSwitchTo(oldcxt);
+}
+
+/*
+ * EventTriggerAlterTableStart
+ * Prepare to receive data on an ALTER TABLE command about to be executed
+ *
+ * Note we don't collect the command immediately; instead we keep it in
+ * currentCommand, and only when we're done processing the subcommands we will
+ * add it to the command list.
+ */
+void
+EventTriggerAlterTableStart(Node *parsetree)
+{
+ MemoryContext oldcxt;
+ CollectedCommand *command;
+
+ /* ignore if event trigger context not set, or collection disabled */
+ if (!currentEventTriggerState ||
+ currentEventTriggerState->commandCollectionInhibited)
+ return;
+
+ oldcxt = MemoryContextSwitchTo(currentEventTriggerState->cxt);
+
+ command = palloc(sizeof(CollectedCommand));
+
+ command->type = SCT_AlterTable;
+ command->in_extension = creating_extension;
+
+ command->d.alterTable.classId = RelationRelationId;
+ command->d.alterTable.objectId = InvalidOid;
+ command->d.alterTable.subcmds = NIL;
+ command->parsetree = copyObject(parsetree);
+
+ command->parent = currentEventTriggerState->currentCommand;
+ currentEventTriggerState->currentCommand = command;
+
+ MemoryContextSwitchTo(oldcxt);
+}
+
/*
 * Remember the OID of the object being affected by an ALTER TABLE.
 *
 * This is needed because in some cases we don't know the OID until later.
 *
 * NOTE(review): currentCommand is dereferenced without a NULL check here;
 * presumably callers guarantee EventTriggerAlterTableStart has run first --
 * confirm against the call sites.
 */
void
EventTriggerAlterTableRelid(Oid objectId)
{
    if (!currentEventTriggerState ||
        currentEventTriggerState->commandCollectionInhibited)
        return;

    currentEventTriggerState->currentCommand->d.alterTable.objectId = objectId;
}
+
/*
 * EventTriggerCollectAlterTableSubcmd
 *		Save data about a single part of an ALTER TABLE.
 *
 * Several different commands go through this path, but apart from ALTER TABLE
 * itself, they are all concerned with AlterTableCmd nodes that are generated
 * internally, so that's all that this code needs to handle at the moment.
 *
 * subcmd must be an AlterTableCmd node; address identifies the object the
 * subcommand affected.  The subcommand is appended to the in-progress
 * ALTER TABLE command set up by EventTriggerAlterTableStart.
 */
void
EventTriggerCollectAlterTableSubcmd(Node *subcmd, ObjectAddress address)
{
    MemoryContext oldcxt;
    CollectedATSubcmd *newsub;

    /* ignore if event trigger context not set, or collection disabled */
    if (!currentEventTriggerState ||
        currentEventTriggerState->commandCollectionInhibited)
        return;

    /* Callers must have set up the enclosing command and its relation OID. */
    Assert(IsA(subcmd, AlterTableCmd));
    Assert(currentEventTriggerState->currentCommand != NULL);
    Assert(OidIsValid(currentEventTriggerState->currentCommand->d.alterTable.objectId));

    /* Copy into the event trigger state's long-lived context. */
    oldcxt = MemoryContextSwitchTo(currentEventTriggerState->cxt);

    newsub = palloc(sizeof(CollectedATSubcmd));
    newsub->address = address;
    newsub->parsetree = copyObject(subcmd);

    currentEventTriggerState->currentCommand->d.alterTable.subcmds =
        lappend(currentEventTriggerState->currentCommand->d.alterTable.subcmds, newsub);

    MemoryContextSwitchTo(oldcxt);
}
+
/*
 * EventTriggerAlterTableEnd
 *		Finish up saving an ALTER TABLE command, and add it to command list.
 *
 * Pops the current command off the stack established by
 * EventTriggerAlterTableStart.  Commands that collected no subcommands are
 * discarded rather than reported.
 *
 * FIXME this API isn't considering the possibility that an xact/subxact is
 * aborted partway through.  Probably it's best to add an
 * AtEOSubXact_EventTriggers() to fix this.
 */
void
EventTriggerAlterTableEnd(void)
{
    CollectedCommand *parent;

    /* ignore if event trigger context not set, or collection disabled */
    if (!currentEventTriggerState ||
        currentEventTriggerState->commandCollectionInhibited)
        return;

    /* Save the enclosing command before we dispose of the current one. */
    parent = currentEventTriggerState->currentCommand->parent;

    /* If no subcommands, don't collect */
    if (list_length(currentEventTriggerState->currentCommand->d.alterTable.subcmds) != 0)
    {
        MemoryContext oldcxt;

        oldcxt = MemoryContextSwitchTo(currentEventTriggerState->cxt);

        currentEventTriggerState->commandList =
            lappend(currentEventTriggerState->commandList,
                    currentEventTriggerState->currentCommand);

        MemoryContextSwitchTo(oldcxt);
    }
    else
        pfree(currentEventTriggerState->currentCommand);

    /* Pop back to the enclosing ALTER TABLE command, if any. */
    currentEventTriggerState->currentCommand = parent;
}
+
+/*
+ * EventTriggerCollectGrant
+ * Save data about a GRANT/REVOKE command being executed
+ *
+ * This function creates a copy of the InternalGrant, as the original might
+ * not have the right lifetime.
+ */
+void
+EventTriggerCollectGrant(InternalGrant *istmt)
+{
+ MemoryContext oldcxt;
+ CollectedCommand *command;
+ InternalGrant *icopy;
+ ListCell *cell;
+
+ /* ignore if event trigger context not set, or collection disabled */
+ if (!currentEventTriggerState ||
+ currentEventTriggerState->commandCollectionInhibited)
+ return;
+
+ oldcxt = MemoryContextSwitchTo(currentEventTriggerState->cxt);
+
+ /*
+ * This is tedious, but necessary.
+ */
+ icopy = palloc(sizeof(InternalGrant));
+ memcpy(icopy, istmt, sizeof(InternalGrant));
+ icopy->objects = list_copy(istmt->objects);
+ icopy->grantees = list_copy(istmt->grantees);
+ icopy->col_privs = NIL;
+ foreach(cell, istmt->col_privs)
+ icopy->col_privs = lappend(icopy->col_privs, copyObject(lfirst(cell)));
+
+ /* Now collect it, using the copied InternalGrant */
+ command = palloc(sizeof(CollectedCommand));
+ command->type = SCT_Grant;
+ command->in_extension = creating_extension;
+ command->d.grant.istmt = icopy;
+ command->parsetree = NULL;
+
+ currentEventTriggerState->commandList =
+ lappend(currentEventTriggerState->commandList, command);
+
+ MemoryContextSwitchTo(oldcxt);
+}
+
+/*
+ * EventTriggerCollectAlterOpFam
+ * Save data about an ALTER OPERATOR FAMILY ADD/DROP command being
+ * executed
+ */
+void
+EventTriggerCollectAlterOpFam(AlterOpFamilyStmt *stmt, Oid opfamoid,
+ List *operators, List *procedures)
+{
+ MemoryContext oldcxt;
+ CollectedCommand *command;
+
+ /* ignore if event trigger context not set, or collection disabled */
+ if (!currentEventTriggerState ||
+ currentEventTriggerState->commandCollectionInhibited)
+ return;
+
+ oldcxt = MemoryContextSwitchTo(currentEventTriggerState->cxt);
+
+ command = palloc(sizeof(CollectedCommand));
+ command->type = SCT_AlterOpFamily;
+ command->in_extension = creating_extension;
+ ObjectAddressSet(command->d.opfam.address,
+ OperatorFamilyRelationId, opfamoid);
+ command->d.opfam.operators = operators;
+ command->d.opfam.procedures = procedures;
+ command->parsetree = (Node *) copyObject(stmt);
+
+ currentEventTriggerState->commandList =
+ lappend(currentEventTriggerState->commandList, command);
+
+ MemoryContextSwitchTo(oldcxt);
+}
+
+/*
+ * EventTriggerCollectCreateOpClass
+ * Save data about a CREATE OPERATOR CLASS command being executed
+ */
+void
+EventTriggerCollectCreateOpClass(CreateOpClassStmt *stmt, Oid opcoid,
+ List *operators, List *procedures)
+{
+ MemoryContext oldcxt;
+ CollectedCommand *command;
+
+ /* ignore if event trigger context not set, or collection disabled */
+ if (!currentEventTriggerState ||
+ currentEventTriggerState->commandCollectionInhibited)
+ return;
+
+ oldcxt = MemoryContextSwitchTo(currentEventTriggerState->cxt);
+
+ command = palloc0(sizeof(CollectedCommand));
+ command->type = SCT_CreateOpClass;
+ command->in_extension = creating_extension;
+ ObjectAddressSet(command->d.createopc.address,
+ OperatorClassRelationId, opcoid);
+ command->d.createopc.operators = operators;
+ command->d.createopc.procedures = procedures;
+ command->parsetree = (Node *) copyObject(stmt);
+
+ currentEventTriggerState->commandList =
+ lappend(currentEventTriggerState->commandList, command);
+
+ MemoryContextSwitchTo(oldcxt);
+}
+
+/*
+ * EventTriggerCollectAlterTSConfig
+ * Save data about an ALTER TEXT SEARCH CONFIGURATION command being
+ * executed
+ */
+void
+EventTriggerCollectAlterTSConfig(AlterTSConfigurationStmt *stmt, Oid cfgId,
+ Oid *dictIds, int ndicts)
+{
+ MemoryContext oldcxt;
+ CollectedCommand *command;
+
+ /* ignore if event trigger context not set, or collection disabled */
+ if (!currentEventTriggerState ||
+ currentEventTriggerState->commandCollectionInhibited)
+ return;
+
+ oldcxt = MemoryContextSwitchTo(currentEventTriggerState->cxt);
+
+ command = palloc0(sizeof(CollectedCommand));
+ command->type = SCT_AlterTSConfig;
+ command->in_extension = creating_extension;
+ ObjectAddressSet(command->d.atscfg.address,
+ TSConfigRelationId, cfgId);
+ command->d.atscfg.dictIds = palloc(sizeof(Oid) * ndicts);
+ memcpy(command->d.atscfg.dictIds, dictIds, sizeof(Oid) * ndicts);
+ command->d.atscfg.ndicts = ndicts;
+ command->parsetree = (Node *) copyObject(stmt);
+
+ currentEventTriggerState->commandList =
+ lappend(currentEventTriggerState->commandList, command);
+
+ MemoryContextSwitchTo(oldcxt);
+}
+
+/*
+ * EventTriggerCollectAlterDefPrivs
+ * Save data about an ALTER DEFAULT PRIVILEGES command being
+ * executed
+ */
+void
+EventTriggerCollectAlterDefPrivs(AlterDefaultPrivilegesStmt *stmt)
+{
+ MemoryContext oldcxt;
+ CollectedCommand *command;
+
+ /* ignore if event trigger context not set, or collection disabled */
+ if (!currentEventTriggerState ||
+ currentEventTriggerState->commandCollectionInhibited)
+ return;
+
+ oldcxt = MemoryContextSwitchTo(currentEventTriggerState->cxt);
+
+ command = palloc0(sizeof(CollectedCommand));
+ command->type = SCT_AlterDefaultPrivileges;
+ command->d.defprivs.objtype = stmt->action->objtype;
+ command->in_extension = creating_extension;
+ command->parsetree = (Node *) copyObject(stmt);
+
+ currentEventTriggerState->commandList =
+ lappend(currentEventTriggerState->commandList, command);
+ MemoryContextSwitchTo(oldcxt);
+}
+
/*
 * In a ddl_command_end event trigger, this function reports the DDL commands
 * being run.
 *
 * SQL-callable set-returning function: emits one row per collected command,
 * with columns (classid, objid, objsubid, command_tag, object_type, schema,
 * identity, in_extension, command).  Errors out if no event trigger context
 * is active.
 */
Datum
pg_event_trigger_ddl_commands(PG_FUNCTION_ARGS)
{
	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
	ListCell   *lc;

	/*
	 * Protect this function from being called out of context
	 */
	if (!currentEventTriggerState)
		ereport(ERROR,
				(errcode(ERRCODE_E_R_I_E_EVENT_TRIGGER_PROTOCOL_VIOLATED),
				 errmsg("%s can only be called in an event trigger function",
						"pg_event_trigger_ddl_commands()")));

	/* Build tuplestore to hold the result rows */
	InitMaterializedSRF(fcinfo, 0);

	foreach(lc, currentEventTriggerState->commandList)
	{
		CollectedCommand *cmd = lfirst(lc);
		/* one datum/null pair per output column; i tracks the next column */
		Datum		values[9];
		bool		nulls[9];
		ObjectAddress addr;
		int			i = 0;

		/*
		 * For IF NOT EXISTS commands that attempt to create an existing
		 * object, the returned OID is Invalid.  Don't return anything.
		 *
		 * One might think that a viable alternative would be to look up the
		 * Oid of the existing object and run the deparse with that.  But
		 * since the parse tree might be different from the one that created
		 * the object in the first place, we might not end up in a consistent
		 * state anyway.
		 */
		if (cmd->type == SCT_Simple &&
			!OidIsValid(cmd->d.simple.address.objectId))
			continue;

		MemSet(nulls, 0, sizeof(nulls));

		switch (cmd->type)
		{
				/* these command types all carry a target object address */
			case SCT_Simple:
			case SCT_AlterTable:
			case SCT_AlterOpFamily:
			case SCT_CreateOpClass:
			case SCT_AlterTSConfig:
				{
					char	   *identity;
					char	   *type;
					char	   *schema = NULL;

					/* pick up the object address from the per-type payload */
					if (cmd->type == SCT_Simple)
						addr = cmd->d.simple.address;
					else if (cmd->type == SCT_AlterTable)
						ObjectAddressSet(addr,
										 cmd->d.alterTable.classId,
										 cmd->d.alterTable.objectId);
					else if (cmd->type == SCT_AlterOpFamily)
						addr = cmd->d.opfam.address;
					else if (cmd->type == SCT_CreateOpClass)
						addr = cmd->d.createopc.address;
					else if (cmd->type == SCT_AlterTSConfig)
						addr = cmd->d.atscfg.address;

					/*
					 * If an object was dropped in the same command we may end
					 * up in a situation where we generated a message but can
					 * no longer look for the object information, so skip it
					 * rather than failing.  This can happen for example with
					 * some subcommand combinations of ALTER TABLE.
					 */
					identity = getObjectIdentity(&addr, true);
					if (identity == NULL)
						continue;

					/* The type can never be NULL. */
					type = getObjectTypeDescription(&addr, true);

					/*
					 * Obtain schema name, if any ("pg_temp" if a temp
					 * object).  If the object class is not in the supported
					 * list here, we assume it's a schema-less object type,
					 * and thus "schema" remains set to NULL.
					 */
					if (is_objectclass_supported(addr.classId))
					{
						AttrNumber	nspAttnum;

						nspAttnum = get_object_attnum_namespace(addr.classId);
						if (nspAttnum != InvalidAttrNumber)
						{
							Relation	catalog;
							HeapTuple	objtup;
							Oid			schema_oid;
							bool		isnull;

							/* fetch the catalog row to read its namespace column */
							catalog = table_open(addr.classId, AccessShareLock);
							objtup = get_catalog_object_by_oid(catalog,
															   get_object_attnum_oid(addr.classId),
															   addr.objectId);
							if (!HeapTupleIsValid(objtup))
								elog(ERROR, "cache lookup failed for object %u/%u",
									 addr.classId, addr.objectId);
							schema_oid =
								heap_getattr(objtup, nspAttnum,
											 RelationGetDescr(catalog), &isnull);
							if (isnull)
								elog(ERROR,
									 "invalid null namespace in object %u/%u/%d",
									 addr.classId, addr.objectId, addr.objectSubId);
							schema = get_namespace_name_or_temp(schema_oid);

							table_close(catalog, AccessShareLock);
						}
					}

					/* classid */
					values[i++] = ObjectIdGetDatum(addr.classId);
					/* objid */
					values[i++] = ObjectIdGetDatum(addr.objectId);
					/* objsubid */
					values[i++] = Int32GetDatum(addr.objectSubId);
					/* command tag */
					values[i++] = CStringGetTextDatum(CreateCommandName(cmd->parsetree));
					/* object_type */
					values[i++] = CStringGetTextDatum(type);
					/* schema */
					if (schema == NULL)
						nulls[i++] = true;
					else
						values[i++] = CStringGetTextDatum(schema);
					/* identity */
					values[i++] = CStringGetTextDatum(identity);
					/* in_extension */
					values[i++] = BoolGetDatum(cmd->in_extension);
					/* command */
					values[i++] = PointerGetDatum(cmd);
				}
				break;

				/* no catalog object to point at; most columns are NULL */
			case SCT_AlterDefaultPrivileges:
				/* classid */
				nulls[i++] = true;
				/* objid */
				nulls[i++] = true;
				/* objsubid */
				nulls[i++] = true;
				/* command tag */
				values[i++] = CStringGetTextDatum(CreateCommandName(cmd->parsetree));
				/* object_type */
				values[i++] = CStringGetTextDatum(stringify_adefprivs_objtype(cmd->d.defprivs.objtype));
				/* schema */
				nulls[i++] = true;
				/* identity */
				nulls[i++] = true;
				/* in_extension */
				values[i++] = BoolGetDatum(cmd->in_extension);
				/* command */
				values[i++] = PointerGetDatum(cmd);
				break;

				/* likewise, GRANT/REVOKE have no single target object */
			case SCT_Grant:
				/* classid */
				nulls[i++] = true;
				/* objid */
				nulls[i++] = true;
				/* objsubid */
				nulls[i++] = true;
				/* command tag */
				values[i++] = CStringGetTextDatum(cmd->d.grant.istmt->is_grant ?
												  "GRANT" : "REVOKE");
				/* object_type */
				values[i++] = CStringGetTextDatum(stringify_grant_objtype(cmd->d.grant.istmt->objtype));
				/* schema */
				nulls[i++] = true;
				/* identity */
				nulls[i++] = true;
				/* in_extension */
				values[i++] = BoolGetDatum(cmd->in_extension);
				/* command */
				values[i++] = PointerGetDatum(cmd);
				break;
		}

		tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
							 values, nulls);
	}

	PG_RETURN_VOID();
}
+
+/*
+ * Return the ObjectType as a string, as it would appear in GRANT and
+ * REVOKE commands.
+ */
+static const char *
+stringify_grant_objtype(ObjectType objtype)
+{
+ switch (objtype)
+ {
+ case OBJECT_COLUMN:
+ return "COLUMN";
+ case OBJECT_TABLE:
+ return "TABLE";
+ case OBJECT_SEQUENCE:
+ return "SEQUENCE";
+ case OBJECT_DATABASE:
+ return "DATABASE";
+ case OBJECT_DOMAIN:
+ return "DOMAIN";
+ case OBJECT_FDW:
+ return "FOREIGN DATA WRAPPER";
+ case OBJECT_FOREIGN_SERVER:
+ return "FOREIGN SERVER";
+ case OBJECT_FUNCTION:
+ return "FUNCTION";
+ case OBJECT_LANGUAGE:
+ return "LANGUAGE";
+ case OBJECT_LARGEOBJECT:
+ return "LARGE OBJECT";
+ case OBJECT_SCHEMA:
+ return "SCHEMA";
+ case OBJECT_PARAMETER_ACL:
+ return "PARAMETER";
+ case OBJECT_PROCEDURE:
+ return "PROCEDURE";
+ case OBJECT_ROUTINE:
+ return "ROUTINE";
+ case OBJECT_TABLESPACE:
+ return "TABLESPACE";
+ case OBJECT_TYPE:
+ return "TYPE";
+ /* these currently aren't used */
+ case OBJECT_ACCESS_METHOD:
+ case OBJECT_AGGREGATE:
+ case OBJECT_AMOP:
+ case OBJECT_AMPROC:
+ case OBJECT_ATTRIBUTE:
+ case OBJECT_CAST:
+ case OBJECT_COLLATION:
+ case OBJECT_CONVERSION:
+ case OBJECT_DEFAULT:
+ case OBJECT_DEFACL:
+ case OBJECT_DOMCONSTRAINT:
+ case OBJECT_EVENT_TRIGGER:
+ case OBJECT_EXTENSION:
+ case OBJECT_FOREIGN_TABLE:
+ case OBJECT_INDEX:
+ case OBJECT_MATVIEW:
+ case OBJECT_OPCLASS:
+ case OBJECT_OPERATOR:
+ case OBJECT_OPFAMILY:
+ case OBJECT_POLICY:
+ case OBJECT_PUBLICATION:
+ case OBJECT_PUBLICATION_NAMESPACE:
+ case OBJECT_PUBLICATION_REL:
+ case OBJECT_ROLE:
+ case OBJECT_RULE:
+ case OBJECT_STATISTIC_EXT:
+ case OBJECT_SUBSCRIPTION:
+ case OBJECT_TABCONSTRAINT:
+ case OBJECT_TRANSFORM:
+ case OBJECT_TRIGGER:
+ case OBJECT_TSCONFIGURATION:
+ case OBJECT_TSDICTIONARY:
+ case OBJECT_TSPARSER:
+ case OBJECT_TSTEMPLATE:
+ case OBJECT_USER_MAPPING:
+ case OBJECT_VIEW:
+ elog(ERROR, "unsupported object type: %d", (int) objtype);
+ }
+
+ return "???"; /* keep compiler quiet */
+}
+
+/*
+ * Return the ObjectType as a string; as above, but use the spelling
+ * in ALTER DEFAULT PRIVILEGES commands instead. Generally this is just
+ * the plural.
+ */
+static const char *
+stringify_adefprivs_objtype(ObjectType objtype)
+{
+ switch (objtype)
+ {
+ case OBJECT_COLUMN:
+ return "COLUMNS";
+ case OBJECT_TABLE:
+ return "TABLES";
+ case OBJECT_SEQUENCE:
+ return "SEQUENCES";
+ case OBJECT_DATABASE:
+ return "DATABASES";
+ case OBJECT_DOMAIN:
+ return "DOMAINS";
+ case OBJECT_FDW:
+ return "FOREIGN DATA WRAPPERS";
+ case OBJECT_FOREIGN_SERVER:
+ return "FOREIGN SERVERS";
+ case OBJECT_FUNCTION:
+ return "FUNCTIONS";
+ case OBJECT_LANGUAGE:
+ return "LANGUAGES";
+ case OBJECT_LARGEOBJECT:
+ return "LARGE OBJECTS";
+ case OBJECT_SCHEMA:
+ return "SCHEMAS";
+ case OBJECT_PROCEDURE:
+ return "PROCEDURES";
+ case OBJECT_ROUTINE:
+ return "ROUTINES";
+ case OBJECT_TABLESPACE:
+ return "TABLESPACES";
+ case OBJECT_TYPE:
+ return "TYPES";
+ /* these currently aren't used */
+ case OBJECT_ACCESS_METHOD:
+ case OBJECT_AGGREGATE:
+ case OBJECT_AMOP:
+ case OBJECT_AMPROC:
+ case OBJECT_ATTRIBUTE:
+ case OBJECT_CAST:
+ case OBJECT_COLLATION:
+ case OBJECT_CONVERSION:
+ case OBJECT_DEFAULT:
+ case OBJECT_DEFACL:
+ case OBJECT_DOMCONSTRAINT:
+ case OBJECT_EVENT_TRIGGER:
+ case OBJECT_EXTENSION:
+ case OBJECT_FOREIGN_TABLE:
+ case OBJECT_INDEX:
+ case OBJECT_MATVIEW:
+ case OBJECT_OPCLASS:
+ case OBJECT_OPERATOR:
+ case OBJECT_OPFAMILY:
+ case OBJECT_PARAMETER_ACL:
+ case OBJECT_POLICY:
+ case OBJECT_PUBLICATION:
+ case OBJECT_PUBLICATION_NAMESPACE:
+ case OBJECT_PUBLICATION_REL:
+ case OBJECT_ROLE:
+ case OBJECT_RULE:
+ case OBJECT_STATISTIC_EXT:
+ case OBJECT_SUBSCRIPTION:
+ case OBJECT_TABCONSTRAINT:
+ case OBJECT_TRANSFORM:
+ case OBJECT_TRIGGER:
+ case OBJECT_TSCONFIGURATION:
+ case OBJECT_TSDICTIONARY:
+ case OBJECT_TSPARSER:
+ case OBJECT_TSTEMPLATE:
+ case OBJECT_USER_MAPPING:
+ case OBJECT_VIEW:
+ elog(ERROR, "unsupported object type: %d", (int) objtype);
+ }
+
+ return "???"; /* keep compiler quiet */
+}
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
new file mode 100644
index 0000000..060c618
--- /dev/null
+++ b/src/backend/commands/explain.c
@@ -0,0 +1,5022 @@
+/*-------------------------------------------------------------------------
+ *
+ * explain.c
+ * Explain query execution plans
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994-5, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/commands/explain.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/xact.h"
+#include "catalog/pg_type.h"
+#include "commands/createas.h"
+#include "commands/defrem.h"
+#include "commands/prepare.h"
+#include "executor/nodeHash.h"
+#include "foreign/fdwapi.h"
+#include "jit/jit.h"
+#include "nodes/extensible.h"
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "parser/analyze.h"
+#include "parser/parsetree.h"
+#include "rewrite/rewriteHandler.h"
+#include "storage/bufmgr.h"
+#include "tcop/tcopprot.h"
+#include "utils/builtins.h"
+#include "utils/guc_tables.h"
+#include "utils/json.h"
+#include "utils/lsyscache.h"
+#include "utils/rel.h"
+#include "utils/ruleutils.h"
+#include "utils/snapmgr.h"
+#include "utils/tuplesort.h"
+#include "utils/typcache.h"
+#include "utils/xml.h"
+
+
/*
 * Hook for plugins to get control in ExplainOneQuery(); if set, it is called
 * in place of the standard plan-and-explain path for each rewritten query.
 */
ExplainOneQuery_hook_type ExplainOneQuery_hook = NULL;

/* Hook for plugins to get control in explain_get_index_name() */
explain_get_index_name_hook_type explain_get_index_name_hook = NULL;


/* OR-able flags for ExplainXMLTag() (see that function for exact semantics) */
#define X_OPENING 0
#define X_CLOSING 1
#define X_CLOSE_IMMEDIATE 2
#define X_NOWHITESPACE 4
+
+static void ExplainOneQuery(Query *query, int cursorOptions,
+ IntoClause *into, ExplainState *es,
+ const char *queryString, ParamListInfo params,
+ QueryEnvironment *queryEnv);
+static void ExplainPrintJIT(ExplainState *es, int jit_flags,
+ JitInstrumentation *ji);
+static void report_triggers(ResultRelInfo *rInfo, bool show_relname,
+ ExplainState *es);
+static double elapsed_time(instr_time *starttime);
+static bool ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used);
+static void ExplainNode(PlanState *planstate, List *ancestors,
+ const char *relationship, const char *plan_name,
+ ExplainState *es);
+static void show_plan_tlist(PlanState *planstate, List *ancestors,
+ ExplainState *es);
+static void show_expression(Node *node, const char *qlabel,
+ PlanState *planstate, List *ancestors,
+ bool useprefix, ExplainState *es);
+static void show_qual(List *qual, const char *qlabel,
+ PlanState *planstate, List *ancestors,
+ bool useprefix, ExplainState *es);
+static void show_scan_qual(List *qual, const char *qlabel,
+ PlanState *planstate, List *ancestors,
+ ExplainState *es);
+static void show_upper_qual(List *qual, const char *qlabel,
+ PlanState *planstate, List *ancestors,
+ ExplainState *es);
+static void show_sort_keys(SortState *sortstate, List *ancestors,
+ ExplainState *es);
+static void show_incremental_sort_keys(IncrementalSortState *incrsortstate,
+ List *ancestors, ExplainState *es);
+static void show_merge_append_keys(MergeAppendState *mstate, List *ancestors,
+ ExplainState *es);
+static void show_agg_keys(AggState *astate, List *ancestors,
+ ExplainState *es);
+static void show_grouping_sets(PlanState *planstate, Agg *agg,
+ List *ancestors, ExplainState *es);
+static void show_grouping_set_keys(PlanState *planstate,
+ Agg *aggnode, Sort *sortnode,
+ List *context, bool useprefix,
+ List *ancestors, ExplainState *es);
+static void show_group_keys(GroupState *gstate, List *ancestors,
+ ExplainState *es);
+static void show_sort_group_keys(PlanState *planstate, const char *qlabel,
+ int nkeys, int nPresortedKeys, AttrNumber *keycols,
+ Oid *sortOperators, Oid *collations, bool *nullsFirst,
+ List *ancestors, ExplainState *es);
+static void show_sortorder_options(StringInfo buf, Node *sortexpr,
+ Oid sortOperator, Oid collation, bool nullsFirst);
+static void show_tablesample(TableSampleClause *tsc, PlanState *planstate,
+ List *ancestors, ExplainState *es);
+static void show_sort_info(SortState *sortstate, ExplainState *es);
+static void show_incremental_sort_info(IncrementalSortState *incrsortstate,
+ ExplainState *es);
+static void show_hash_info(HashState *hashstate, ExplainState *es);
+static void show_memoize_info(MemoizeState *mstate, List *ancestors,
+ ExplainState *es);
+static void show_hashagg_info(AggState *hashstate, ExplainState *es);
+static void show_tidbitmap_info(BitmapHeapScanState *planstate,
+ ExplainState *es);
+static void show_instrumentation_count(const char *qlabel, int which,
+ PlanState *planstate, ExplainState *es);
+static void show_foreignscan_info(ForeignScanState *fsstate, ExplainState *es);
+static void show_eval_params(Bitmapset *bms_params, ExplainState *es);
+static const char *explain_get_index_name(Oid indexId);
+static void show_buffer_usage(ExplainState *es, const BufferUsage *usage,
+ bool planning);
+static void show_wal_usage(ExplainState *es, const WalUsage *usage);
+static void ExplainIndexScanDetails(Oid indexid, ScanDirection indexorderdir,
+ ExplainState *es);
+static void ExplainScanTarget(Scan *plan, ExplainState *es);
+static void ExplainModifyTarget(ModifyTable *plan, ExplainState *es);
+static void ExplainTargetRel(Plan *plan, Index rti, ExplainState *es);
+static void show_modifytable_info(ModifyTableState *mtstate, List *ancestors,
+ ExplainState *es);
+static void ExplainMemberNodes(PlanState **planstates, int nplans,
+ List *ancestors, ExplainState *es);
+static void ExplainMissingMembers(int nplans, int nchildren, ExplainState *es);
+static void ExplainSubPlans(List *plans, List *ancestors,
+ const char *relationship, ExplainState *es);
+static void ExplainCustomChildren(CustomScanState *css,
+ List *ancestors, ExplainState *es);
+static ExplainWorkersState *ExplainCreateWorkersState(int num_workers);
+static void ExplainOpenWorker(int n, ExplainState *es);
+static void ExplainCloseWorker(int n, ExplainState *es);
+static void ExplainFlushWorkersState(ExplainState *es);
+static void ExplainProperty(const char *qlabel, const char *unit,
+ const char *value, bool numeric, ExplainState *es);
+static void ExplainOpenSetAsideGroup(const char *objtype, const char *labelname,
+ bool labeled, int depth, ExplainState *es);
+static void ExplainSaveGroup(ExplainState *es, int depth, int *state_save);
+static void ExplainRestoreGroup(ExplainState *es, int depth, int *state_save);
+static void ExplainDummyGroup(const char *objtype, const char *labelname,
+ ExplainState *es);
+static void ExplainXMLTag(const char *tagname, int flags, ExplainState *es);
+static void ExplainIndentText(ExplainState *es);
+static void ExplainJSONLineEnding(ExplainState *es);
+static void ExplainYAMLLineStarting(ExplainState *es);
+static void escape_yaml(StringInfo buf, const char *str);
+
+
+
/*
 * ExplainQuery -
 *	  execute an EXPLAIN command
 *
 * Parses the EXPLAIN options list, rewrites the contained (already-analyzed)
 * query, explains each resulting plan, and ships the textual result to
 * "dest" as a one-column result set.
 */
void
ExplainQuery(ParseState *pstate, ExplainStmt *stmt,
			 ParamListInfo params, DestReceiver *dest)
{
	ExplainState *es = NewExplainState();
	TupOutputState *tstate;
	JumbleState *jstate = NULL;
	Query	   *query;
	List	   *rewritten;
	ListCell   *lc;
	/* track whether TIMING/SUMMARY were given explicitly, to apply defaults */
	bool		timing_set = false;
	bool		summary_set = false;

	/* Parse options list. */
	foreach(lc, stmt->options)
	{
		DefElem    *opt = (DefElem *) lfirst(lc);

		if (strcmp(opt->defname, "analyze") == 0)
			es->analyze = defGetBoolean(opt);
		else if (strcmp(opt->defname, "verbose") == 0)
			es->verbose = defGetBoolean(opt);
		else if (strcmp(opt->defname, "costs") == 0)
			es->costs = defGetBoolean(opt);
		else if (strcmp(opt->defname, "buffers") == 0)
			es->buffers = defGetBoolean(opt);
		else if (strcmp(opt->defname, "wal") == 0)
			es->wal = defGetBoolean(opt);
		else if (strcmp(opt->defname, "settings") == 0)
			es->settings = defGetBoolean(opt);
		else if (strcmp(opt->defname, "timing") == 0)
		{
			timing_set = true;
			es->timing = defGetBoolean(opt);
		}
		else if (strcmp(opt->defname, "summary") == 0)
		{
			summary_set = true;
			es->summary = defGetBoolean(opt);
		}
		else if (strcmp(opt->defname, "format") == 0)
		{
			char	   *p = defGetString(opt);

			if (strcmp(p, "text") == 0)
				es->format = EXPLAIN_FORMAT_TEXT;
			else if (strcmp(p, "xml") == 0)
				es->format = EXPLAIN_FORMAT_XML;
			else if (strcmp(p, "json") == 0)
				es->format = EXPLAIN_FORMAT_JSON;
			else if (strcmp(p, "yaml") == 0)
				es->format = EXPLAIN_FORMAT_YAML;
			else
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
						 errmsg("unrecognized value for EXPLAIN option \"%s\": \"%s\"",
								opt->defname, p),
						 parser_errposition(pstate, opt->location)));
		}
		else
			ereport(ERROR,
					(errcode(ERRCODE_SYNTAX_ERROR),
					 errmsg("unrecognized EXPLAIN option \"%s\"",
							opt->defname),
					 parser_errposition(pstate, opt->location)));
	}

	/* WAL statistics only exist when the query is actually executed */
	if (es->wal && !es->analyze)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("EXPLAIN option WAL requires ANALYZE")));

	/* if the timing was not set explicitly, set default value */
	es->timing = (timing_set) ? es->timing : es->analyze;

	/* check that timing is used with EXPLAIN ANALYZE */
	if (es->timing && !es->analyze)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("EXPLAIN option TIMING requires ANALYZE")));

	/* if the summary was not set explicitly, set default value */
	es->summary = (summary_set) ? es->summary : es->analyze;

	/* compute the query identifier and invoke the post-analysis hook, if any */
	query = castNode(Query, stmt->query);
	if (IsQueryIdEnabled())
		jstate = JumbleQuery(query, pstate->p_sourcetext);

	if (post_parse_analyze_hook)
		(*post_parse_analyze_hook) (pstate, query, jstate);

	/*
	 * Parse analysis was done already, but we still have to run the rule
	 * rewriter.  We do not do AcquireRewriteLocks: we assume the query either
	 * came straight from the parser, or suitable locks were acquired by
	 * plancache.c.
	 */
	rewritten = QueryRewrite(castNode(Query, stmt->query));

	/* emit opening boilerplate */
	ExplainBeginOutput(es);

	if (rewritten == NIL)
	{
		/*
		 * In the case of an INSTEAD NOTHING, tell at least that.  But in
		 * non-text format, the output is delimited, so this isn't necessary.
		 */
		if (es->format == EXPLAIN_FORMAT_TEXT)
			appendStringInfoString(es->str, "Query rewrites to nothing\n");
	}
	else
	{
		ListCell   *l;

		/* Explain every plan */
		foreach(l, rewritten)
		{
			ExplainOneQuery(lfirst_node(Query, l),
							CURSOR_OPT_PARALLEL_OK, NULL, es,
							pstate->p_sourcetext, params, pstate->p_queryEnv);

			/* Separate plans with an appropriate separator */
			if (lnext(rewritten, l) != NULL)
				ExplainSeparatePlans(es);
		}
	}

	/* emit closing boilerplate */
	ExplainEndOutput(es);
	Assert(es->indent == 0);

	/* output tuples */
	tstate = begin_tup_output_tupdesc(dest, ExplainResultDesc(stmt),
									  &TTSOpsVirtual);
	if (es->format == EXPLAIN_FORMAT_TEXT)
		do_text_output_multiline(tstate, es->str->data);
	else
		do_text_output_oneline(tstate, es->str->data);
	end_tup_output(tstate);

	pfree(es->str->data);
}
+
+/*
+ * Create a new ExplainState struct initialized with default options.
+ */
+ExplainState *
+NewExplainState(void)
+{
+ ExplainState *es = (ExplainState *) palloc0(sizeof(ExplainState));
+
+ /* Set default options (most fields can be left as zeroes). */
+ es->costs = true;
+ /* Prepare output buffer. */
+ es->str = makeStringInfo();
+
+ return es;
+}
+
+/*
+ * ExplainResultDesc -
+ * construct the result tupledesc for an EXPLAIN
+ */
+TupleDesc
+ExplainResultDesc(ExplainStmt *stmt)
+{
+ TupleDesc tupdesc;
+ ListCell *lc;
+ Oid result_type = TEXTOID;
+
+ /* Check for XML format option */
+ foreach(lc, stmt->options)
+ {
+ DefElem *opt = (DefElem *) lfirst(lc);
+
+ if (strcmp(opt->defname, "format") == 0)
+ {
+ char *p = defGetString(opt);
+
+ if (strcmp(p, "xml") == 0)
+ result_type = XMLOID;
+ else if (strcmp(p, "json") == 0)
+ result_type = JSONOID;
+ else
+ result_type = TEXTOID;
+ /* don't "break", as ExplainQuery will use the last value */
+ }
+ }
+
+ /* Need a tuple descriptor representing a single TEXT or XML column */
+ tupdesc = CreateTemplateTupleDesc(1);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 1, "QUERY PLAN",
+ result_type, -1, 0);
+ return tupdesc;
+}
+
/*
 * ExplainOneQuery -
 *	  print out the execution plan for one Query
 *
 * "into" is NULL unless we are explaining the contents of a CreateTableAsStmt.
 *
 * Utility statements are delegated to ExplainOneUtility; otherwise we plan
 * the query here (measuring planning time and, if requested, buffer usage
 * during planning) and hand off to ExplainOnePlan.
 */
static void
ExplainOneQuery(Query *query, int cursorOptions,
				IntoClause *into, ExplainState *es,
				const char *queryString, ParamListInfo params,
				QueryEnvironment *queryEnv)
{
	/* planner will not cope with utility statements */
	if (query->commandType == CMD_UTILITY)
	{
		ExplainOneUtility(query->utilityStmt, into, es, queryString, params,
						  queryEnv);
		return;
	}

	/* if an advisor plugin is present, let it manage things */
	if (ExplainOneQuery_hook)
		(*ExplainOneQuery_hook) (query, cursorOptions, into, es,
								 queryString, params, queryEnv);
	else
	{
		PlannedStmt *plan;
		instr_time	planstart,
					planduration;
		BufferUsage bufusage_start,
					bufusage;

		/* snapshot buffer counters just before planning starts */
		if (es->buffers)
			bufusage_start = pgBufferUsage;
		INSTR_TIME_SET_CURRENT(planstart);

		/* plan the query */
		plan = pg_plan_query(query, queryString, cursorOptions, params);

		INSTR_TIME_SET_CURRENT(planduration);
		INSTR_TIME_SUBTRACT(planduration, planstart);

		/* calc differences of buffer counters. */
		if (es->buffers)
		{
			memset(&bufusage, 0, sizeof(BufferUsage));
			BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
		}

		/* run it (if needed) and produce output */
		ExplainOnePlan(plan, into, es, queryString, params, queryEnv,
					   &planduration, (es->buffers ? &bufusage : NULL));
	}
}
+
/*
 * ExplainOneUtility -
 *	  print out the execution plan for one utility statement
 *		(In general, utility statements don't have plans, but there are some
 *		we treat as special cases)
 *
 * "into" is NULL unless we are explaining the contents of a CreateTableAsStmt.
 *
 * This is exported because it's called back from prepare.c in the
 * EXPLAIN EXECUTE case.  In that case, we'll be dealing with a statement
 * that's in the plan cache, so we have to ensure we don't modify it.
 */
void
ExplainOneUtility(Node *utilityStmt, IntoClause *into, ExplainState *es,
				  const char *queryString, ParamListInfo params,
				  QueryEnvironment *queryEnv)
{
	/* nothing to show for an empty statement */
	if (utilityStmt == NULL)
		return;

	if (IsA(utilityStmt, CreateTableAsStmt))
	{
		/*
		 * We have to rewrite the contained SELECT and then pass it back to
		 * ExplainOneQuery.  Copy to be safe in the EXPLAIN EXECUTE case.
		 */
		CreateTableAsStmt *ctas = (CreateTableAsStmt *) utilityStmt;
		List	   *rewritten;

		/*
		 * Check if the relation exists or not.  This is done at this stage to
		 * avoid query planning or execution.
		 */
		if (CreateTableAsRelExists(ctas))
		{
			/* relation already exists (IF NOT EXISTS); emit a stub group */
			if (ctas->objtype == OBJECT_TABLE)
				ExplainDummyGroup("CREATE TABLE AS", NULL, es);
			else if (ctas->objtype == OBJECT_MATVIEW)
				ExplainDummyGroup("CREATE MATERIALIZED VIEW", NULL, es);
			else
				elog(ERROR, "unexpected object type: %d",
					 (int) ctas->objtype);
			return;
		}

		/* copyObject protects the cached plan tree from rewriter mutation */
		rewritten = QueryRewrite(castNode(Query, copyObject(ctas->query)));
		Assert(list_length(rewritten) == 1);
		ExplainOneQuery(linitial_node(Query, rewritten),
						CURSOR_OPT_PARALLEL_OK, ctas->into, es,
						queryString, params, queryEnv);
	}
	else if (IsA(utilityStmt, DeclareCursorStmt))
	{
		/*
		 * Likewise for DECLARE CURSOR.
		 *
		 * Notice that if you say EXPLAIN ANALYZE DECLARE CURSOR then we'll
		 * actually run the query.  This is different from pre-8.3 behavior
		 * but seems more useful than not running the query.  No cursor will
		 * be created, however.
		 */
		DeclareCursorStmt *dcs = (DeclareCursorStmt *) utilityStmt;
		List	   *rewritten;

		rewritten = QueryRewrite(castNode(Query, copyObject(dcs->query)));
		Assert(list_length(rewritten) == 1);
		ExplainOneQuery(linitial_node(Query, rewritten),
						dcs->options, NULL, es,
						queryString, params, queryEnv);
	}
	else if (IsA(utilityStmt, ExecuteStmt))
		ExplainExecuteQuery((ExecuteStmt *) utilityStmt, into, es,
							queryString, params, queryEnv);
	else if (IsA(utilityStmt, NotifyStmt))
	{
		if (es->format == EXPLAIN_FORMAT_TEXT)
			appendStringInfoString(es->str, "NOTIFY\n");
		else
			ExplainDummyGroup("Notify", NULL, es);
	}
	else
	{
		/* any other utility statement: no plan to show */
		if (es->format == EXPLAIN_FORMAT_TEXT)
			appendStringInfoString(es->str,
								   "Utility statements have no plan structure\n");
		else
			ExplainDummyGroup("Utility Statement", NULL, es);
	}
}
+
/*
 * ExplainOnePlan -
 *		given a planned query, execute it if needed, and then print
 *		EXPLAIN output
 *
 * "into" is NULL unless we are explaining the contents of a CreateTableAsStmt,
 * in which case executing the query should result in creating that table.
 *
 * "planduration" and "bufusage" (the latter may be NULL) report resources
 * consumed during planning and are folded into the summary output.
 *
 * This is exported because it's called back from prepare.c in the
 * EXPLAIN EXECUTE case, and because an index advisor plugin would need
 * to call it.
 */
void
ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es,
			   const char *queryString, ParamListInfo params,
			   QueryEnvironment *queryEnv, const instr_time *planduration,
			   const BufferUsage *bufusage)
{
	DestReceiver *dest;
	QueryDesc  *queryDesc;
	instr_time	starttime;
	double		totaltime = 0;
	int			eflags;
	int			instrument_option = 0;

	Assert(plannedstmt->commandType != CMD_UTILITY);

	/* choose per-node instrumentation based on the requested options */
	if (es->analyze && es->timing)
		instrument_option |= INSTRUMENT_TIMER;
	else if (es->analyze)
		instrument_option |= INSTRUMENT_ROWS;

	if (es->buffers)
		instrument_option |= INSTRUMENT_BUFFERS;
	if (es->wal)
		instrument_option |= INSTRUMENT_WAL;

	/*
	 * We always collect timing for the entire statement, even when node-level
	 * timing is off, so we don't look at es->timing here.  (We could skip
	 * this if !es->summary, but it's hardly worth the complication.)
	 */
	INSTR_TIME_SET_CURRENT(starttime);

	/*
	 * Use a snapshot with an updated command ID to ensure this query sees
	 * results of any previously executed queries.
	 */
	PushCopiedSnapshot(GetActiveSnapshot());
	UpdateActiveSnapshotCommandId();

	/*
	 * Normally we discard the query's output, but if explaining CREATE TABLE
	 * AS, we'd better use the appropriate tuple receiver.
	 */
	if (into)
		dest = CreateIntoRelDestReceiver(into);
	else
		dest = None_Receiver;

	/* Create a QueryDesc for the query */
	queryDesc = CreateQueryDesc(plannedstmt, queryString,
								GetActiveSnapshot(), InvalidSnapshot,
								dest, params, queryEnv, instrument_option);

	/* Select execution options */
	if (es->analyze)
		eflags = 0;				/* default run-to-completion flags */
	else
		eflags = EXEC_FLAG_EXPLAIN_ONLY;
	if (into)
		eflags |= GetIntoRelEFlags(into);

	/* call ExecutorStart to prepare the plan for execution */
	ExecutorStart(queryDesc, eflags);

	/* Execute the plan for statistics if asked for */
	if (es->analyze)
	{
		ScanDirection dir;

		/* EXPLAIN ANALYZE CREATE TABLE AS WITH NO DATA is weird */
		if (into && into->skipData)
			dir = NoMovementScanDirection;
		else
			dir = ForwardScanDirection;

		/* run the plan */
		ExecutorRun(queryDesc, dir, 0L, true);

		/* run cleanup too */
		ExecutorFinish(queryDesc);

		/* We can't run ExecutorEnd 'till we're done printing the stats... */
		totaltime += elapsed_time(&starttime);
	}

	ExplainOpenGroup("Query", NULL, true, es);

	/* Create textual dump of plan tree */
	ExplainPrintPlan(es, queryDesc);

	/*
	 * COMPUTE_QUERY_ID_REGRESS means COMPUTE_QUERY_ID_AUTO, but we don't show
	 * the queryid in any of the EXPLAIN plans to keep stable the results
	 * generated by regression test suites.
	 */
	if (es->verbose && plannedstmt->queryId != UINT64CONST(0) &&
		compute_query_id != COMPUTE_QUERY_ID_REGRESS)
	{
		/*
		 * Output the queryid as an int64 rather than a uint64 so we match
		 * what would be seen in the BIGINT pg_stat_statements.queryid column.
		 */
		ExplainPropertyInteger("Query Identifier", NULL, (int64)
							   plannedstmt->queryId, es);
	}

	/* Show buffer usage in planning */
	if (bufusage)
	{
		ExplainOpenGroup("Planning", "Planning", true, es);
		show_buffer_usage(es, bufusage, true);
		ExplainCloseGroup("Planning", "Planning", true, es);
	}

	if (es->summary && planduration)
	{
		double		plantime = INSTR_TIME_GET_DOUBLE(*planduration);

		ExplainPropertyFloat("Planning Time", "ms", 1000.0 * plantime, 3, es);
	}

	/* Print info about runtime of triggers */
	if (es->analyze)
		ExplainPrintTriggers(es, queryDesc);

	/*
	 * Print info about JITing.  Tied to es->costs because we don't want to
	 * display this in regression tests, as it'd cause output differences
	 * depending on build options.  Might want to separate that out from COSTS
	 * at a later stage.
	 */
	if (es->costs)
		ExplainPrintJITSummary(es, queryDesc);

	/*
	 * Close down the query and free resources.  Include time for this in the
	 * total execution time (although it should be pretty minimal).
	 */
	INSTR_TIME_SET_CURRENT(starttime);

	ExecutorEnd(queryDesc);

	FreeQueryDesc(queryDesc);

	PopActiveSnapshot();

	/* We need a CCI just in case query expanded to multiple plans */
	if (es->analyze)
		CommandCounterIncrement();

	totaltime += elapsed_time(&starttime);

	/*
	 * We only report execution time if we actually ran the query (that is,
	 * the user specified ANALYZE), and if summary reporting is enabled (the
	 * user can set SUMMARY OFF to not have the timing information included in
	 * the output).  By default, ANALYZE sets SUMMARY to true.
	 */
	if (es->summary && es->analyze)
		ExplainPropertyFloat("Execution Time", "ms", 1000.0 * totaltime, 3,
							 es);

	ExplainCloseGroup("Query", NULL, true, es);
}
+
+/*
+ * ExplainPrintSettings -
+ * Print summary of modified settings affecting query planning.
+ */
+static void
+ExplainPrintSettings(ExplainState *es)
+{
+ int num;
+ struct config_generic **gucs;
+
+ /* bail out if information about settings not requested */
+ if (!es->settings)
+ return;
+
+ /* request an array of relevant settings */
+ gucs = get_explain_guc_options(&num);
+
+ if (es->format != EXPLAIN_FORMAT_TEXT)
+ {
+ ExplainOpenGroup("Settings", "Settings", true, es);
+
+ for (int i = 0; i < num; i++)
+ {
+ char *setting;
+ struct config_generic *conf = gucs[i];
+
+ setting = GetConfigOptionByName(conf->name, NULL, true);
+
+ ExplainPropertyText(conf->name, setting, es);
+ }
+
+ ExplainCloseGroup("Settings", "Settings", true, es);
+ }
+ else
+ {
+ StringInfoData str;
+
+ /* In TEXT mode, print nothing if there are no options */
+ if (num <= 0)
+ return;
+
+ initStringInfo(&str);
+
+ for (int i = 0; i < num; i++)
+ {
+ char *setting;
+ struct config_generic *conf = gucs[i];
+
+ if (i > 0)
+ appendStringInfoString(&str, ", ");
+
+ setting = GetConfigOptionByName(conf->name, NULL, true);
+
+ if (setting)
+ appendStringInfo(&str, "%s = '%s'", conf->name, setting);
+ else
+ appendStringInfo(&str, "%s = NULL", conf->name);
+ }
+
+ ExplainPropertyText("Settings", str.data, es);
+ }
+}
+
+/*
+ * ExplainPrintPlan -
+ * convert a QueryDesc's plan tree to text and append it to es->str
+ *
+ * The caller should have set up the options fields of *es, as well as
+ * initializing the output buffer es->str. Also, output formatting state
+ * such as the indent level is assumed valid. Plan-tree-specific fields
+ * in *es are initialized here.
+ *
+ * NB: will not work on utility statements
+ */
+void
+ExplainPrintPlan(ExplainState *es, QueryDesc *queryDesc)
+{
+ Bitmapset *rels_used = NULL;
+ PlanState *ps;
+
+ /* Set up ExplainState fields associated with this plan tree */
+ Assert(queryDesc->plannedstmt != NULL);
+ es->pstmt = queryDesc->plannedstmt;
+ es->rtable = queryDesc->plannedstmt->rtable;
+ ExplainPreScanNode(queryDesc->planstate, &rels_used);
+ es->rtable_names = select_rtable_names_for_explain(es->rtable, rels_used);
+ es->deparse_cxt = deparse_context_for_plan_tree(queryDesc->plannedstmt,
+ es->rtable_names);
+ es->printed_subplans = NULL;
+
+ /*
+ * Sometimes we mark a Gather node as "invisible", which means that it's
+ * not to be displayed in EXPLAIN output. The purpose of this is to allow
+ * running regression tests with force_parallel_mode=regress to get the
+ * same results as running the same tests with force_parallel_mode=off.
+ * Such marking is currently only supported on a Gather at the top of the
+ * plan. We skip that node, and we must also hide per-worker detail data
+ * further down in the plan tree.
+ */
+ ps = queryDesc->planstate;
+ if (IsA(ps, GatherState) && ((Gather *) ps->plan)->invisible)
+ {
+ ps = outerPlanState(ps);
+ es->hide_workers = true;
+ }
+ ExplainNode(ps, NIL, NULL, NULL, es);
+
+ /*
+ * If requested, include information about GUC parameters with values that
+ * don't match the built-in defaults.
+ */
+ ExplainPrintSettings(es);
+}
+
+/*
+ * ExplainPrintTriggers -
+ * convert a QueryDesc's trigger statistics to text and append it to
+ * es->str
+ *
+ * The caller should have set up the options fields of *es, as well as
+ * initializing the output buffer es->str. Other fields in *es are
+ * initialized here.
+ */
+void
+ExplainPrintTriggers(ExplainState *es, QueryDesc *queryDesc)
+{
+ ResultRelInfo *rInfo;
+ bool show_relname;
+ List *resultrels;
+ List *routerels;
+ List *targrels;
+ ListCell *l;
+
+ resultrels = queryDesc->estate->es_opened_result_relations;
+ routerels = queryDesc->estate->es_tuple_routing_result_relations;
+ targrels = queryDesc->estate->es_trig_target_relations;
+
+ ExplainOpenGroup("Triggers", "Triggers", false, es);
+
+ show_relname = (list_length(resultrels) > 1 ||
+ routerels != NIL || targrels != NIL);
+ foreach(l, resultrels)
+ {
+ rInfo = (ResultRelInfo *) lfirst(l);
+ report_triggers(rInfo, show_relname, es);
+ }
+
+ foreach(l, routerels)
+ {
+ rInfo = (ResultRelInfo *) lfirst(l);
+ report_triggers(rInfo, show_relname, es);
+ }
+
+ foreach(l, targrels)
+ {
+ rInfo = (ResultRelInfo *) lfirst(l);
+ report_triggers(rInfo, show_relname, es);
+ }
+
+ ExplainCloseGroup("Triggers", "Triggers", false, es);
+}
+
+/*
+ * ExplainPrintJITSummary -
+ * Print summarized JIT instrumentation from leader and workers
+ */
+void
+ExplainPrintJITSummary(ExplainState *es, QueryDesc *queryDesc)
+{
+ JitInstrumentation ji = {0};
+
+ if (!(queryDesc->estate->es_jit_flags & PGJIT_PERFORM))
+ return;
+
+ /*
+ * Work with a copy instead of modifying the leader state, since this
+ * function may be called twice
+ */
+ if (queryDesc->estate->es_jit)
+ InstrJitAgg(&ji, &queryDesc->estate->es_jit->instr);
+
+ /* If this process has done JIT in parallel workers, merge stats */
+ if (queryDesc->estate->es_jit_worker_instr)
+ InstrJitAgg(&ji, queryDesc->estate->es_jit_worker_instr);
+
+ ExplainPrintJIT(es, queryDesc->estate->es_jit_flags, &ji);
+}
+
/*
 * ExplainPrintJIT -
 *	  Append information about JITing to es->str.
 *
 * jit_flags is the PGJIT_* bitmask in effect; ji holds the (already
 * aggregated) instrumentation counters to report.
 */
static void
ExplainPrintJIT(ExplainState *es, int jit_flags, JitInstrumentation *ji)
{
	instr_time	total_time;

	/* don't print information if no JITing happened */
	if (!ji || ji->created_functions == 0)
		return;

	/* calculate total time as the sum of the four per-phase counters */
	INSTR_TIME_SET_ZERO(total_time);
	INSTR_TIME_ADD(total_time, ji->generation_counter);
	INSTR_TIME_ADD(total_time, ji->inlining_counter);
	INSTR_TIME_ADD(total_time, ji->optimization_counter);
	INSTR_TIME_ADD(total_time, ji->emission_counter);

	ExplainOpenGroup("JIT", "JIT", true, es);

	/* for higher density, open code the text output format */
	if (es->format == EXPLAIN_FORMAT_TEXT)
	{
		ExplainIndentText(es);
		appendStringInfoString(es->str, "JIT:\n");
		es->indent++;

		ExplainPropertyInteger("Functions", NULL, ji->created_functions, es);

		/* one line listing which JIT options were enabled */
		ExplainIndentText(es);
		appendStringInfo(es->str, "Options: %s %s, %s %s, %s %s, %s %s\n",
						 "Inlining", jit_flags & PGJIT_INLINE ? "true" : "false",
						 "Optimization", jit_flags & PGJIT_OPT3 ? "true" : "false",
						 "Expressions", jit_flags & PGJIT_EXPR ? "true" : "false",
						 "Deforming", jit_flags & PGJIT_DEFORM ? "true" : "false");

		/* timing detail only makes sense with ANALYZE and TIMING on */
		if (es->analyze && es->timing)
		{
			ExplainIndentText(es);
			appendStringInfo(es->str,
							 "Timing: %s %.3f ms, %s %.3f ms, %s %.3f ms, %s %.3f ms, %s %.3f ms\n",
							 "Generation", 1000.0 * INSTR_TIME_GET_DOUBLE(ji->generation_counter),
							 "Inlining", 1000.0 * INSTR_TIME_GET_DOUBLE(ji->inlining_counter),
							 "Optimization", 1000.0 * INSTR_TIME_GET_DOUBLE(ji->optimization_counter),
							 "Emission", 1000.0 * INSTR_TIME_GET_DOUBLE(ji->emission_counter),
							 "Total", 1000.0 * INSTR_TIME_GET_DOUBLE(total_time));
		}

		es->indent--;
	}
	else
	{
		/* structured formats: emit the same data as discrete properties */
		ExplainPropertyInteger("Functions", NULL, ji->created_functions, es);

		ExplainOpenGroup("Options", "Options", true, es);
		ExplainPropertyBool("Inlining", jit_flags & PGJIT_INLINE, es);
		ExplainPropertyBool("Optimization", jit_flags & PGJIT_OPT3, es);
		ExplainPropertyBool("Expressions", jit_flags & PGJIT_EXPR, es);
		ExplainPropertyBool("Deforming", jit_flags & PGJIT_DEFORM, es);
		ExplainCloseGroup("Options", "Options", true, es);

		if (es->analyze && es->timing)
		{
			ExplainOpenGroup("Timing", "Timing", true, es);

			ExplainPropertyFloat("Generation", "ms",
								 1000.0 * INSTR_TIME_GET_DOUBLE(ji->generation_counter),
								 3, es);
			ExplainPropertyFloat("Inlining", "ms",
								 1000.0 * INSTR_TIME_GET_DOUBLE(ji->inlining_counter),
								 3, es);
			ExplainPropertyFloat("Optimization", "ms",
								 1000.0 * INSTR_TIME_GET_DOUBLE(ji->optimization_counter),
								 3, es);
			ExplainPropertyFloat("Emission", "ms",
								 1000.0 * INSTR_TIME_GET_DOUBLE(ji->emission_counter),
								 3, es);
			ExplainPropertyFloat("Total", "ms",
								 1000.0 * INSTR_TIME_GET_DOUBLE(total_time),
								 3, es);

			ExplainCloseGroup("Timing", "Timing", true, es);
		}
	}

	ExplainCloseGroup("JIT", "JIT", true, es);
}
+
+/*
+ * ExplainQueryText -
+ * add a "Query Text" node that contains the actual text of the query
+ *
+ * The caller should have set up the options fields of *es, as well as
+ * initializing the output buffer es->str.
+ *
+ */
+void
+ExplainQueryText(ExplainState *es, QueryDesc *queryDesc)
+{
+ if (queryDesc->sourceText)
+ ExplainPropertyText("Query Text", queryDesc->sourceText, es);
+}
+
/*
 * report_triggers -
 *	  report execution stats for a single relation's triggers
 *
 * rInfo is the relation whose triggers we report; show_relname tells us
 * whether to qualify each trigger line with the relation's name.
 */
static void
report_triggers(ResultRelInfo *rInfo, bool show_relname, ExplainState *es)
{
	int			nt;

	/* nothing to do if the relation has no triggers or no instrumentation */
	if (!rInfo->ri_TrigDesc || !rInfo->ri_TrigInstrument)
		return;
	for (nt = 0; nt < rInfo->ri_TrigDesc->numtriggers; nt++)
	{
		Trigger    *trig = rInfo->ri_TrigDesc->triggers + nt;
		Instrumentation *instr = rInfo->ri_TrigInstrument + nt;
		char	   *relname;
		char	   *conname = NULL;

		/* Must clean up instrumentation state */
		InstrEndLoop(instr);

		/*
		 * We ignore triggers that were never invoked; they likely aren't
		 * relevant to the current query type.
		 */
		if (instr->ntuples == 0)
			continue;

		ExplainOpenGroup("Trigger", NULL, true, es);

		relname = RelationGetRelationName(rInfo->ri_RelationDesc);
		/* constraint triggers are labeled by their constraint's name */
		if (OidIsValid(trig->tgconstraint))
			conname = get_constraint_name(trig->tgconstraint);

		/*
		 * In text format, we avoid printing both the trigger name and the
		 * constraint name unless VERBOSE is specified.  In non-text formats
		 * we just print everything.
		 */
		if (es->format == EXPLAIN_FORMAT_TEXT)
		{
			if (es->verbose || conname == NULL)
				appendStringInfo(es->str, "Trigger %s", trig->tgname);
			else
				appendStringInfoString(es->str, "Trigger");
			if (conname)
				appendStringInfo(es->str, " for constraint %s", conname);
			if (show_relname)
				appendStringInfo(es->str, " on %s", relname);
			/* timing is suppressed when TIMING OFF was specified */
			if (es->timing)
				appendStringInfo(es->str, ": time=%.3f calls=%.0f\n",
								 1000.0 * instr->total, instr->ntuples);
			else
				appendStringInfo(es->str, ": calls=%.0f\n", instr->ntuples);
		}
		else
		{
			ExplainPropertyText("Trigger Name", trig->tgname, es);
			if (conname)
				ExplainPropertyText("Constraint Name", conname, es);
			ExplainPropertyText("Relation", relname, es);
			if (es->timing)
				ExplainPropertyFloat("Time", "ms", 1000.0 * instr->total, 3,
									 es);
			ExplainPropertyFloat("Calls", NULL, instr->ntuples, 0, es);
		}

		/* get_constraint_name() palloc'd this; release it */
		if (conname)
			pfree(conname);

		ExplainCloseGroup("Trigger", NULL, true, es);
	}
}
+
+/* Compute elapsed time in seconds since given timestamp */
+static double
+elapsed_time(instr_time *starttime)
+{
+ instr_time endtime;
+
+ INSTR_TIME_SET_CURRENT(endtime);
+ INSTR_TIME_SUBTRACT(endtime, *starttime);
+ return INSTR_TIME_GET_DOUBLE(endtime);
+}
+
/*
 * ExplainPreScanNode -
 *	  Prescan the planstate tree to identify which RTEs are referenced
 *
 * Adds the relid of each referenced RTE to *rels_used.  The result controls
 * which RTEs are assigned aliases by select_rtable_names_for_explain.
 * This ensures that we don't confusingly assign un-suffixed aliases to RTEs
 * that never appear in the EXPLAIN output (such as inheritance parents).
 */
static bool
ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used)
{
	Plan	   *plan = planstate->plan;

	switch (nodeTag(plan))
	{
			/* simple scan nodes reference exactly one RTE, via scanrelid */
		case T_SeqScan:
		case T_SampleScan:
		case T_IndexScan:
		case T_IndexOnlyScan:
		case T_BitmapHeapScan:
		case T_TidScan:
		case T_TidRangeScan:
		case T_SubqueryScan:
		case T_FunctionScan:
		case T_TableFuncScan:
		case T_ValuesScan:
		case T_CteScan:
		case T_NamedTuplestoreScan:
		case T_WorkTableScan:
			*rels_used = bms_add_member(*rels_used,
										((Scan *) plan)->scanrelid);
			break;
			/* foreign/custom scans may reference several RTEs at once */
		case T_ForeignScan:
			*rels_used = bms_add_members(*rels_used,
										 ((ForeignScan *) plan)->fs_relids);
			break;
		case T_CustomScan:
			*rels_used = bms_add_members(*rels_used,
										 ((CustomScan *) plan)->custom_relids);
			break;
			/* ModifyTable shows its nominal rel, plus ON CONFLICT's excluded */
		case T_ModifyTable:
			*rels_used = bms_add_member(*rels_used,
										((ModifyTable *) plan)->nominalRelation);
			if (((ModifyTable *) plan)->exclRelRTI)
				*rels_used = bms_add_member(*rels_used,
											((ModifyTable *) plan)->exclRelRTI);
			break;
			/* Append/MergeAppend carry a bitmapset of their member RTEs */
		case T_Append:
			*rels_used = bms_add_members(*rels_used,
										 ((Append *) plan)->apprelids);
			break;
		case T_MergeAppend:
			*rels_used = bms_add_members(*rels_used,
										 ((MergeAppend *) plan)->apprelids);
			break;
		default:
			/* other node types reference no RTEs directly */
			break;
	}

	/* recurse into children (and subplans) via the generic tree walker */
	return planstate_tree_walker(planstate, ExplainPreScanNode, rels_used);
}
+
+/*
+ * ExplainNode -
+ * Appends a description of a plan tree to es->str
+ *
+ * planstate points to the executor state node for the current plan node.
+ * We need to work from a PlanState node, not just a Plan node, in order to
+ * get at the instrumentation data (if any) as well as the list of subplans.
+ *
+ * ancestors is a list of parent Plan and SubPlan nodes, most-closely-nested
+ * first. These are needed in order to interpret PARAM_EXEC Params.
+ *
+ * relationship describes the relationship of this plan node to its parent
+ * (eg, "Outer", "Inner"); it can be null at top level. plan_name is an
+ * optional name to be attached to the node.
+ *
+ * In text format, es->indent is controlled in this function since we only
+ * want it to change at plan-node boundaries (but a few subroutines will
+ * transiently increment it). In non-text formats, es->indent corresponds
+ * to the nesting depth of logical output groups, and therefore is controlled
+ * by ExplainOpenGroup/ExplainCloseGroup.
+ */
+static void
+ExplainNode(PlanState *planstate, List *ancestors,
+ const char *relationship, const char *plan_name,
+ ExplainState *es)
+{
+ Plan *plan = planstate->plan;
+ const char *pname; /* node type name for text output */
+ const char *sname; /* node type name for non-text output */
+ const char *strategy = NULL;
+ const char *partialmode = NULL;
+ const char *operation = NULL;
+ const char *custom_name = NULL;
+ ExplainWorkersState *save_workers_state = es->workers_state;
+ int save_indent = es->indent;
+ bool haschildren;
+
+ /*
+ * Prepare per-worker output buffers, if needed. We'll append the data in
+ * these to the main output string further down.
+ */
+ if (planstate->worker_instrument && es->analyze && !es->hide_workers)
+ es->workers_state = ExplainCreateWorkersState(planstate->worker_instrument->num_workers);
+ else
+ es->workers_state = NULL;
+
+ /* Identify plan node type, and print generic details */
+ switch (nodeTag(plan))
+ {
+ case T_Result:
+ pname = sname = "Result";
+ break;
+ case T_ProjectSet:
+ pname = sname = "ProjectSet";
+ break;
+ case T_ModifyTable:
+ sname = "ModifyTable";
+ switch (((ModifyTable *) plan)->operation)
+ {
+ case CMD_INSERT:
+ pname = operation = "Insert";
+ break;
+ case CMD_UPDATE:
+ pname = operation = "Update";
+ break;
+ case CMD_DELETE:
+ pname = operation = "Delete";
+ break;
+ case CMD_MERGE:
+ pname = operation = "Merge";
+ break;
+ default:
+ pname = "???";
+ break;
+ }
+ break;
+ case T_Append:
+ pname = sname = "Append";
+ break;
+ case T_MergeAppend:
+ pname = sname = "Merge Append";
+ break;
+ case T_RecursiveUnion:
+ pname = sname = "Recursive Union";
+ break;
+ case T_BitmapAnd:
+ pname = sname = "BitmapAnd";
+ break;
+ case T_BitmapOr:
+ pname = sname = "BitmapOr";
+ break;
+ case T_NestLoop:
+ pname = sname = "Nested Loop";
+ break;
+ case T_MergeJoin:
+ pname = "Merge"; /* "Join" gets added by jointype switch */
+ sname = "Merge Join";
+ break;
+ case T_HashJoin:
+ pname = "Hash"; /* "Join" gets added by jointype switch */
+ sname = "Hash Join";
+ break;
+ case T_SeqScan:
+ pname = sname = "Seq Scan";
+ break;
+ case T_SampleScan:
+ pname = sname = "Sample Scan";
+ break;
+ case T_Gather:
+ pname = sname = "Gather";
+ break;
+ case T_GatherMerge:
+ pname = sname = "Gather Merge";
+ break;
+ case T_IndexScan:
+ pname = sname = "Index Scan";
+ break;
+ case T_IndexOnlyScan:
+ pname = sname = "Index Only Scan";
+ break;
+ case T_BitmapIndexScan:
+ pname = sname = "Bitmap Index Scan";
+ break;
+ case T_BitmapHeapScan:
+ pname = sname = "Bitmap Heap Scan";
+ break;
+ case T_TidScan:
+ pname = sname = "Tid Scan";
+ break;
+ case T_TidRangeScan:
+ pname = sname = "Tid Range Scan";
+ break;
+ case T_SubqueryScan:
+ pname = sname = "Subquery Scan";
+ break;
+ case T_FunctionScan:
+ pname = sname = "Function Scan";
+ break;
+ case T_TableFuncScan:
+ pname = sname = "Table Function Scan";
+ break;
+ case T_ValuesScan:
+ pname = sname = "Values Scan";
+ break;
+ case T_CteScan:
+ pname = sname = "CTE Scan";
+ break;
+ case T_NamedTuplestoreScan:
+ pname = sname = "Named Tuplestore Scan";
+ break;
+ case T_WorkTableScan:
+ pname = sname = "WorkTable Scan";
+ break;
+ case T_ForeignScan:
+ sname = "Foreign Scan";
+ switch (((ForeignScan *) plan)->operation)
+ {
+ case CMD_SELECT:
+ pname = "Foreign Scan";
+ operation = "Select";
+ break;
+ case CMD_INSERT:
+ pname = "Foreign Insert";
+ operation = "Insert";
+ break;
+ case CMD_UPDATE:
+ pname = "Foreign Update";
+ operation = "Update";
+ break;
+ case CMD_DELETE:
+ pname = "Foreign Delete";
+ operation = "Delete";
+ break;
+ default:
+ pname = "???";
+ break;
+ }
+ break;
+ case T_CustomScan:
+ sname = "Custom Scan";
+ custom_name = ((CustomScan *) plan)->methods->CustomName;
+ if (custom_name)
+ pname = psprintf("Custom Scan (%s)", custom_name);
+ else
+ pname = sname;
+ break;
+ case T_Material:
+ pname = sname = "Materialize";
+ break;
+ case T_Memoize:
+ pname = sname = "Memoize";
+ break;
+ case T_Sort:
+ pname = sname = "Sort";
+ break;
+ case T_IncrementalSort:
+ pname = sname = "Incremental Sort";
+ break;
+ case T_Group:
+ pname = sname = "Group";
+ break;
+ case T_Agg:
+ {
+ Agg *agg = (Agg *) plan;
+
+ sname = "Aggregate";
+ switch (agg->aggstrategy)
+ {
+ case AGG_PLAIN:
+ pname = "Aggregate";
+ strategy = "Plain";
+ break;
+ case AGG_SORTED:
+ pname = "GroupAggregate";
+ strategy = "Sorted";
+ break;
+ case AGG_HASHED:
+ pname = "HashAggregate";
+ strategy = "Hashed";
+ break;
+ case AGG_MIXED:
+ pname = "MixedAggregate";
+ strategy = "Mixed";
+ break;
+ default:
+ pname = "Aggregate ???";
+ strategy = "???";
+ break;
+ }
+
+ if (DO_AGGSPLIT_SKIPFINAL(agg->aggsplit))
+ {
+ partialmode = "Partial";
+ pname = psprintf("%s %s", partialmode, pname);
+ }
+ else if (DO_AGGSPLIT_COMBINE(agg->aggsplit))
+ {
+ partialmode = "Finalize";
+ pname = psprintf("%s %s", partialmode, pname);
+ }
+ else
+ partialmode = "Simple";
+ }
+ break;
+ case T_WindowAgg:
+ pname = sname = "WindowAgg";
+ break;
+ case T_Unique:
+ pname = sname = "Unique";
+ break;
+ case T_SetOp:
+ sname = "SetOp";
+ switch (((SetOp *) plan)->strategy)
+ {
+ case SETOP_SORTED:
+ pname = "SetOp";
+ strategy = "Sorted";
+ break;
+ case SETOP_HASHED:
+ pname = "HashSetOp";
+ strategy = "Hashed";
+ break;
+ default:
+ pname = "SetOp ???";
+ strategy = "???";
+ break;
+ }
+ break;
+ case T_LockRows:
+ pname = sname = "LockRows";
+ break;
+ case T_Limit:
+ pname = sname = "Limit";
+ break;
+ case T_Hash:
+ pname = sname = "Hash";
+ break;
+ default:
+ pname = sname = "???";
+ break;
+ }
+
+ ExplainOpenGroup("Plan",
+ relationship ? NULL : "Plan",
+ true, es);
+
+ if (es->format == EXPLAIN_FORMAT_TEXT)
+ {
+ if (plan_name)
+ {
+ ExplainIndentText(es);
+ appendStringInfo(es->str, "%s\n", plan_name);
+ es->indent++;
+ }
+ if (es->indent)
+ {
+ ExplainIndentText(es);
+ appendStringInfoString(es->str, "-> ");
+ es->indent += 2;
+ }
+ if (plan->parallel_aware)
+ appendStringInfoString(es->str, "Parallel ");
+ if (plan->async_capable)
+ appendStringInfoString(es->str, "Async ");
+ appendStringInfoString(es->str, pname);
+ es->indent++;
+ }
+ else
+ {
+ ExplainPropertyText("Node Type", sname, es);
+ if (strategy)
+ ExplainPropertyText("Strategy", strategy, es);
+ if (partialmode)
+ ExplainPropertyText("Partial Mode", partialmode, es);
+ if (operation)
+ ExplainPropertyText("Operation", operation, es);
+ if (relationship)
+ ExplainPropertyText("Parent Relationship", relationship, es);
+ if (plan_name)
+ ExplainPropertyText("Subplan Name", plan_name, es);
+ if (custom_name)
+ ExplainPropertyText("Custom Plan Provider", custom_name, es);
+ ExplainPropertyBool("Parallel Aware", plan->parallel_aware, es);
+ ExplainPropertyBool("Async Capable", plan->async_capable, es);
+ }
+
+ switch (nodeTag(plan))
+ {
+ case T_SeqScan:
+ case T_SampleScan:
+ case T_BitmapHeapScan:
+ case T_TidScan:
+ case T_TidRangeScan:
+ case T_SubqueryScan:
+ case T_FunctionScan:
+ case T_TableFuncScan:
+ case T_ValuesScan:
+ case T_CteScan:
+ case T_WorkTableScan:
+ ExplainScanTarget((Scan *) plan, es);
+ break;
+ case T_ForeignScan:
+ case T_CustomScan:
+ if (((Scan *) plan)->scanrelid > 0)
+ ExplainScanTarget((Scan *) plan, es);
+ break;
+ case T_IndexScan:
+ {
+ IndexScan *indexscan = (IndexScan *) plan;
+
+ ExplainIndexScanDetails(indexscan->indexid,
+ indexscan->indexorderdir,
+ es);
+ ExplainScanTarget((Scan *) indexscan, es);
+ }
+ break;
+ case T_IndexOnlyScan:
+ {
+ IndexOnlyScan *indexonlyscan = (IndexOnlyScan *) plan;
+
+ ExplainIndexScanDetails(indexonlyscan->indexid,
+ indexonlyscan->indexorderdir,
+ es);
+ ExplainScanTarget((Scan *) indexonlyscan, es);
+ }
+ break;
+ case T_BitmapIndexScan:
+ {
+ BitmapIndexScan *bitmapindexscan = (BitmapIndexScan *) plan;
+ const char *indexname =
+ explain_get_index_name(bitmapindexscan->indexid);
+
+ if (es->format == EXPLAIN_FORMAT_TEXT)
+ appendStringInfo(es->str, " on %s",
+ quote_identifier(indexname));
+ else
+ ExplainPropertyText("Index Name", indexname, es);
+ }
+ break;
+ case T_ModifyTable:
+ ExplainModifyTarget((ModifyTable *) plan, es);
+ break;
+ case T_NestLoop:
+ case T_MergeJoin:
+ case T_HashJoin:
+ {
+ const char *jointype;
+
+ switch (((Join *) plan)->jointype)
+ {
+ case JOIN_INNER:
+ jointype = "Inner";
+ break;
+ case JOIN_LEFT:
+ jointype = "Left";
+ break;
+ case JOIN_FULL:
+ jointype = "Full";
+ break;
+ case JOIN_RIGHT:
+ jointype = "Right";
+ break;
+ case JOIN_SEMI:
+ jointype = "Semi";
+ break;
+ case JOIN_ANTI:
+ jointype = "Anti";
+ break;
+ default:
+ jointype = "???";
+ break;
+ }
+ if (es->format == EXPLAIN_FORMAT_TEXT)
+ {
+ /*
+ * For historical reasons, the join type is interpolated
+ * into the node type name...
+ */
+ if (((Join *) plan)->jointype != JOIN_INNER)
+ appendStringInfo(es->str, " %s Join", jointype);
+ else if (!IsA(plan, NestLoop))
+ appendStringInfoString(es->str, " Join");
+ }
+ else
+ ExplainPropertyText("Join Type", jointype, es);
+ }
+ break;
+ case T_SetOp:
+ {
+ const char *setopcmd;
+
+ switch (((SetOp *) plan)->cmd)
+ {
+ case SETOPCMD_INTERSECT:
+ setopcmd = "Intersect";
+ break;
+ case SETOPCMD_INTERSECT_ALL:
+ setopcmd = "Intersect All";
+ break;
+ case SETOPCMD_EXCEPT:
+ setopcmd = "Except";
+ break;
+ case SETOPCMD_EXCEPT_ALL:
+ setopcmd = "Except All";
+ break;
+ default:
+ setopcmd = "???";
+ break;
+ }
+ if (es->format == EXPLAIN_FORMAT_TEXT)
+ appendStringInfo(es->str, " %s", setopcmd);
+ else
+ ExplainPropertyText("Command", setopcmd, es);
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (es->costs)
+ {
+ if (es->format == EXPLAIN_FORMAT_TEXT)
+ {
+ appendStringInfo(es->str, " (cost=%.2f..%.2f rows=%.0f width=%d)",
+ plan->startup_cost, plan->total_cost,
+ plan->plan_rows, plan->plan_width);
+ }
+ else
+ {
+ ExplainPropertyFloat("Startup Cost", NULL, plan->startup_cost,
+ 2, es);
+ ExplainPropertyFloat("Total Cost", NULL, plan->total_cost,
+ 2, es);
+ ExplainPropertyFloat("Plan Rows", NULL, plan->plan_rows,
+ 0, es);
+ ExplainPropertyInteger("Plan Width", NULL, plan->plan_width,
+ es);
+ }
+ }
+
+ /*
+ * We have to forcibly clean up the instrumentation state because we
+ * haven't done ExecutorEnd yet. This is pretty grotty ...
+ *
+ * Note: contrib/auto_explain could cause instrumentation to be set up
+ * even though we didn't ask for it here. Be careful not to print any
+ * instrumentation results the user didn't ask for. But we do the
+ * InstrEndLoop call anyway, if possible, to reduce the number of cases
+ * auto_explain has to contend with.
+ */
+ if (planstate->instrument)
+ InstrEndLoop(planstate->instrument);
+
+ if (es->analyze &&
+ planstate->instrument && planstate->instrument->nloops > 0)
+ {
+ double nloops = planstate->instrument->nloops;
+ double startup_ms = 1000.0 * planstate->instrument->startup / nloops;
+ double total_ms = 1000.0 * planstate->instrument->total / nloops;
+ double rows = planstate->instrument->ntuples / nloops;
+
+ if (es->format == EXPLAIN_FORMAT_TEXT)
+ {
+ if (es->timing)
+ appendStringInfo(es->str,
+ " (actual time=%.3f..%.3f rows=%.0f loops=%.0f)",
+ startup_ms, total_ms, rows, nloops);
+ else
+ appendStringInfo(es->str,
+ " (actual rows=%.0f loops=%.0f)",
+ rows, nloops);
+ }
+ else
+ {
+ if (es->timing)
+ {
+ ExplainPropertyFloat("Actual Startup Time", "ms", startup_ms,
+ 3, es);
+ ExplainPropertyFloat("Actual Total Time", "ms", total_ms,
+ 3, es);
+ }
+ ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
+ ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
+ }
+ }
+ else if (es->analyze)
+ {
+ if (es->format == EXPLAIN_FORMAT_TEXT)
+ appendStringInfoString(es->str, " (never executed)");
+ else
+ {
+ if (es->timing)
+ {
+ ExplainPropertyFloat("Actual Startup Time", "ms", 0.0, 3, es);
+ ExplainPropertyFloat("Actual Total Time", "ms", 0.0, 3, es);
+ }
+ ExplainPropertyFloat("Actual Rows", NULL, 0.0, 0, es);
+ ExplainPropertyFloat("Actual Loops", NULL, 0.0, 0, es);
+ }
+ }
+
+ /* in text format, first line ends here */
+ if (es->format == EXPLAIN_FORMAT_TEXT)
+ appendStringInfoChar(es->str, '\n');
+
+ /* prepare per-worker general execution details */
+ if (es->workers_state && es->verbose)
+ {
+ WorkerInstrumentation *w = planstate->worker_instrument;
+
+ for (int n = 0; n < w->num_workers; n++)
+ {
+ Instrumentation *instrument = &w->instrument[n];
+ double nloops = instrument->nloops;
+ double startup_ms;
+ double total_ms;
+ double rows;
+
+ if (nloops <= 0)
+ continue;
+ startup_ms = 1000.0 * instrument->startup / nloops;
+ total_ms = 1000.0 * instrument->total / nloops;
+ rows = instrument->ntuples / nloops;
+
+ ExplainOpenWorker(n, es);
+
+ if (es->format == EXPLAIN_FORMAT_TEXT)
+ {
+ ExplainIndentText(es);
+ if (es->timing)
+ appendStringInfo(es->str,
+ "actual time=%.3f..%.3f rows=%.0f loops=%.0f\n",
+ startup_ms, total_ms, rows, nloops);
+ else
+ appendStringInfo(es->str,
+ "actual rows=%.0f loops=%.0f\n",
+ rows, nloops);
+ }
+ else
+ {
+ if (es->timing)
+ {
+ ExplainPropertyFloat("Actual Startup Time", "ms",
+ startup_ms, 3, es);
+ ExplainPropertyFloat("Actual Total Time", "ms",
+ total_ms, 3, es);
+ }
+ ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
+ ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
+ }
+
+ ExplainCloseWorker(n, es);
+ }
+ }
+
+ /* target list */
+ if (es->verbose)
+ show_plan_tlist(planstate, ancestors, es);
+
+ /* unique join */
+ switch (nodeTag(plan))
+ {
+ case T_NestLoop:
+ case T_MergeJoin:
+ case T_HashJoin:
+ /* try not to be too chatty about this in text mode */
+ if (es->format != EXPLAIN_FORMAT_TEXT ||
+ (es->verbose && ((Join *) plan)->inner_unique))
+ ExplainPropertyBool("Inner Unique",
+ ((Join *) plan)->inner_unique,
+ es);
+ break;
+ default:
+ break;
+ }
+
+ /* quals, sort keys, etc */
+ switch (nodeTag(plan))
+ {
+ case T_IndexScan:
+ show_scan_qual(((IndexScan *) plan)->indexqualorig,
+ "Index Cond", planstate, ancestors, es);
+ if (((IndexScan *) plan)->indexqualorig)
+ show_instrumentation_count("Rows Removed by Index Recheck", 2,
+ planstate, es);
+ show_scan_qual(((IndexScan *) plan)->indexorderbyorig,
+ "Order By", planstate, ancestors, es);
+ show_scan_qual(plan->qual, "Filter", planstate, ancestors, es);
+ if (plan->qual)
+ show_instrumentation_count("Rows Removed by Filter", 1,
+ planstate, es);
+ break;
+ case T_IndexOnlyScan:
+ show_scan_qual(((IndexOnlyScan *) plan)->indexqual,
+ "Index Cond", planstate, ancestors, es);
+ if (((IndexOnlyScan *) plan)->recheckqual)
+ show_instrumentation_count("Rows Removed by Index Recheck", 2,
+ planstate, es);
+ show_scan_qual(((IndexOnlyScan *) plan)->indexorderby,
+ "Order By", planstate, ancestors, es);
+ show_scan_qual(plan->qual, "Filter", planstate, ancestors, es);
+ if (plan->qual)
+ show_instrumentation_count("Rows Removed by Filter", 1,
+ planstate, es);
+ if (es->analyze)
+ ExplainPropertyFloat("Heap Fetches", NULL,
+ planstate->instrument->ntuples2, 0, es);
+ break;
+ case T_BitmapIndexScan:
+ show_scan_qual(((BitmapIndexScan *) plan)->indexqualorig,
+ "Index Cond", planstate, ancestors, es);
+ break;
+ case T_BitmapHeapScan:
+ show_scan_qual(((BitmapHeapScan *) plan)->bitmapqualorig,
+ "Recheck Cond", planstate, ancestors, es);
+ if (((BitmapHeapScan *) plan)->bitmapqualorig)
+ show_instrumentation_count("Rows Removed by Index Recheck", 2,
+ planstate, es);
+ show_scan_qual(plan->qual, "Filter", planstate, ancestors, es);
+ if (plan->qual)
+ show_instrumentation_count("Rows Removed by Filter", 1,
+ planstate, es);
+ if (es->analyze)
+ show_tidbitmap_info((BitmapHeapScanState *) planstate, es);
+ break;
+ case T_SampleScan:
+ show_tablesample(((SampleScan *) plan)->tablesample,
+ planstate, ancestors, es);
+ /* fall through to print additional fields the same as SeqScan */
+ /* FALLTHROUGH */
+ case T_SeqScan:
+ case T_ValuesScan:
+ case T_CteScan:
+ case T_NamedTuplestoreScan:
+ case T_WorkTableScan:
+ case T_SubqueryScan:
+ show_scan_qual(plan->qual, "Filter", planstate, ancestors, es);
+ if (plan->qual)
+ show_instrumentation_count("Rows Removed by Filter", 1,
+ planstate, es);
+ break;
+ case T_Gather:
+ {
+ Gather *gather = (Gather *) plan;
+
+ show_scan_qual(plan->qual, "Filter", planstate, ancestors, es);
+ if (plan->qual)
+ show_instrumentation_count("Rows Removed by Filter", 1,
+ planstate, es);
+ ExplainPropertyInteger("Workers Planned", NULL,
+ gather->num_workers, es);
+
+ /* Show params evaluated at gather node */
+ if (gather->initParam)
+ show_eval_params(gather->initParam, es);
+
+ if (es->analyze)
+ {
+ int nworkers;
+
+ nworkers = ((GatherState *) planstate)->nworkers_launched;
+ ExplainPropertyInteger("Workers Launched", NULL,
+ nworkers, es);
+ }
+
+ if (gather->single_copy || es->format != EXPLAIN_FORMAT_TEXT)
+ ExplainPropertyBool("Single Copy", gather->single_copy, es);
+ }
+ break;
+ case T_GatherMerge:
+ {
+ GatherMerge *gm = (GatherMerge *) plan;
+
+ show_scan_qual(plan->qual, "Filter", planstate, ancestors, es);
+ if (plan->qual)
+ show_instrumentation_count("Rows Removed by Filter", 1,
+ planstate, es);
+ ExplainPropertyInteger("Workers Planned", NULL,
+ gm->num_workers, es);
+
+ /* Show params evaluated at gather-merge node */
+ if (gm->initParam)
+ show_eval_params(gm->initParam, es);
+
+ if (es->analyze)
+ {
+ int nworkers;
+
+ nworkers = ((GatherMergeState *) planstate)->nworkers_launched;
+ ExplainPropertyInteger("Workers Launched", NULL,
+ nworkers, es);
+ }
+ }
+ break;
+ case T_FunctionScan:
+ if (es->verbose)
+ {
+ List *fexprs = NIL;
+ ListCell *lc;
+
+ foreach(lc, ((FunctionScan *) plan)->functions)
+ {
+ RangeTblFunction *rtfunc = (RangeTblFunction *) lfirst(lc);
+
+ fexprs = lappend(fexprs, rtfunc->funcexpr);
+ }
+ /* We rely on show_expression to insert commas as needed */
+ show_expression((Node *) fexprs,
+ "Function Call", planstate, ancestors,
+ es->verbose, es);
+ }
+ show_scan_qual(plan->qual, "Filter", planstate, ancestors, es);
+ if (plan->qual)
+ show_instrumentation_count("Rows Removed by Filter", 1,
+ planstate, es);
+ break;
+ case T_TableFuncScan:
+ if (es->verbose)
+ {
+ TableFunc *tablefunc = ((TableFuncScan *) plan)->tablefunc;
+
+ show_expression((Node *) tablefunc,
+ "Table Function Call", planstate, ancestors,
+ es->verbose, es);
+ }
+ show_scan_qual(plan->qual, "Filter", planstate, ancestors, es);
+ if (plan->qual)
+ show_instrumentation_count("Rows Removed by Filter", 1,
+ planstate, es);
+ break;
+ case T_TidScan:
+ {
+ /*
+ * The tidquals list has OR semantics, so be sure to show it
+ * as an OR condition.
+ */
+ List *tidquals = ((TidScan *) plan)->tidquals;
+
+ if (list_length(tidquals) > 1)
+ tidquals = list_make1(make_orclause(tidquals));
+ show_scan_qual(tidquals, "TID Cond", planstate, ancestors, es);
+ show_scan_qual(plan->qual, "Filter", planstate, ancestors, es);
+ if (plan->qual)
+ show_instrumentation_count("Rows Removed by Filter", 1,
+ planstate, es);
+ }
+ break;
+ case T_TidRangeScan:
+ {
+ /*
+ * The tidrangequals list has AND semantics, so be sure to
+ * show it as an AND condition.
+ */
+ List *tidquals = ((TidRangeScan *) plan)->tidrangequals;
+
+ if (list_length(tidquals) > 1)
+ tidquals = list_make1(make_andclause(tidquals));
+ show_scan_qual(tidquals, "TID Cond", planstate, ancestors, es);
+ show_scan_qual(plan->qual, "Filter", planstate, ancestors, es);
+ if (plan->qual)
+ show_instrumentation_count("Rows Removed by Filter", 1,
+ planstate, es);
+ }
+ break;
+ case T_ForeignScan:
+ show_scan_qual(plan->qual, "Filter", planstate, ancestors, es);
+ if (plan->qual)
+ show_instrumentation_count("Rows Removed by Filter", 1,
+ planstate, es);
+ show_foreignscan_info((ForeignScanState *) planstate, es);
+ break;
+ case T_CustomScan:
+ {
+ CustomScanState *css = (CustomScanState *) planstate;
+
+ show_scan_qual(plan->qual, "Filter", planstate, ancestors, es);
+ if (plan->qual)
+ show_instrumentation_count("Rows Removed by Filter", 1,
+ planstate, es);
+ if (css->methods->ExplainCustomScan)
+ css->methods->ExplainCustomScan(css, ancestors, es);
+ }
+ break;
+ case T_NestLoop:
+ show_upper_qual(((NestLoop *) plan)->join.joinqual,
+ "Join Filter", planstate, ancestors, es);
+ if (((NestLoop *) plan)->join.joinqual)
+ show_instrumentation_count("Rows Removed by Join Filter", 1,
+ planstate, es);
+ show_upper_qual(plan->qual, "Filter", planstate, ancestors, es);
+ if (plan->qual)
+ show_instrumentation_count("Rows Removed by Filter", 2,
+ planstate, es);
+ break;
+ case T_MergeJoin:
+ show_upper_qual(((MergeJoin *) plan)->mergeclauses,
+ "Merge Cond", planstate, ancestors, es);
+ show_upper_qual(((MergeJoin *) plan)->join.joinqual,
+ "Join Filter", planstate, ancestors, es);
+ if (((MergeJoin *) plan)->join.joinqual)
+ show_instrumentation_count("Rows Removed by Join Filter", 1,
+ planstate, es);
+ show_upper_qual(plan->qual, "Filter", planstate, ancestors, es);
+ if (plan->qual)
+ show_instrumentation_count("Rows Removed by Filter", 2,
+ planstate, es);
+ break;
+ case T_HashJoin:
+ show_upper_qual(((HashJoin *) plan)->hashclauses,
+ "Hash Cond", planstate, ancestors, es);
+ show_upper_qual(((HashJoin *) plan)->join.joinqual,
+ "Join Filter", planstate, ancestors, es);
+ if (((HashJoin *) plan)->join.joinqual)
+ show_instrumentation_count("Rows Removed by Join Filter", 1,
+ planstate, es);
+ show_upper_qual(plan->qual, "Filter", planstate, ancestors, es);
+ if (plan->qual)
+ show_instrumentation_count("Rows Removed by Filter", 2,
+ planstate, es);
+ break;
+ case T_Agg:
+ show_agg_keys(castNode(AggState, planstate), ancestors, es);
+ show_upper_qual(plan->qual, "Filter", planstate, ancestors, es);
+ show_hashagg_info((AggState *) planstate, es);
+ if (plan->qual)
+ show_instrumentation_count("Rows Removed by Filter", 1,
+ planstate, es);
+ break;
+ case T_WindowAgg:
+ show_upper_qual(plan->qual, "Filter", planstate, ancestors, es);
+ if (plan->qual)
+ show_instrumentation_count("Rows Removed by Filter", 1,
+ planstate, es);
+ show_upper_qual(((WindowAgg *) plan)->runConditionOrig,
+ "Run Condition", planstate, ancestors, es);
+ break;
+ case T_Group:
+ show_group_keys(castNode(GroupState, planstate), ancestors, es);
+ show_upper_qual(plan->qual, "Filter", planstate, ancestors, es);
+ if (plan->qual)
+ show_instrumentation_count("Rows Removed by Filter", 1,
+ planstate, es);
+ break;
+ case T_Sort:
+ show_sort_keys(castNode(SortState, planstate), ancestors, es);
+ show_sort_info(castNode(SortState, planstate), es);
+ break;
+ case T_IncrementalSort:
+ show_incremental_sort_keys(castNode(IncrementalSortState, planstate),
+ ancestors, es);
+ show_incremental_sort_info(castNode(IncrementalSortState, planstate),
+ es);
+ break;
+ case T_MergeAppend:
+ show_merge_append_keys(castNode(MergeAppendState, planstate),
+ ancestors, es);
+ break;
+ case T_Result:
+ show_upper_qual((List *) ((Result *) plan)->resconstantqual,
+ "One-Time Filter", planstate, ancestors, es);
+ show_upper_qual(plan->qual, "Filter", planstate, ancestors, es);
+ if (plan->qual)
+ show_instrumentation_count("Rows Removed by Filter", 1,
+ planstate, es);
+ break;
+ case T_ModifyTable:
+ show_modifytable_info(castNode(ModifyTableState, planstate), ancestors,
+ es);
+ break;
+ case T_Hash:
+ show_hash_info(castNode(HashState, planstate), es);
+ break;
+ case T_Memoize:
+ show_memoize_info(castNode(MemoizeState, planstate), ancestors,
+ es);
+ break;
+ default:
+ break;
+ }
+
+ /*
+ * Prepare per-worker JIT instrumentation. As with the overall JIT
+ * summary, this is printed only if printing costs is enabled.
+ */
+ if (es->workers_state && es->costs && es->verbose)
+ {
+ SharedJitInstrumentation *w = planstate->worker_jit_instrument;
+
+ if (w)
+ {
+ for (int n = 0; n < w->num_workers; n++)
+ {
+ ExplainOpenWorker(n, es);
+ ExplainPrintJIT(es, planstate->state->es_jit_flags,
+ &w->jit_instr[n]);
+ ExplainCloseWorker(n, es);
+ }
+ }
+ }
+
+ /* Show buffer/WAL usage */
+ if (es->buffers && planstate->instrument)
+ show_buffer_usage(es, &planstate->instrument->bufusage, false);
+ if (es->wal && planstate->instrument)
+ show_wal_usage(es, &planstate->instrument->walusage);
+
+ /* Prepare per-worker buffer/WAL usage */
+ if (es->workers_state && (es->buffers || es->wal) && es->verbose)
+ {
+ WorkerInstrumentation *w = planstate->worker_instrument;
+
+ for (int n = 0; n < w->num_workers; n++)
+ {
+ Instrumentation *instrument = &w->instrument[n];
+ double nloops = instrument->nloops;
+
+ if (nloops <= 0)
+ continue;
+
+ ExplainOpenWorker(n, es);
+ if (es->buffers)
+ show_buffer_usage(es, &instrument->bufusage, false);
+ if (es->wal)
+ show_wal_usage(es, &instrument->walusage);
+ ExplainCloseWorker(n, es);
+ }
+ }
+
+ /* Show per-worker details for this plan node, then pop that stack */
+ if (es->workers_state)
+ ExplainFlushWorkersState(es);
+ es->workers_state = save_workers_state;
+
+ /*
+ * If partition pruning was done during executor initialization, the
+ * number of child plans we'll display below will be less than the number
+ * of subplans that was specified in the plan. To make this a bit less
+ * mysterious, emit an indication that this happened. Note that this
+ * field is emitted now because we want it to be a property of the parent
+ * node; it *cannot* be emitted within the Plans sub-node we'll open next.
+ */
+ switch (nodeTag(plan))
+ {
+ case T_Append:
+ ExplainMissingMembers(((AppendState *) planstate)->as_nplans,
+ list_length(((Append *) plan)->appendplans),
+ es);
+ break;
+ case T_MergeAppend:
+ ExplainMissingMembers(((MergeAppendState *) planstate)->ms_nplans,
+ list_length(((MergeAppend *) plan)->mergeplans),
+ es);
+ break;
+ default:
+ break;
+ }
+
+ /* Get ready to display the child plans */
+ haschildren = planstate->initPlan ||
+ outerPlanState(planstate) ||
+ innerPlanState(planstate) ||
+ IsA(plan, Append) ||
+ IsA(plan, MergeAppend) ||
+ IsA(plan, BitmapAnd) ||
+ IsA(plan, BitmapOr) ||
+ IsA(plan, SubqueryScan) ||
+ (IsA(planstate, CustomScanState) &&
+ ((CustomScanState *) planstate)->custom_ps != NIL) ||
+ planstate->subPlan;
+ if (haschildren)
+ {
+ ExplainOpenGroup("Plans", "Plans", false, es);
+ /* Pass current Plan as head of ancestors list for children */
+ ancestors = lcons(plan, ancestors);
+ }
+
+ /* initPlan-s */
+ if (planstate->initPlan)
+ ExplainSubPlans(planstate->initPlan, ancestors, "InitPlan", es);
+
+ /* lefttree */
+ if (outerPlanState(planstate))
+ ExplainNode(outerPlanState(planstate), ancestors,
+ "Outer", NULL, es);
+
+ /* righttree */
+ if (innerPlanState(planstate))
+ ExplainNode(innerPlanState(planstate), ancestors,
+ "Inner", NULL, es);
+
+ /* special child plans */
+ switch (nodeTag(plan))
+ {
+ case T_Append:
+ ExplainMemberNodes(((AppendState *) planstate)->appendplans,
+ ((AppendState *) planstate)->as_nplans,
+ ancestors, es);
+ break;
+ case T_MergeAppend:
+ ExplainMemberNodes(((MergeAppendState *) planstate)->mergeplans,
+ ((MergeAppendState *) planstate)->ms_nplans,
+ ancestors, es);
+ break;
+ case T_BitmapAnd:
+ ExplainMemberNodes(((BitmapAndState *) planstate)->bitmapplans,
+ ((BitmapAndState *) planstate)->nplans,
+ ancestors, es);
+ break;
+ case T_BitmapOr:
+ ExplainMemberNodes(((BitmapOrState *) planstate)->bitmapplans,
+ ((BitmapOrState *) planstate)->nplans,
+ ancestors, es);
+ break;
+ case T_SubqueryScan:
+ ExplainNode(((SubqueryScanState *) planstate)->subplan, ancestors,
+ "Subquery", NULL, es);
+ break;
+ case T_CustomScan:
+ ExplainCustomChildren((CustomScanState *) planstate,
+ ancestors, es);
+ break;
+ default:
+ break;
+ }
+
+ /* subPlan-s */
+ if (planstate->subPlan)
+ ExplainSubPlans(planstate->subPlan, ancestors, "SubPlan", es);
+
+ /* end of child plans */
+ if (haschildren)
+ {
+ ancestors = list_delete_first(ancestors);
+ ExplainCloseGroup("Plans", "Plans", false, es);
+ }
+
+ /* in text format, undo whatever indentation we added */
+ if (es->format == EXPLAIN_FORMAT_TEXT)
+ es->indent = save_indent;
+
+ ExplainCloseGroup("Plan",
+ relationship ? NULL : "Plan",
+ true, es);
+}
+
+/*
+ * Show the targetlist of a plan node
+ */
+static void
+show_plan_tlist(PlanState *planstate, List *ancestors, ExplainState *es)
+{
+ Plan *plan = planstate->plan;
+ List *context;
+ List *result = NIL;
+ bool useprefix;
+ ListCell *lc;
+
+ /* No work if empty tlist (this occurs eg in bitmap indexscans) */
+ if (plan->targetlist == NIL)
+ return;
+ /* The tlist of an Append isn't real helpful, so suppress it */
+ if (IsA(plan, Append))
+ return;
+ /* Likewise for MergeAppend and RecursiveUnion */
+ if (IsA(plan, MergeAppend))
+ return;
+ if (IsA(plan, RecursiveUnion))
+ return;
+
+ /*
+ * Likewise for ForeignScan that executes a direct INSERT/UPDATE/DELETE
+ *
+ * Note: the tlist for a ForeignScan that executes a direct INSERT/UPDATE
+ * might contain subplan output expressions that are confusing in this
+ * context. The tlist for a ForeignScan that executes a direct UPDATE/
+ * DELETE always contains "junk" target columns to identify the exact row
+ * to update or delete, which would be confusing in this context. So, we
+ * suppress it in all the cases.
+ */
+ if (IsA(plan, ForeignScan) &&
+ ((ForeignScan *) plan)->operation != CMD_SELECT)
+ return;
+
+ /* Set up deparsing context */
+ context = set_deparse_context_plan(es->deparse_cxt,
+ plan,
+ ancestors);
+ useprefix = list_length(es->rtable) > 1;
+
+ /* Deparse each result column (we now include resjunk ones) */
+ foreach(lc, plan->targetlist)
+ {
+ TargetEntry *tle = (TargetEntry *) lfirst(lc);
+
+ result = lappend(result,
+ deparse_expression((Node *) tle->expr, context,
+ useprefix, false));
+ }
+
+ /* Print results */
+ ExplainPropertyList("Output", result, es);
+}
+
+/*
+ * Show a generic expression
+ */
+static void
+show_expression(Node *node, const char *qlabel,
+ PlanState *planstate, List *ancestors,
+ bool useprefix, ExplainState *es)
+{
+ List *context;
+ char *exprstr;
+
+ /* Set up deparsing context */
+ context = set_deparse_context_plan(es->deparse_cxt,
+ planstate->plan,
+ ancestors);
+
+ /* Deparse the expression */
+ exprstr = deparse_expression(node, context, useprefix, false);
+
+ /* And add to es->str */
+ ExplainPropertyText(qlabel, exprstr, es);
+}
+
+/*
+ * Show a qualifier expression (which is a List with implicit AND semantics)
+ */
+static void
+show_qual(List *qual, const char *qlabel,
+ PlanState *planstate, List *ancestors,
+ bool useprefix, ExplainState *es)
+{
+ Node *node;
+
+ /* No work if empty qual */
+ if (qual == NIL)
+ return;
+
+ /* Convert AND list to explicit AND */
+ node = (Node *) make_ands_explicit(qual);
+
+ /* And show it */
+ show_expression(node, qlabel, planstate, ancestors, useprefix, es);
+}
+
+/*
+ * Show a qualifier expression for a scan plan node
+ */
+static void
+show_scan_qual(List *qual, const char *qlabel,
+ PlanState *planstate, List *ancestors,
+ ExplainState *es)
+{
+ bool useprefix;
+
+ useprefix = (IsA(planstate->plan, SubqueryScan) || es->verbose);
+ show_qual(qual, qlabel, planstate, ancestors, useprefix, es);
+}
+
+/*
+ * Show a qualifier expression for an upper-level plan node
+ */
+static void
+show_upper_qual(List *qual, const char *qlabel,
+ PlanState *planstate, List *ancestors,
+ ExplainState *es)
+{
+ bool useprefix;
+
+ useprefix = (list_length(es->rtable) > 1 || es->verbose);
+ show_qual(qual, qlabel, planstate, ancestors, useprefix, es);
+}
+
+/*
+ * Show the sort keys for a Sort node.
+ */
+static void
+show_sort_keys(SortState *sortstate, List *ancestors, ExplainState *es)
+{
+ Sort *plan = (Sort *) sortstate->ss.ps.plan;
+
+ show_sort_group_keys((PlanState *) sortstate, "Sort Key",
+ plan->numCols, 0, plan->sortColIdx,
+ plan->sortOperators, plan->collations,
+ plan->nullsFirst,
+ ancestors, es);
+}
+
+/*
+ * Show the sort keys for a IncrementalSort node.
+ */
+static void
+show_incremental_sort_keys(IncrementalSortState *incrsortstate,
+ List *ancestors, ExplainState *es)
+{
+ IncrementalSort *plan = (IncrementalSort *) incrsortstate->ss.ps.plan;
+
+ show_sort_group_keys((PlanState *) incrsortstate, "Sort Key",
+ plan->sort.numCols, plan->nPresortedCols,
+ plan->sort.sortColIdx,
+ plan->sort.sortOperators, plan->sort.collations,
+ plan->sort.nullsFirst,
+ ancestors, es);
+}
+
+/*
+ * Likewise, for a MergeAppend node.
+ */
+static void
+show_merge_append_keys(MergeAppendState *mstate, List *ancestors,
+ ExplainState *es)
+{
+ MergeAppend *plan = (MergeAppend *) mstate->ps.plan;
+
+ show_sort_group_keys((PlanState *) mstate, "Sort Key",
+ plan->numCols, 0, plan->sortColIdx,
+ plan->sortOperators, plan->collations,
+ plan->nullsFirst,
+ ancestors, es);
+}
+
+/*
+ * Show the grouping keys for an Agg node.
+ */
+static void
+show_agg_keys(AggState *astate, List *ancestors,
+ ExplainState *es)
+{
+ Agg *plan = (Agg *) astate->ss.ps.plan;
+
+ if (plan->numCols > 0 || plan->groupingSets)
+ {
+ /* The key columns refer to the tlist of the child plan */
+ ancestors = lcons(plan, ancestors);
+
+ if (plan->groupingSets)
+ show_grouping_sets(outerPlanState(astate), plan, ancestors, es);
+ else
+ show_sort_group_keys(outerPlanState(astate), "Group Key",
+ plan->numCols, 0, plan->grpColIdx,
+ NULL, NULL, NULL,
+ ancestors, es);
+
+ ancestors = list_delete_first(ancestors);
+ }
+}
+
+static void
+show_grouping_sets(PlanState *planstate, Agg *agg,
+ List *ancestors, ExplainState *es)
+{
+ List *context;
+ bool useprefix;
+ ListCell *lc;
+
+ /* Set up deparsing context */
+ context = set_deparse_context_plan(es->deparse_cxt,
+ planstate->plan,
+ ancestors);
+ useprefix = (list_length(es->rtable) > 1 || es->verbose);
+
+ ExplainOpenGroup("Grouping Sets", "Grouping Sets", false, es);
+
+ show_grouping_set_keys(planstate, agg, NULL,
+ context, useprefix, ancestors, es);
+
+ foreach(lc, agg->chain)
+ {
+ Agg *aggnode = lfirst(lc);
+ Sort *sortnode = (Sort *) aggnode->plan.lefttree;
+
+ show_grouping_set_keys(planstate, aggnode, sortnode,
+ context, useprefix, ancestors, es);
+ }
+
+ ExplainCloseGroup("Grouping Sets", "Grouping Sets", false, es);
+}
+
/*
 * Show the key sets of one grouping-set aggregate phase.
 *
 * aggnode is the phase to display; sortnode, if not NULL, is the Sort plan
 * that feeds that phase (chained phases re-sort their input), and its sort
 * keys are printed first.  context/useprefix are the deparsing context and
 * prefix flag precomputed by show_grouping_sets.
 */
static void
show_grouping_set_keys(PlanState *planstate,
                       Agg *aggnode, Sort *sortnode,
                       List *context, bool useprefix,
                       List *ancestors, ExplainState *es)
{
    Plan       *plan = planstate->plan;
    char       *exprstr;
    ListCell   *lc;
    List       *gsets = aggnode->groupingSets;
    AttrNumber *keycols = aggnode->grpColIdx;
    const char *keyname;
    const char *keysetname;

    /* Hashed (or mixed) phases label their keys "Hash", sorted ones "Group" */
    if (aggnode->aggstrategy == AGG_HASHED || aggnode->aggstrategy == AGG_MIXED)
    {
        keyname = "Hash Key";
        keysetname = "Hash Keys";
    }
    else
    {
        keyname = "Group Key";
        keysetname = "Group Keys";
    }

    ExplainOpenGroup("Grouping Set", NULL, true, es);

    if (sortnode)
    {
        show_sort_group_keys(planstate, "Sort Key",
                             sortnode->numCols, 0, sortnode->sortColIdx,
                             sortnode->sortOperators, sortnode->collations,
                             sortnode->nullsFirst,
                             ancestors, es);
        /* In text mode, indent the key sets under their Sort Key line */
        if (es->format == EXPLAIN_FORMAT_TEXT)
            es->indent++;
    }

    ExplainOpenGroup(keysetname, keysetname, false, es);

    /* Each member of gsets is itself a list of grpColIdx indexes */
    foreach(lc, gsets)
    {
        List       *result = NIL;
        ListCell   *lc2;

        foreach(lc2, (List *) lfirst(lc))
        {
            Index       i = lfirst_int(lc2);
            AttrNumber  keyresno = keycols[i];
            TargetEntry *target = get_tle_by_resno(plan->targetlist,
                                                   keyresno);

            if (!target)
                elog(ERROR, "no tlist entry for key %d", keyresno);
            /* Deparse the expression, showing any top-level cast */
            exprstr = deparse_expression((Node *) target->expr, context,
                                         useprefix, true);

            result = lappend(result, exprstr);
        }

        /* In text mode, render the empty grouping set explicitly as "()" */
        if (!result && es->format == EXPLAIN_FORMAT_TEXT)
            ExplainPropertyText(keyname, "()", es);
        else
            ExplainPropertyListNested(keyname, result, es);
    }

    ExplainCloseGroup(keysetname, keysetname, false, es);

    /* Undo the extra indentation added for the Sort Key line */
    if (sortnode && es->format == EXPLAIN_FORMAT_TEXT)
        es->indent--;

    ExplainCloseGroup("Grouping Set", NULL, true, es);
}
+
+/*
+ * Show the grouping keys for a Group node.
+ */
+static void
+show_group_keys(GroupState *gstate, List *ancestors,
+ ExplainState *es)
+{
+ Group *plan = (Group *) gstate->ss.ps.plan;
+
+ /* The key columns refer to the tlist of the child plan */
+ ancestors = lcons(plan, ancestors);
+ show_sort_group_keys(outerPlanState(gstate), "Group Key",
+ plan->numCols, 0, plan->grpColIdx,
+ NULL, NULL, NULL,
+ ancestors, es);
+ ancestors = list_delete_first(ancestors);
+}
+
/*
 * Common code to show sort/group keys, which are represented in plan nodes
 * as arrays of targetlist indexes.  If it's a sort key rather than a group
 * key, also pass sort operators/collations/nullsFirst arrays (all three must
 * be non-NULL together, or all NULL for a group key).  The first
 * nPresortedKeys keys are additionally listed under "Presorted Key"
 * (used by incremental sort).
 */
static void
show_sort_group_keys(PlanState *planstate, const char *qlabel,
                     int nkeys, int nPresortedKeys, AttrNumber *keycols,
                     Oid *sortOperators, Oid *collations, bool *nullsFirst,
                     List *ancestors, ExplainState *es)
{
    Plan       *plan = planstate->plan;
    List       *context;
    List       *result = NIL;
    List       *resultPresorted = NIL;
    StringInfoData sortkeybuf;
    bool        useprefix;
    int         keyno;

    /* Nothing to print if there are no keys */
    if (nkeys <= 0)
        return;

    /* Scratch buffer, reset and reused for each key */
    initStringInfo(&sortkeybuf);

    /* Set up deparsing context */
    context = set_deparse_context_plan(es->deparse_cxt,
                                       plan,
                                       ancestors);
    useprefix = (list_length(es->rtable) > 1 || es->verbose);

    for (keyno = 0; keyno < nkeys; keyno++)
    {
        /* find key expression in tlist */
        AttrNumber  keyresno = keycols[keyno];
        TargetEntry *target = get_tle_by_resno(plan->targetlist,
                                               keyresno);
        char       *exprstr;

        if (!target)
            elog(ERROR, "no tlist entry for key %d", keyresno);
        /* Deparse the expression, showing any top-level cast */
        exprstr = deparse_expression((Node *) target->expr, context,
                                     useprefix, true);
        resetStringInfo(&sortkeybuf);
        appendStringInfoString(&sortkeybuf, exprstr);
        /* Append sort order information, if relevant */
        if (sortOperators != NULL)
            show_sortorder_options(&sortkeybuf,
                                   (Node *) target->expr,
                                   sortOperators[keyno],
                                   collations[keyno],
                                   nullsFirst[keyno]);
        /* Emit one property-list item per sort key */
        result = lappend(result, pstrdup(sortkeybuf.data));
        /* Presorted keys show the bare expression, without sort options */
        if (keyno < nPresortedKeys)
            resultPresorted = lappend(resultPresorted, exprstr);
    }

    ExplainPropertyList(qlabel, result, es);
    if (nPresortedKeys > 0)
        ExplainPropertyList("Presorted Key", resultPresorted, es);
}
+
/*
 * Append nondefault characteristics of the sort ordering of a column to buf
 * (collation, direction, NULLS FIRST/LAST).
 *
 * Default behaviors (the type's own collation, ASC via the type's "<"
 * operator, and the NULLS placement implied by the direction) are omitted,
 * so nothing is appended for a completely ordinary sort key.
 */
static void
show_sortorder_options(StringInfo buf, Node *sortexpr,
                       Oid sortOperator, Oid collation, bool nullsFirst)
{
    Oid         sortcoltype = exprType(sortexpr);
    bool        reverse = false;
    TypeCacheEntry *typentry;

    /* Fetch the type's default "<" and ">" operators for comparison below */
    typentry = lookup_type_cache(sortcoltype,
                                 TYPECACHE_LT_OPR | TYPECACHE_GT_OPR);

    /*
     * Print COLLATE if it's not default for the column's type.  There are
     * some cases where this is redundant, eg if expression is a column whose
     * declared collation is that collation, but it's hard to distinguish that
     * here (and arguably, printing COLLATE explicitly is a good idea anyway
     * in such cases).
     */
    if (OidIsValid(collation) && collation != get_typcollation(sortcoltype))
    {
        char       *collname = get_collation_name(collation);

        if (collname == NULL)
            elog(ERROR, "cache lookup failed for collation %u", collation);
        appendStringInfo(buf, " COLLATE %s", quote_identifier(collname));
    }

    /* Print direction if not ASC, or USING if non-default sort operator */
    if (sortOperator == typentry->gt_opr)
    {
        appendStringInfoString(buf, " DESC");
        reverse = true;
    }
    else if (sortOperator != typentry->lt_opr)
    {
        char       *opname = get_opname(sortOperator);

        if (opname == NULL)
            elog(ERROR, "cache lookup failed for operator %u", sortOperator);
        appendStringInfo(buf, " USING %s", opname);
        /* Determine whether operator would be considered ASC or DESC */
        (void) get_equality_op_for_ordering_op(sortOperator, &reverse);
    }

    /*
     * Add NULLS FIRST/LAST only if it wouldn't be default: ASC defaults to
     * NULLS LAST, DESC defaults to NULLS FIRST.
     */
    if (nullsFirst && !reverse)
    {
        appendStringInfoString(buf, " NULLS FIRST");
    }
    else if (!nullsFirst && reverse)
    {
        appendStringInfoString(buf, " NULLS LAST");
    }
}
+
/*
 * Show TABLESAMPLE properties of a SampleScan.
 *
 * Prints the sampling method name, its deparsed argument expressions, and
 * the REPEATABLE seed expression if one was given.  Text format produces a
 * single "Sampling: method (args) REPEATABLE (seed)" line; structured
 * formats emit separate properties.
 */
static void
show_tablesample(TableSampleClause *tsc, PlanState *planstate,
                 List *ancestors, ExplainState *es)
{
    List       *context;
    bool        useprefix;
    char       *method_name;
    List       *params = NIL;
    char       *repeatable;
    ListCell   *lc;

    /* Set up deparsing context */
    context = set_deparse_context_plan(es->deparse_cxt,
                                       planstate->plan,
                                       ancestors);
    useprefix = list_length(es->rtable) > 1;

    /* Get the tablesample method name */
    method_name = get_func_name(tsc->tsmhandler);

    /* Deparse parameter expressions */
    foreach(lc, tsc->args)
    {
        Node       *arg = (Node *) lfirst(lc);

        params = lappend(params,
                         deparse_expression(arg, context,
                                            useprefix, false));
    }
    /* REPEATABLE clause is optional; NULL means it wasn't specified */
    if (tsc->repeatable)
        repeatable = deparse_expression((Node *) tsc->repeatable, context,
                                        useprefix, false);
    else
        repeatable = NULL;

    /* Print results */
    if (es->format == EXPLAIN_FORMAT_TEXT)
    {
        bool        first = true;

        ExplainIndentText(es);
        appendStringInfo(es->str, "Sampling: %s (", method_name);
        foreach(lc, params)
        {
            if (!first)
                appendStringInfoString(es->str, ", ");
            appendStringInfoString(es->str, (const char *) lfirst(lc));
            first = false;
        }
        appendStringInfoChar(es->str, ')');
        if (repeatable)
            appendStringInfo(es->str, " REPEATABLE (%s)", repeatable);
        appendStringInfoChar(es->str, '\n');
    }
    else
    {
        ExplainPropertyText("Sampling Method", method_name, es);
        ExplainPropertyList("Sampling Parameters", params, es);
        if (repeatable)
            ExplainPropertyText("Repeatable Seed", repeatable, es);
    }
}
+
/*
 * If it's EXPLAIN ANALYZE, show tuplesort stats for a sort node.
 *
 * Reports the leader's sort method and space usage (when the sort actually
 * completed), then the same stats for each parallel worker that filled in
 * its shared-instrumentation slot.
 */
static void
show_sort_info(SortState *sortstate, ExplainState *es)
{
    if (!es->analyze)
        return;

    /* Leader stats are only valid once the sort has actually been performed */
    if (sortstate->sort_Done && sortstate->tuplesortstate != NULL)
    {
        Tuplesortstate *state = (Tuplesortstate *) sortstate->tuplesortstate;
        TuplesortInstrumentation stats;
        const char *sortMethod;
        const char *spaceType;
        int64       spaceUsed;

        tuplesort_get_stats(state, &stats);
        sortMethod = tuplesort_method_name(stats.sortMethod);
        spaceType = tuplesort_space_type_name(stats.spaceType);
        spaceUsed = stats.spaceUsed;

        if (es->format == EXPLAIN_FORMAT_TEXT)
        {
            ExplainIndentText(es);
            appendStringInfo(es->str, "Sort Method: %s %s: " INT64_FORMAT "kB\n",
                             sortMethod, spaceType, spaceUsed);
        }
        else
        {
            ExplainPropertyText("Sort Method", sortMethod, es);
            ExplainPropertyInteger("Sort Space Used", "kB", spaceUsed, es);
            ExplainPropertyText("Sort Space Type", spaceType, es);
        }
    }

    /*
     * You might think we should just skip this stanza entirely when
     * es->hide_workers is true, but then we'd get no sort-method output at
     * all.  We have to make it look like worker 0's data is top-level data.
     * This is easily done by just skipping the OpenWorker/CloseWorker calls.
     * Currently, we don't worry about the possibility that there are multiple
     * workers in such a case; if there are, duplicate output fields will be
     * emitted.
     */
    if (sortstate->shared_info != NULL)
    {
        int         n;

        for (n = 0; n < sortstate->shared_info->num_workers; n++)
        {
            TuplesortInstrumentation *sinstrument;
            const char *sortMethod;
            const char *spaceType;
            int64       spaceUsed;

            sinstrument = &sortstate->shared_info->sinstrument[n];
            if (sinstrument->sortMethod == SORT_TYPE_STILL_IN_PROGRESS)
                continue;       /* ignore any unfilled slots */
            sortMethod = tuplesort_method_name(sinstrument->sortMethod);
            spaceType = tuplesort_space_type_name(sinstrument->spaceType);
            spaceUsed = sinstrument->spaceUsed;

            if (es->workers_state)
                ExplainOpenWorker(n, es);

            if (es->format == EXPLAIN_FORMAT_TEXT)
            {
                ExplainIndentText(es);
                appendStringInfo(es->str,
                                 "Sort Method: %s %s: " INT64_FORMAT "kB\n",
                                 sortMethod, spaceType, spaceUsed);
            }
            else
            {
                ExplainPropertyText("Sort Method", sortMethod, es);
                ExplainPropertyInteger("Sort Space Used", "kB", spaceUsed, es);
                ExplainPropertyText("Sort Space Type", spaceType, es);
            }

            if (es->workers_state)
                ExplainCloseWorker(n, es);
        }
    }
}
+
/*
 * Incremental sort nodes sort in (a potentially very large number of) batches,
 * so EXPLAIN ANALYZE needs to roll up the tuplesort stats from each batch into
 * an intelligible summary.
 *
 * This function is used for both a non-parallel node and each worker in a
 * parallel incremental sort node.
 *
 * groupLabel distinguishes "Full-sort" from "Pre-sorted" groups; indent
 * controls whether the text-format line is indented (workers sharing a line
 * with other output pass false).
 */
static void
show_incremental_sort_group_info(IncrementalSortGroupInfo *groupInfo,
                                 const char *groupLabel, bool indent, ExplainState *es)
{
    ListCell   *methodCell;
    List       *methodNames = NIL;

    /* Generate a list of sort methods used across all groups. */
    for (int bit = 0; bit < NUM_TUPLESORTMETHODS; bit++)
    {
        /* sortMethods is a bitmask; test each method bit in turn */
        TuplesortMethod sortMethod = (1 << bit);

        if (groupInfo->sortMethods & sortMethod)
        {
            const char *methodName = tuplesort_method_name(sortMethod);

            methodNames = lappend(methodNames, unconstify(char *, methodName));
        }
    }

    if (es->format == EXPLAIN_FORMAT_TEXT)
    {
        if (indent)
            appendStringInfoSpaces(es->str, es->indent * 2);
        appendStringInfo(es->str, "%s Groups: " INT64_FORMAT "  Sort Method", groupLabel,
                         groupInfo->groupCount);
        /* plural/singular based on methodNames size */
        if (list_length(methodNames) > 1)
            appendStringInfoString(es->str, "s: ");
        else
            appendStringInfoString(es->str, ": ");
        foreach(methodCell, methodNames)
        {
            appendStringInfoString(es->str, (char *) methodCell->ptr_value);
            if (foreach_current_index(methodCell) < list_length(methodNames) - 1)
                appendStringInfoString(es->str, ", ");
        }

        /* Memory stats are shown only if some group sorted in memory */
        if (groupInfo->maxMemorySpaceUsed > 0)
        {
            int64       avgSpace = groupInfo->totalMemorySpaceUsed / groupInfo->groupCount;
            const char *spaceTypeName;

            spaceTypeName = tuplesort_space_type_name(SORT_SPACE_TYPE_MEMORY);
            appendStringInfo(es->str, "  Average %s: " INT64_FORMAT "kB  Peak %s: " INT64_FORMAT "kB",
                             spaceTypeName, avgSpace,
                             spaceTypeName, groupInfo->maxMemorySpaceUsed);
        }

        /* Likewise for disk, if any group spilled */
        if (groupInfo->maxDiskSpaceUsed > 0)
        {
            int64       avgSpace = groupInfo->totalDiskSpaceUsed / groupInfo->groupCount;

            const char *spaceTypeName;

            spaceTypeName = tuplesort_space_type_name(SORT_SPACE_TYPE_DISK);
            appendStringInfo(es->str, "  Average %s: " INT64_FORMAT "kB  Peak %s: " INT64_FORMAT "kB",
                             spaceTypeName, avgSpace,
                             spaceTypeName, groupInfo->maxDiskSpaceUsed);
        }
    }
    else
    {
        StringInfoData groupName;

        initStringInfo(&groupName);
        appendStringInfo(&groupName, "%s Groups", groupLabel);
        ExplainOpenGroup("Incremental Sort Groups", groupName.data, true, es);
        ExplainPropertyInteger("Group Count", NULL, groupInfo->groupCount, es);

        ExplainPropertyList("Sort Methods Used", methodNames, es);

        if (groupInfo->maxMemorySpaceUsed > 0)
        {
            int64       avgSpace = groupInfo->totalMemorySpaceUsed / groupInfo->groupCount;
            const char *spaceTypeName;
            StringInfoData memoryName;

            spaceTypeName = tuplesort_space_type_name(SORT_SPACE_TYPE_MEMORY);
            initStringInfo(&memoryName);
            appendStringInfo(&memoryName, "Sort Space %s", spaceTypeName);
            ExplainOpenGroup("Sort Space", memoryName.data, true, es);

            ExplainPropertyInteger("Average Sort Space Used", "kB", avgSpace, es);
            ExplainPropertyInteger("Peak Sort Space Used", "kB",
                                   groupInfo->maxMemorySpaceUsed, es);

            ExplainCloseGroup("Sort Space", memoryName.data, true, es);
        }
        if (groupInfo->maxDiskSpaceUsed > 0)
        {
            int64       avgSpace = groupInfo->totalDiskSpaceUsed / groupInfo->groupCount;
            const char *spaceTypeName;
            StringInfoData diskName;

            spaceTypeName = tuplesort_space_type_name(SORT_SPACE_TYPE_DISK);
            initStringInfo(&diskName);
            appendStringInfo(&diskName, "Sort Space %s", spaceTypeName);
            ExplainOpenGroup("Sort Space", diskName.data, true, es);

            ExplainPropertyInteger("Average Sort Space Used", "kB", avgSpace, es);
            ExplainPropertyInteger("Peak Sort Space Used", "kB",
                                   groupInfo->maxDiskSpaceUsed, es);

            ExplainCloseGroup("Sort Space", diskName.data, true, es);
        }

        ExplainCloseGroup("Incremental Sort Groups", groupName.data, true, es);
    }
}
+
/*
 * If it's EXPLAIN ANALYZE, show tuplesort stats for an incremental sort node.
 *
 * Displays the leader's full-sort (and, if any, prefix-sort) group summaries,
 * then the same for each parallel worker that processed at least one group.
 */
static void
show_incremental_sort_info(IncrementalSortState *incrsortstate,
                           ExplainState *es)
{
    IncrementalSortGroupInfo *fullsortGroupInfo;
    IncrementalSortGroupInfo *prefixsortGroupInfo;

    fullsortGroupInfo = &incrsortstate->incsort_info.fullsortGroupInfo;

    if (!es->analyze)
        return;

    /*
     * Since we never have any prefix groups unless we've first sorted a full
     * groups and transitioned modes (copying the tuples into a prefix group),
     * we don't need to do anything if there were 0 full groups.
     *
     * We still have to continue after this block if there are no full groups,
     * though, since it's possible that we have workers that did real work
     * even if the leader didn't participate.
     */
    if (fullsortGroupInfo->groupCount > 0)
    {
        show_incremental_sort_group_info(fullsortGroupInfo, "Full-sort", true, es);
        prefixsortGroupInfo = &incrsortstate->incsort_info.prefixsortGroupInfo;
        if (prefixsortGroupInfo->groupCount > 0)
        {
            if (es->format == EXPLAIN_FORMAT_TEXT)
                appendStringInfoChar(es->str, '\n');
            show_incremental_sort_group_info(prefixsortGroupInfo, "Pre-sorted", true, es);
        }
        if (es->format == EXPLAIN_FORMAT_TEXT)
            appendStringInfoChar(es->str, '\n');
    }

    if (incrsortstate->shared_info != NULL)
    {
        int         n;
        bool        indent_first_line;

        for (n = 0; n < incrsortstate->shared_info->num_workers; n++)
        {
            IncrementalSortInfo *incsort_info =
            &incrsortstate->shared_info->sinfo[n];

            /*
             * If a worker hasn't processed any sort groups at all, then
             * exclude it from output since it either didn't launch or didn't
             * contribute anything meaningful.
             */
            fullsortGroupInfo = &incsort_info->fullsortGroupInfo;

            /*
             * Since we never have any prefix groups unless we've first sorted
             * a full groups and transitioned modes (copying the tuples into a
             * prefix group), we don't need to do anything if there were 0
             * full groups.
             */
            if (fullsortGroupInfo->groupCount == 0)
                continue;

            if (es->workers_state)
                ExplainOpenWorker(n, es);

            /* With hidden workers, this output masquerades as top-level */
            indent_first_line = es->workers_state == NULL || es->verbose;
            show_incremental_sort_group_info(fullsortGroupInfo, "Full-sort",
                                             indent_first_line, es);
            prefixsortGroupInfo = &incsort_info->prefixsortGroupInfo;
            if (prefixsortGroupInfo->groupCount > 0)
            {
                if (es->format == EXPLAIN_FORMAT_TEXT)
                    appendStringInfoChar(es->str, '\n');
                show_incremental_sort_group_info(prefixsortGroupInfo, "Pre-sorted", true, es);
            }
            if (es->format == EXPLAIN_FORMAT_TEXT)
                appendStringInfoChar(es->str, '\n');

            if (es->workers_state)
                ExplainCloseWorker(n, es);
        }
    }
}
+
+/*
+ * Show information on hash buckets/batches.
+ */
+static void
+show_hash_info(HashState *hashstate, ExplainState *es)
+{
+	HashInstrumentation hinstrument = {0};
+
+	/*
+	 * Collect stats from the local process, even when it's a parallel query.
+	 * In a parallel query, the leader process may or may not have run the
+	 * hash join, and even if it did it may not have built a hash table due to
+	 * timing (if it started late it might have seen no tuples in the outer
+	 * relation and skipped building the hash table).  Therefore we have to be
+	 * prepared to get instrumentation data from all participants.
+	 */
+	if (hashstate->hinstrument)
+		memcpy(&hinstrument, hashstate->hinstrument,
+			   sizeof(HashInstrumentation));
+
+	/*
+	 * Merge results from workers.  In the parallel-oblivious case, the
+	 * results from all participants should be identical, except where
+	 * participants didn't run the join at all so have no data.  In the
+	 * parallel-aware case, we need to consider all the results.  Each worker
+	 * may have seen a different subset of batches and we want to report the
+	 * highest memory usage across all batches.  We take the maxima of other
+	 * values too, for the same reasons as in ExecHashAccumInstrumentation.
+	 */
+	if (hashstate->shared_info)
+	{
+		SharedHashInfo *shared_info = hashstate->shared_info;
+		int			i;
+
+		for (i = 0; i < shared_info->num_workers; ++i)
+		{
+			HashInstrumentation *worker_hi = &shared_info->hinstrument[i];
+
+			hinstrument.nbuckets = Max(hinstrument.nbuckets,
+									   worker_hi->nbuckets);
+			hinstrument.nbuckets_original = Max(hinstrument.nbuckets_original,
+												worker_hi->nbuckets_original);
+			hinstrument.nbatch = Max(hinstrument.nbatch,
+									 worker_hi->nbatch);
+			hinstrument.nbatch_original = Max(hinstrument.nbatch_original,
+											  worker_hi->nbatch_original);
+			hinstrument.space_peak = Max(hinstrument.space_peak,
+										 worker_hi->space_peak);
+		}
+	}
+
+	if (hinstrument.nbatch > 0)
+	{
+		/* convert bytes to kB, rounding up */
+		long		spacePeakKb = (hinstrument.space_peak + 1023) / 1024;
+
+		if (es->format != EXPLAIN_FORMAT_TEXT)
+		{
+			ExplainPropertyInteger("Hash Buckets", NULL,
+								   hinstrument.nbuckets, es);
+			ExplainPropertyInteger("Original Hash Buckets", NULL,
+								   hinstrument.nbuckets_original, es);
+			ExplainPropertyInteger("Hash Batches", NULL,
+								   hinstrument.nbatch, es);
+			ExplainPropertyInteger("Original Hash Batches", NULL,
+								   hinstrument.nbatch_original, es);
+			ExplainPropertyInteger("Peak Memory Usage", "kB",
+								   spacePeakKb, es);
+		}
+		else if (hinstrument.nbatch_original != hinstrument.nbatch ||
+				 hinstrument.nbuckets_original != hinstrument.nbuckets)
+		{
+			/* In text mode, mention original values only when they changed */
+			ExplainIndentText(es);
+			appendStringInfo(es->str,
+							 "Buckets: %d (originally %d)  Batches: %d (originally %d)  Memory Usage: %ldkB\n",
+							 hinstrument.nbuckets,
+							 hinstrument.nbuckets_original,
+							 hinstrument.nbatch,
+							 hinstrument.nbatch_original,
+							 spacePeakKb);
+		}
+		else
+		{
+			ExplainIndentText(es);
+			appendStringInfo(es->str,
+							 "Buckets: %d  Batches: %d  Memory Usage: %ldkB\n",
+							 hinstrument.nbuckets, hinstrument.nbatch,
+							 spacePeakKb);
+		}
+	}
+}
+
+/*
+ * Show information on memoize hits/misses/evictions and memory usage.
+ */
+static void
+show_memoize_info(MemoizeState *mstate, List *ancestors, ExplainState *es)
+{
+	Plan	   *plan = ((PlanState *) mstate)->plan;
+	ListCell   *lc;
+	List	   *context;
+	StringInfoData keystr;
+	char	   *separator = "";
+	bool		useprefix;
+	int64		memPeakKb;
+
+	initStringInfo(&keystr);
+
+	/*
+	 * It's hard to imagine having a memoize node with fewer than 2 RTEs, but
+	 * let's just keep the same useprefix logic as elsewhere in this file.
+	 */
+	useprefix = list_length(es->rtable) > 1 || es->verbose;
+
+	/* Set up deparsing context */
+	context = set_deparse_context_plan(es->deparse_cxt,
+									   plan,
+									   ancestors);
+
+	/* Deparse each cache key expression into a comma-separated list */
+	foreach(lc, ((Memoize *) plan)->param_exprs)
+	{
+		Node	   *expr = (Node *) lfirst(lc);
+
+		appendStringInfoString(&keystr, separator);
+
+		appendStringInfoString(&keystr, deparse_expression(expr, context,
+														   useprefix, false));
+		separator = ", ";
+	}
+
+	if (es->format != EXPLAIN_FORMAT_TEXT)
+	{
+		ExplainPropertyText("Cache Key", keystr.data, es);
+		ExplainPropertyText("Cache Mode", mstate->binary_mode ? "binary" : "logical", es);
+	}
+	else
+	{
+		ExplainIndentText(es);
+		appendStringInfo(es->str, "Cache Key: %s\n", keystr.data);
+		ExplainIndentText(es);
+		appendStringInfo(es->str, "Cache Mode: %s\n", mstate->binary_mode ? "binary" : "logical");
+	}
+
+	pfree(keystr.data);
+
+	if (!es->analyze)
+		return;
+
+	if (mstate->stats.cache_misses > 0)
+	{
+		/*
+		 * mem_peak is only set when we freed memory, so we must use mem_used
+		 * when mem_peak is 0.
+		 */
+		if (mstate->stats.mem_peak > 0)
+			memPeakKb = (mstate->stats.mem_peak + 1023) / 1024;
+		else
+			memPeakKb = (mstate->mem_used + 1023) / 1024;
+
+		if (es->format != EXPLAIN_FORMAT_TEXT)
+		{
+			ExplainPropertyInteger("Cache Hits", NULL, mstate->stats.cache_hits, es);
+			ExplainPropertyInteger("Cache Misses", NULL, mstate->stats.cache_misses, es);
+			ExplainPropertyInteger("Cache Evictions", NULL, mstate->stats.cache_evictions, es);
+			ExplainPropertyInteger("Cache Overflows", NULL, mstate->stats.cache_overflows, es);
+			ExplainPropertyInteger("Peak Memory Usage", "kB", memPeakKb, es);
+		}
+		else
+		{
+			ExplainIndentText(es);
+			appendStringInfo(es->str,
+							 "Hits: " UINT64_FORMAT "  Misses: " UINT64_FORMAT "  Evictions: " UINT64_FORMAT "  Overflows: " UINT64_FORMAT "  Memory Usage: " INT64_FORMAT "kB\n",
+							 mstate->stats.cache_hits,
+							 mstate->stats.cache_misses,
+							 mstate->stats.cache_evictions,
+							 mstate->stats.cache_overflows,
+							 memPeakKb);
+		}
+	}
+
+	if (mstate->shared_info == NULL)
+		return;
+
+	/* Show details from parallel workers */
+	for (int n = 0; n < mstate->shared_info->num_workers; n++)
+	{
+		MemoizeInstrumentation *si;
+
+		si = &mstate->shared_info->sinstrument[n];
+
+		/*
+		 * Skip workers that didn't do any work.  We needn't bother checking
+		 * for cache hits as a miss will always occur before a cache hit.
+		 */
+		if (si->cache_misses == 0)
+			continue;
+
+		if (es->workers_state)
+			ExplainOpenWorker(n, es);
+
+		/*
+		 * Since the worker's MemoizeState.mem_used field is unavailable to
+		 * us, ExecEndMemoize will have set the
+		 * MemoizeInstrumentation.mem_peak field for us.  No need to do the
+		 * zero checks like we did for the serial case above.
+		 */
+		memPeakKb = (si->mem_peak + 1023) / 1024;
+
+		if (es->format == EXPLAIN_FORMAT_TEXT)
+		{
+			ExplainIndentText(es);
+			appendStringInfo(es->str,
+							 "Hits: " UINT64_FORMAT "  Misses: " UINT64_FORMAT "  Evictions: " UINT64_FORMAT "  Overflows: " UINT64_FORMAT "  Memory Usage: " INT64_FORMAT "kB\n",
+							 si->cache_hits, si->cache_misses,
+							 si->cache_evictions, si->cache_overflows,
+							 memPeakKb);
+		}
+		else
+		{
+			ExplainPropertyInteger("Cache Hits", NULL,
+								   si->cache_hits, es);
+			ExplainPropertyInteger("Cache Misses", NULL,
+								   si->cache_misses, es);
+			ExplainPropertyInteger("Cache Evictions", NULL,
+								   si->cache_evictions, es);
+			ExplainPropertyInteger("Cache Overflows", NULL,
+								   si->cache_overflows, es);
+			ExplainPropertyInteger("Peak Memory Usage", "kB", memPeakKb,
+								   es);
+		}
+
+		if (es->workers_state)
+			ExplainCloseWorker(n, es);
+	}
+}
+
+/*
+ * Show information on hash aggregate memory usage and batches.
+ */
+static void
+show_hashagg_info(AggState *aggstate, ExplainState *es)
+{
+	Agg		   *agg = (Agg *) aggstate->ss.ps.plan;
+	/* peak hash-table memory, converted from bytes to kB rounding up */
+	int64		memPeakKb = (aggstate->hash_mem_peak + 1023) / 1024;
+
+	/* Only hashed (or mixed hash/sort) aggregation has anything to report */
+	if (agg->aggstrategy != AGG_HASHED &&
+		agg->aggstrategy != AGG_MIXED)
+		return;
+
+	if (es->format != EXPLAIN_FORMAT_TEXT)
+	{
+
+		if (es->costs)
+			ExplainPropertyInteger("Planned Partitions", NULL,
+								   aggstate->hash_planned_partitions, es);
+
+		/*
+		 * During parallel query the leader may have not helped out.  We
+		 * detect this by checking how much memory it used.  If we find it
+		 * didn't do any work then we don't show its properties.
+		 */
+		if (es->analyze && aggstate->hash_mem_peak > 0)
+		{
+			ExplainPropertyInteger("HashAgg Batches", NULL,
+								   aggstate->hash_batches_used, es);
+			ExplainPropertyInteger("Peak Memory Usage", "kB", memPeakKb, es);
+			ExplainPropertyInteger("Disk Usage", "kB",
+								   aggstate->hash_disk_used, es);
+		}
+	}
+	else
+	{
+		bool		gotone = false;
+
+		if (es->costs && aggstate->hash_planned_partitions > 0)
+		{
+			ExplainIndentText(es);
+			appendStringInfo(es->str, "Planned Partitions: %d",
+							 aggstate->hash_planned_partitions);
+			gotone = true;
+		}
+
+		/*
+		 * During parallel query the leader may have not helped out.  We
+		 * detect this by checking how much memory it used.  If we find it
+		 * didn't do any work then we don't show its properties.
+		 */
+		if (es->analyze && aggstate->hash_mem_peak > 0)
+		{
+			if (!gotone)
+				ExplainIndentText(es);
+			else
+				appendStringInfoString(es->str, "  ");
+
+			appendStringInfo(es->str, "Batches: %d  Memory Usage: " INT64_FORMAT "kB",
+							 aggstate->hash_batches_used, memPeakKb);
+			gotone = true;
+
+			/* Only display disk usage if we spilled to disk */
+			if (aggstate->hash_batches_used > 1)
+			{
+				appendStringInfo(es->str, "  Disk Usage: " UINT64_FORMAT "kB",
+								 aggstate->hash_disk_used);
+			}
+		}
+
+		if (gotone)
+			appendStringInfoChar(es->str, '\n');
+	}
+
+	/* Display stats for each parallel worker */
+	if (es->analyze && aggstate->shared_info != NULL)
+	{
+		for (int n = 0; n < aggstate->shared_info->num_workers; n++)
+		{
+			AggregateInstrumentation *sinstrument;
+			uint64		hash_disk_used;
+			int			hash_batches_used;
+
+			sinstrument = &aggstate->shared_info->sinstrument[n];
+			/* Skip workers that didn't do anything */
+			if (sinstrument->hash_mem_peak == 0)
+				continue;
+			hash_disk_used = sinstrument->hash_disk_used;
+			hash_batches_used = sinstrument->hash_batches_used;
+			memPeakKb = (sinstrument->hash_mem_peak + 1023) / 1024;
+
+			if (es->workers_state)
+				ExplainOpenWorker(n, es);
+
+			if (es->format == EXPLAIN_FORMAT_TEXT)
+			{
+				ExplainIndentText(es);
+
+				appendStringInfo(es->str, "Batches: %d  Memory Usage: " INT64_FORMAT "kB",
+								 hash_batches_used, memPeakKb);
+
+				/* Only display disk usage if we spilled to disk */
+				if (hash_batches_used > 1)
+					appendStringInfo(es->str, "  Disk Usage: " UINT64_FORMAT "kB",
+									 hash_disk_used);
+				appendStringInfoChar(es->str, '\n');
+			}
+			else
+			{
+				ExplainPropertyInteger("HashAgg Batches", NULL,
+									   hash_batches_used, es);
+				ExplainPropertyInteger("Peak Memory Usage", "kB", memPeakKb,
+									   es);
+				ExplainPropertyInteger("Disk Usage", "kB", hash_disk_used, es);
+			}
+
+			if (es->workers_state)
+				ExplainCloseWorker(n, es);
+		}
+	}
+}
+
+/*
+ * If it's EXPLAIN ANALYZE, show exact/lossy pages for a BitmapHeapScan node
+ */
+static void
+show_tidbitmap_info(BitmapHeapScanState *planstate, ExplainState *es)
+{
+	if (es->format != EXPLAIN_FORMAT_TEXT)
+	{
+		/* Non-text formats emit both counters even when zero */
+		ExplainPropertyInteger("Exact Heap Blocks", NULL,
+							   planstate->exact_pages, es);
+		ExplainPropertyInteger("Lossy Heap Blocks", NULL,
+							   planstate->lossy_pages, es);
+	}
+	else
+	{
+		/* In text mode, suppress the whole line when both counts are zero */
+		if (planstate->exact_pages > 0 || planstate->lossy_pages > 0)
+		{
+			ExplainIndentText(es);
+			appendStringInfoString(es->str, "Heap Blocks:");
+			if (planstate->exact_pages > 0)
+				appendStringInfo(es->str, " exact=%ld", planstate->exact_pages);
+			if (planstate->lossy_pages > 0)
+				appendStringInfo(es->str, " lossy=%ld", planstate->lossy_pages);
+			appendStringInfoChar(es->str, '\n');
+		}
+	}
+}
+
+/*
+ * If it's EXPLAIN ANALYZE, show instrumentation information for a plan node
+ *
+ * "which" identifies which instrumentation counter to print
+ */
+static void
+show_instrumentation_count(const char *qlabel, int which,
+						   PlanState *planstate, ExplainState *es)
+{
+	double		nfiltered;
+	double		nloops;
+
+	if (!es->analyze || !planstate->instrument)
+		return;
+
+	/* "which" selects between the two filter counters: 2 -> nfiltered2 */
+	if (which == 2)
+		nfiltered = planstate->instrument->nfiltered2;
+	else
+		nfiltered = planstate->instrument->nfiltered1;
+	nloops = planstate->instrument->nloops;
+
+	/* In text mode, suppress zero counts; they're not interesting enough */
+	if (nfiltered > 0 || es->format != EXPLAIN_FORMAT_TEXT)
+	{
+		/* guard against division by zero when the node never looped */
+		if (nloops > 0)
+			ExplainPropertyFloat(qlabel, NULL, nfiltered / nloops, 0, es);
+		else
+			ExplainPropertyFloat(qlabel, NULL, 0.0, 0, es);
+	}
+}
+
+/*
+ * Show extra information for a ForeignScan node.
+ */
+static void
+show_foreignscan_info(ForeignScanState *fsstate, ExplainState *es)
+{
+	FdwRoutine *fdwroutine = fsstate->fdwroutine;
+
+	/* Let the FDW emit whatever fields it wants */
+	if (((ForeignScan *) fsstate->ss.ps.plan)->operation != CMD_SELECT)
+	{
+		/* non-SELECT means a direct-modify plan; use that callback instead */
+		if (fdwroutine->ExplainDirectModify != NULL)
+			fdwroutine->ExplainDirectModify(fsstate, es);
+	}
+	else
+	{
+		if (fdwroutine->ExplainForeignScan != NULL)
+			fdwroutine->ExplainForeignScan(fsstate, es);
+	}
+}
+
+/*
+ * Show initplan params evaluated at Gather or Gather Merge node.
+ */
+static void
+show_eval_params(Bitmapset *bms_params, ExplainState *es)
+{
+	int			paramid = -1;
+	List	   *params = NIL;
+
+	Assert(bms_params);
+
+	/* Render each member of the bitmapset as a "$N" parameter name */
+	while ((paramid = bms_next_member(bms_params, paramid)) >= 0)
+	{
+		char		param[32];	/* ample for "$" plus an int's digits */
+
+		snprintf(param, sizeof(param), "$%d", paramid);
+		params = lappend(params, pstrdup(param));
+	}
+
+	if (params)
+		ExplainPropertyList("Params Evaluated", params, es);
+}
+
+/*
+ * Fetch the name of an index in an EXPLAIN
+ *
+ * We allow plugins to get control here so that plans involving hypothetical
+ * indexes can be explained.
+ *
+ * Note: names returned by this function should be "raw"; the caller will
+ * apply quoting if needed.  Formerly the convention was to do quoting here,
+ * but we don't want that in non-text output formats.
+ */
+static const char *
+explain_get_index_name(Oid indexId)
+{
+	const char *result;
+
+	/* The hook, if set, gets first shot; NULL from it means "use catalogs" */
+	if (explain_get_index_name_hook)
+		result = (*explain_get_index_name_hook) (indexId);
+	else
+		result = NULL;
+	if (result == NULL)
+	{
+		/* default behavior: look it up in the catalogs */
+		result = get_rel_name(indexId);
+		if (result == NULL)
+			elog(ERROR, "cache lookup failed for index %u", indexId);
+	}
+	return result;
+}
+
+/*
+ * Show buffer usage details.
+ */
+static void
+show_buffer_usage(ExplainState *es, const BufferUsage *usage, bool planning)
+{
+	if (es->format == EXPLAIN_FORMAT_TEXT)
+	{
+		/* Work out which categories have anything to show before printing */
+		bool		has_shared = (usage->shared_blks_hit > 0 ||
+								  usage->shared_blks_read > 0 ||
+								  usage->shared_blks_dirtied > 0 ||
+								  usage->shared_blks_written > 0);
+		bool		has_local = (usage->local_blks_hit > 0 ||
+								 usage->local_blks_read > 0 ||
+								 usage->local_blks_dirtied > 0 ||
+								 usage->local_blks_written > 0);
+		bool		has_temp = (usage->temp_blks_read > 0 ||
+								usage->temp_blks_written > 0);
+		bool		has_timing = (!INSTR_TIME_IS_ZERO(usage->blk_read_time) ||
+								  !INSTR_TIME_IS_ZERO(usage->blk_write_time));
+		bool		has_temp_timing = (!INSTR_TIME_IS_ZERO(usage->temp_blk_read_time) ||
+									   !INSTR_TIME_IS_ZERO(usage->temp_blk_write_time));
+		bool		show_planning = (planning && (has_shared ||
+												  has_local || has_temp || has_timing ||
+												  has_temp_timing));
+
+		/* For planning-phase counters, wrap output in a "Planning:" group */
+		if (show_planning)
+		{
+			ExplainIndentText(es);
+			appendStringInfoString(es->str, "Planning:\n");
+			es->indent++;
+		}
+
+		/* Show only positive counter values. */
+		if (has_shared || has_local || has_temp)
+		{
+			ExplainIndentText(es);
+			appendStringInfoString(es->str, "Buffers:");
+
+			if (has_shared)
+			{
+				appendStringInfoString(es->str, " shared");
+				if (usage->shared_blks_hit > 0)
+					appendStringInfo(es->str, " hit=%lld",
+									 (long long) usage->shared_blks_hit);
+				if (usage->shared_blks_read > 0)
+					appendStringInfo(es->str, " read=%lld",
+									 (long long) usage->shared_blks_read);
+				if (usage->shared_blks_dirtied > 0)
+					appendStringInfo(es->str, " dirtied=%lld",
+									 (long long) usage->shared_blks_dirtied);
+				if (usage->shared_blks_written > 0)
+					appendStringInfo(es->str, " written=%lld",
+									 (long long) usage->shared_blks_written);
+				if (has_local || has_temp)
+					appendStringInfoChar(es->str, ',');
+			}
+			if (has_local)
+			{
+				appendStringInfoString(es->str, " local");
+				if (usage->local_blks_hit > 0)
+					appendStringInfo(es->str, " hit=%lld",
+									 (long long) usage->local_blks_hit);
+				if (usage->local_blks_read > 0)
+					appendStringInfo(es->str, " read=%lld",
+									 (long long) usage->local_blks_read);
+				if (usage->local_blks_dirtied > 0)
+					appendStringInfo(es->str, " dirtied=%lld",
+									 (long long) usage->local_blks_dirtied);
+				if (usage->local_blks_written > 0)
+					appendStringInfo(es->str, " written=%lld",
+									 (long long) usage->local_blks_written);
+				if (has_temp)
+					appendStringInfoChar(es->str, ',');
+			}
+			if (has_temp)
+			{
+				appendStringInfoString(es->str, " temp");
+				if (usage->temp_blks_read > 0)
+					appendStringInfo(es->str, " read=%lld",
+									 (long long) usage->temp_blks_read);
+				if (usage->temp_blks_written > 0)
+					appendStringInfo(es->str, " written=%lld",
+									 (long long) usage->temp_blks_written);
+			}
+			appendStringInfoChar(es->str, '\n');
+		}
+
+		/* As above, show only positive counter values. */
+		if (has_timing || has_temp_timing)
+		{
+			ExplainIndentText(es);
+			appendStringInfoString(es->str, "I/O Timings:");
+
+			if (has_timing)
+			{
+				appendStringInfoString(es->str, " shared/local");
+				if (!INSTR_TIME_IS_ZERO(usage->blk_read_time))
+					appendStringInfo(es->str, " read=%0.3f",
+									 INSTR_TIME_GET_MILLISEC(usage->blk_read_time));
+				if (!INSTR_TIME_IS_ZERO(usage->blk_write_time))
+					appendStringInfo(es->str, " write=%0.3f",
+									 INSTR_TIME_GET_MILLISEC(usage->blk_write_time));
+				if (has_temp_timing)
+					appendStringInfoChar(es->str, ',');
+			}
+			if (has_temp_timing)
+			{
+				appendStringInfoString(es->str, " temp");
+				if (!INSTR_TIME_IS_ZERO(usage->temp_blk_read_time))
+					appendStringInfo(es->str, " read=%0.3f",
+									 INSTR_TIME_GET_MILLISEC(usage->temp_blk_read_time));
+				if (!INSTR_TIME_IS_ZERO(usage->temp_blk_write_time))
+					appendStringInfo(es->str, " write=%0.3f",
+									 INSTR_TIME_GET_MILLISEC(usage->temp_blk_write_time));
+			}
+			appendStringInfoChar(es->str, '\n');
+		}
+
+		if (show_planning)
+			es->indent--;
+	}
+	else
+	{
+		/* Non-text formats emit every counter unconditionally */
+		ExplainPropertyInteger("Shared Hit Blocks", NULL,
+							   usage->shared_blks_hit, es);
+		ExplainPropertyInteger("Shared Read Blocks", NULL,
+							   usage->shared_blks_read, es);
+		ExplainPropertyInteger("Shared Dirtied Blocks", NULL,
+							   usage->shared_blks_dirtied, es);
+		ExplainPropertyInteger("Shared Written Blocks", NULL,
+							   usage->shared_blks_written, es);
+		ExplainPropertyInteger("Local Hit Blocks", NULL,
+							   usage->local_blks_hit, es);
+		ExplainPropertyInteger("Local Read Blocks", NULL,
+							   usage->local_blks_read, es);
+		ExplainPropertyInteger("Local Dirtied Blocks", NULL,
+							   usage->local_blks_dirtied, es);
+		ExplainPropertyInteger("Local Written Blocks", NULL,
+							   usage->local_blks_written, es);
+		ExplainPropertyInteger("Temp Read Blocks", NULL,
+							   usage->temp_blks_read, es);
+		ExplainPropertyInteger("Temp Written Blocks", NULL,
+							   usage->temp_blks_written, es);
+		if (track_io_timing)
+		{
+			ExplainPropertyFloat("I/O Read Time", "ms",
+								 INSTR_TIME_GET_MILLISEC(usage->blk_read_time),
+								 3, es);
+			ExplainPropertyFloat("I/O Write Time", "ms",
+								 INSTR_TIME_GET_MILLISEC(usage->blk_write_time),
+								 3, es);
+			ExplainPropertyFloat("Temp I/O Read Time", "ms",
+								 INSTR_TIME_GET_MILLISEC(usage->temp_blk_read_time),
+								 3, es);
+			ExplainPropertyFloat("Temp I/O Write Time", "ms",
+								 INSTR_TIME_GET_MILLISEC(usage->temp_blk_write_time),
+								 3, es);
+		}
+	}
+}
+
+/*
+ * Show WAL usage details.
+ */
+static void
+show_wal_usage(ExplainState *es, const WalUsage *usage)
+{
+	if (es->format == EXPLAIN_FORMAT_TEXT)
+	{
+		/* Show only positive counter values. */
+		if ((usage->wal_records > 0) || (usage->wal_fpi > 0) ||
+			(usage->wal_bytes > 0))
+		{
+			ExplainIndentText(es);
+			appendStringInfoString(es->str, "WAL:");
+
+			if (usage->wal_records > 0)
+				appendStringInfo(es->str, " records=%lld",
+								 (long long) usage->wal_records);
+			if (usage->wal_fpi > 0)
+				appendStringInfo(es->str, " fpi=%lld",
+								 (long long) usage->wal_fpi);
+			if (usage->wal_bytes > 0)
+				appendStringInfo(es->str, " bytes=" UINT64_FORMAT,
+								 usage->wal_bytes);
+			appendStringInfoChar(es->str, '\n');
+		}
+	}
+	else
+	{
+		/* Non-text formats emit all three counters even when zero */
+		ExplainPropertyInteger("WAL Records", NULL,
+							   usage->wal_records, es);
+		ExplainPropertyInteger("WAL FPI", NULL,
+							   usage->wal_fpi, es);
+		ExplainPropertyUInteger("WAL Bytes", NULL,
+								usage->wal_bytes, es);
+	}
+}
+
+/*
+ * Add some additional details about an IndexScan or IndexOnlyScan
+ */
+static void
+ExplainIndexScanDetails(Oid indexid, ScanDirection indexorderdir,
+						ExplainState *es)
+{
+	const char *indexname = explain_get_index_name(indexid);
+
+	if (es->format == EXPLAIN_FORMAT_TEXT)
+	{
+		/* In text mode, append to the node's headline, e.g. " Backward using idx" */
+		if (ScanDirectionIsBackward(indexorderdir))
+			appendStringInfoString(es->str, " Backward");
+		appendStringInfo(es->str, " using %s", quote_identifier(indexname));
+	}
+	else
+	{
+		const char *scandir;
+
+		switch (indexorderdir)
+		{
+			case BackwardScanDirection:
+				scandir = "Backward";
+				break;
+			case NoMovementScanDirection:
+				scandir = "NoMovement";
+				break;
+			case ForwardScanDirection:
+				scandir = "Forward";
+				break;
+			default:
+				scandir = "???";
+				break;
+		}
+		ExplainPropertyText("Scan Direction", scandir, es);
+		ExplainPropertyText("Index Name", indexname, es);
+	}
+}
+
+/*
+ * Show the target of a Scan node
+ */
+static void
+ExplainScanTarget(Scan *plan, ExplainState *es)
+{
+	/* scanrelid is passed as the range-table index to look up */
+	ExplainTargetRel((Plan *) plan, plan->scanrelid, es);
+}
+
+/*
+ * Show the target of a ModifyTable node
+ *
+ * Here we show the nominal target (ie, the relation that was named in the
+ * original query).  If the actual target(s) is/are different, we'll show them
+ * in show_modifytable_info().
+ */
+static void
+ExplainModifyTarget(ModifyTable *plan, ExplainState *es)
+{
+	/* nominalRelation is passed as the range-table index to look up */
+	ExplainTargetRel((Plan *) plan, plan->nominalRelation, es);
+}
+
+/*
+ * Show the target relation of a scan or modify node
+ */
+static void
+ExplainTargetRel(Plan *plan, Index rti, ExplainState *es)
+{
+	char	   *objectname = NULL;
+	char	   *namespace = NULL;
+	const char *objecttag = NULL;
+	RangeTblEntry *rte;
+	char	   *refname;
+
+	rte = rt_fetch(rti, es->rtable);
+	/* rtable_names may hold NULL for this RTE; fall back to its alias name */
+	refname = (char *) list_nth(es->rtable_names, rti - 1);
+	if (refname == NULL)
+		refname = rte->eref->aliasname;
+
+	/* Pick objectname/namespace/objecttag according to the node type */
+	switch (nodeTag(plan))
+	{
+		case T_SeqScan:
+		case T_SampleScan:
+		case T_IndexScan:
+		case T_IndexOnlyScan:
+		case T_BitmapHeapScan:
+		case T_TidScan:
+		case T_TidRangeScan:
+		case T_ForeignScan:
+		case T_CustomScan:
+		case T_ModifyTable:
+			/* Assert it's on a real relation */
+			Assert(rte->rtekind == RTE_RELATION);
+			objectname = get_rel_name(rte->relid);
+			if (es->verbose)
+				namespace = get_namespace_name_or_temp(get_rel_namespace(rte->relid));
+			objecttag = "Relation Name";
+			break;
+		case T_FunctionScan:
+			{
+				FunctionScan *fscan = (FunctionScan *) plan;
+
+				/* Assert it's on a RangeFunction */
+				Assert(rte->rtekind == RTE_FUNCTION);
+
+				/*
+				 * If the expression is still a function call of a single
+				 * function, we can get the real name of the function.
+				 * Otherwise, punt.  (Even if it was a single function call
+				 * originally, the optimizer could have simplified it away.)
+				 */
+				if (list_length(fscan->functions) == 1)
+				{
+					RangeTblFunction *rtfunc = (RangeTblFunction *) linitial(fscan->functions);
+
+					if (IsA(rtfunc->funcexpr, FuncExpr))
+					{
+						FuncExpr   *funcexpr = (FuncExpr *) rtfunc->funcexpr;
+						Oid			funcid = funcexpr->funcid;
+
+						objectname = get_func_name(funcid);
+						if (es->verbose)
+							namespace = get_namespace_name_or_temp(get_func_namespace(funcid));
+					}
+				}
+				objecttag = "Function Name";
+			}
+			break;
+		case T_TableFuncScan:
+			Assert(rte->rtekind == RTE_TABLEFUNC);
+			objectname = "xmltable";
+			objecttag = "Table Function Name";
+			break;
+		case T_ValuesScan:
+			Assert(rte->rtekind == RTE_VALUES);
+			break;
+		case T_CteScan:
+			/* Assert it's on a non-self-reference CTE */
+			Assert(rte->rtekind == RTE_CTE);
+			Assert(!rte->self_reference);
+			objectname = rte->ctename;
+			objecttag = "CTE Name";
+			break;
+		case T_NamedTuplestoreScan:
+			Assert(rte->rtekind == RTE_NAMEDTUPLESTORE);
+			objectname = rte->enrname;
+			objecttag = "Tuplestore Name";
+			break;
+		case T_WorkTableScan:
+			/* Assert it's on a self-reference CTE */
+			Assert(rte->rtekind == RTE_CTE);
+			Assert(rte->self_reference);
+			objectname = rte->ctename;
+			objecttag = "CTE Name";
+			break;
+		default:
+			break;
+	}
+
+	if (es->format == EXPLAIN_FORMAT_TEXT)
+	{
+		/* Text mode: " on [schema.]object [alias]", alias shown if it differs */
+		appendStringInfoString(es->str, " on");
+		if (namespace != NULL)
+			appendStringInfo(es->str, " %s.%s", quote_identifier(namespace),
+							 quote_identifier(objectname));
+		else if (objectname != NULL)
+			appendStringInfo(es->str, " %s", quote_identifier(objectname));
+		if (objectname == NULL || strcmp(refname, objectname) != 0)
+			appendStringInfo(es->str, " %s", quote_identifier(refname));
+	}
+	else
+	{
+		if (objecttag != NULL && objectname != NULL)
+			ExplainPropertyText(objecttag, objectname, es);
+		if (namespace != NULL)
+			ExplainPropertyText("Schema", namespace, es);
+		ExplainPropertyText("Alias", refname, es);
+	}
+}
+
+/*
+ * Show extra information for a ModifyTable node
+ *
+ * We have three objectives here.  First, if there's more than one target
+ * table or it's different from the nominal target, identify the actual
+ * target(s).  Second, give FDWs a chance to display extra info about foreign
+ * targets.  Third, show information about ON CONFLICT.
+ */
+static void
+show_modifytable_info(ModifyTableState *mtstate, List *ancestors,
+					  ExplainState *es)
+{
+	ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
+	const char *operation;
+	const char *foperation;
+	bool		labeltargets;
+	int			j;
+	List	   *idxNames = NIL;
+	ListCell   *lst;
+
+	/* Pick the operation labels for local and foreign targets */
+	switch (node->operation)
+	{
+		case CMD_INSERT:
+			operation = "Insert";
+			foperation = "Foreign Insert";
+			break;
+		case CMD_UPDATE:
+			operation = "Update";
+			foperation = "Foreign Update";
+			break;
+		case CMD_DELETE:
+			operation = "Delete";
+			foperation = "Foreign Delete";
+			break;
+		case CMD_MERGE:
+			operation = "Merge";
+			/* XXX unsupported for now, but avoid compiler noise */
+			foperation = "Foreign Merge";
+			break;
+		default:
+			operation = "???";
+			foperation = "Foreign ???";
+			break;
+	}
+
+	/* Should we explicitly label target relations? */
+	labeltargets = (mtstate->mt_nrels > 1 ||
+					(mtstate->mt_nrels == 1 &&
+					 mtstate->resultRelInfo[0].ri_RangeTableIndex != node->nominalRelation));
+
+	if (labeltargets)
+		ExplainOpenGroup("Target Tables", "Target Tables", false, es);
+
+	for (j = 0; j < mtstate->mt_nrels; j++)
+	{
+		ResultRelInfo *resultRelInfo = mtstate->resultRelInfo + j;
+		FdwRoutine *fdwroutine = resultRelInfo->ri_FdwRoutine;
+
+		if (labeltargets)
+		{
+			/* Open a group for this target */
+			ExplainOpenGroup("Target Table", NULL, true, es);
+
+			/*
+			 * In text mode, decorate each target with operation type, so that
+			 * ExplainTargetRel's output of " on foo" will read nicely.
+			 */
+			if (es->format == EXPLAIN_FORMAT_TEXT)
+			{
+				ExplainIndentText(es);
+				appendStringInfoString(es->str,
+									   fdwroutine ? foperation : operation);
+			}
+
+			/* Identify target */
+			ExplainTargetRel((Plan *) node,
+							 resultRelInfo->ri_RangeTableIndex,
+							 es);
+
+			if (es->format == EXPLAIN_FORMAT_TEXT)
+			{
+				appendStringInfoChar(es->str, '\n');
+				es->indent++;
+			}
+		}
+
+		/* Give FDW a chance if needed */
+		if (!resultRelInfo->ri_usesFdwDirectModify &&
+			fdwroutine != NULL &&
+			fdwroutine->ExplainForeignModify != NULL)
+		{
+			List	   *fdw_private = (List *) list_nth(node->fdwPrivLists, j);
+
+			fdwroutine->ExplainForeignModify(mtstate,
+											 resultRelInfo,
+											 fdw_private,
+											 j,
+											 es);
+		}
+
+		if (labeltargets)
+		{
+			/* Undo the indentation we added in text format */
+			if (es->format == EXPLAIN_FORMAT_TEXT)
+				es->indent--;
+
+			/* Close the group */
+			ExplainCloseGroup("Target Table", NULL, true, es);
+		}
+	}
+
+	/* Gather names of ON CONFLICT arbiter indexes */
+	foreach(lst, node->arbiterIndexes)
+	{
+		char	   *indexname = get_rel_name(lfirst_oid(lst));
+
+		idxNames = lappend(idxNames, indexname);
+	}
+
+	if (node->onConflictAction != ONCONFLICT_NONE)
+	{
+		ExplainPropertyText("Conflict Resolution",
+							node->onConflictAction == ONCONFLICT_NOTHING ?
+							"NOTHING" : "UPDATE",
+							es);
+
+		/*
+		 * Don't display arbiter indexes at all when DO NOTHING variant
+		 * implicitly ignores all conflicts
+		 */
+		if (idxNames)
+			ExplainPropertyList("Conflict Arbiter Indexes", idxNames, es);
+
+		/* ON CONFLICT DO UPDATE WHERE qual is specially displayed */
+		if (node->onConflictWhere)
+		{
+			show_upper_qual((List *) node->onConflictWhere, "Conflict Filter",
+							&mtstate->ps, ancestors, es);
+			show_instrumentation_count("Rows Removed by Conflict Filter", 1, &mtstate->ps, es);
+		}
+
+		/* EXPLAIN ANALYZE display of actual outcome for each tuple proposed */
+		if (es->analyze && mtstate->ps.instrument)
+		{
+			double		total;
+			double		insert_path;
+			double		other_path;
+
+			/* finalize the source plan's instrumentation so ntuples is current */
+			InstrEndLoop(outerPlanState(mtstate)->instrument);
+
+			/* count the number of source rows */
+			total = outerPlanState(mtstate)->instrument->ntuples;
+			other_path = mtstate->ps.instrument->ntuples2;
+			insert_path = total - other_path;
+
+			ExplainPropertyFloat("Tuples Inserted", NULL,
+								 insert_path, 0, es);
+			ExplainPropertyFloat("Conflicting Tuples", NULL,
+								 other_path, 0, es);
+		}
+	}
+	else if (node->operation == CMD_MERGE)
+	{
+		/* EXPLAIN ANALYZE display of tuples processed */
+		if (es->analyze && mtstate->ps.instrument)
+		{
+			double		total;
+			double		insert_path;
+			double		update_path;
+			double		delete_path;
+			double		skipped_path;
+
+			/* finalize the source plan's instrumentation so ntuples is current */
+			InstrEndLoop(outerPlanState(mtstate)->instrument);
+
+			/* count the number of source rows */
+			total = outerPlanState(mtstate)->instrument->ntuples;
+			insert_path = mtstate->mt_merge_inserted;
+			update_path = mtstate->mt_merge_updated;
+			delete_path = mtstate->mt_merge_deleted;
+			skipped_path = total - insert_path - update_path - delete_path;
+			Assert(skipped_path >= 0);
+
+			if (es->format == EXPLAIN_FORMAT_TEXT)
+			{
+				if (total > 0)
+				{
+					ExplainIndentText(es);
+					appendStringInfoString(es->str, "Tuples:");
+					if (insert_path > 0)
+						appendStringInfo(es->str, " inserted=%.0f", insert_path);
+					if (update_path > 0)
+						appendStringInfo(es->str, " updated=%.0f", update_path);
+					if (delete_path > 0)
+						appendStringInfo(es->str, " deleted=%.0f", delete_path);
+					if (skipped_path > 0)
+						appendStringInfo(es->str, " skipped=%.0f", skipped_path);
+					appendStringInfoChar(es->str, '\n');
+				}
+			}
+			else
+			{
+				ExplainPropertyFloat("Tuples Inserted", NULL, insert_path, 0, es);
+				ExplainPropertyFloat("Tuples Updated", NULL, update_path, 0, es);
+				ExplainPropertyFloat("Tuples Deleted", NULL, delete_path, 0, es);
+				ExplainPropertyFloat("Tuples Skipped", NULL, skipped_path, 0, es);
+			}
+		}
+	}
+
+	if (labeltargets)
+		ExplainCloseGroup("Target Tables", "Target Tables", false, es);
+}
+
+/*
+ * Explain the constituent plans of an Append, MergeAppend,
+ * BitmapAnd, or BitmapOr node.
+ *
+ * The ancestors list should already contain the immediate parent of these
+ * plans.
+ */
+static void
+ExplainMemberNodes(PlanState **planstates, int nplans,
+				   List *ancestors, ExplainState *es)
+{
+	/* Emit each live child plan as an unlabeled "Member" subnode. */
+	for (int i = 0; i < nplans; i++)
+		ExplainNode(planstates[i], ancestors, "Member", NULL, es);
+}
+
+/*
+ * Report about any pruned subnodes of an Append or MergeAppend node.
+ *
+ * nplans indicates the number of live subplans.
+ * nchildren indicates the original number of subnodes in the Plan;
+ * some of these may have been pruned by the run-time pruning code.
+ */
+static void
+ExplainMissingMembers(int nplans, int nchildren, ExplainState *es)
+{
+	/*
+	 * Text format stays silent unless something was actually pruned; the
+	 * machine-readable formats always report the count so that the field
+	 * set is stable.
+	 */
+	if (es->format != EXPLAIN_FORMAT_TEXT || nplans < nchildren)
+		ExplainPropertyInteger("Subplans Removed", NULL,
+							   nchildren - nplans, es);
+}
+
+/*
+ * Explain a list of SubPlans (or initPlans, which also use SubPlan nodes).
+ *
+ * The ancestors list should already contain the immediate parent of these
+ * SubPlans.
+ */
+static void
+ExplainSubPlans(List *plans, List *ancestors,
+				const char *relationship, ExplainState *es)
+{
+	ListCell   *lst;
+
+	foreach(lst, plans)
+	{
+		SubPlanState *sps = (SubPlanState *) lfirst(lst);
+		SubPlan    *sp = sps->subplan;
+
+		/*
+		 * There can be multiple SubPlan nodes referencing the same physical
+		 * subplan (same plan_id, which is its index in PlannedStmt.subplans).
+		 * We should print a subplan only once, so track which ones we already
+		 * printed.  This state must be global across the plan tree, since the
+		 * duplicate nodes could be in different plan nodes, eg both a bitmap
+		 * indexscan's indexqual and its parent heapscan's recheck qual.  (We
+		 * do not worry too much about which plan node we show the subplan as
+		 * attached to in such cases.)
+		 */
+		if (bms_is_member(sp->plan_id, es->printed_subplans))
+			continue;
+		es->printed_subplans = bms_add_member(es->printed_subplans,
+											  sp->plan_id);
+
+		/*
+		 * Treat the SubPlan node as an ancestor of the plan node(s) within
+		 * it, so that ruleutils.c can find the referents of subplan
+		 * parameters.
+		 */
+		ancestors = lcons(sp, ancestors);
+
+		ExplainNode(sps->planstate, ancestors,
+					relationship, sp->plan_name, es);
+
+		/* pop the SubPlan node back off the ancestors list */
+		ancestors = list_delete_first(ancestors);
+	}
+}
+
+/*
+ * Explain a list of children of a CustomScan.
+ */
+static void
+ExplainCustomChildren(CustomScanState *css, List *ancestors, ExplainState *es)
+{
+	ListCell   *lc;
+	const char *label;
+
+	/* Use a singular label when there is exactly one child plan. */
+	label = (list_length(css->custom_ps) == 1) ? "child" : "children";
+
+	foreach(lc, css->custom_ps)
+		ExplainNode((PlanState *) lfirst(lc), ancestors, label, NULL, es);
+}
+
+/*
+ * Create a per-plan-node workspace for collecting per-worker data.
+ *
+ * Output related to each worker will be temporarily "set aside" into a
+ * separate buffer, which we'll merge into the main output stream once
+ * we've processed all data for the plan node. This makes it feasible to
+ * generate a coherent sub-group of fields for each worker, even though the
+ * code that produces the fields is in several different places in this file.
+ * Formatting of such a set-aside field group is managed by
+ * ExplainOpenSetAsideGroup and ExplainSaveGroup/ExplainRestoreGroup.
+ */
+static ExplainWorkersState *
+ExplainCreateWorkersState(int num_workers)
+{
+	ExplainWorkersState *ws;
+
+	ws = (ExplainWorkersState *) palloc(sizeof(ExplainWorkersState));
+	ws->num_workers = num_workers;
+	/* zeroed arrays: no worker's buffer exists until first opened */
+	ws->worker_inited = (bool *) palloc0(num_workers * sizeof(bool));
+	ws->worker_str = (StringInfoData *)
+		palloc0(num_workers * sizeof(StringInfoData));
+	ws->worker_state_save = (int *) palloc(num_workers * sizeof(int));
+
+	return ws;
+}
+
+/*
+ * Begin or resume output into the set-aside group for worker N.
+ */
+static void
+ExplainOpenWorker(int n, ExplainState *es)
+{
+	ExplainWorkersState *wstate = es->workers_state;
+
+	Assert(wstate);
+	Assert(n >= 0 && n < wstate->num_workers);
+
+	/* Save prior output buffer pointer */
+	wstate->prev_str = es->str;
+
+	if (!wstate->worker_inited[n])
+	{
+		/* First time through, so create the buffer for this worker */
+		initStringInfo(&wstate->worker_str[n]);
+		es->str = &wstate->worker_str[n];
+
+		/*
+		 * Push suitable initial formatting state for this worker's field
+		 * group.  We allow one extra logical nesting level, since this group
+		 * will eventually be wrapped in an outer "Workers" group.
+		 */
+		ExplainOpenSetAsideGroup("Worker", NULL, true, 2, es);
+
+		/*
+		 * In non-TEXT formats we always emit a "Worker Number" field, even if
+		 * there's no other data for this worker.
+		 */
+		if (es->format != EXPLAIN_FORMAT_TEXT)
+			ExplainPropertyInteger("Worker Number", NULL, n, es);
+
+		wstate->worker_inited[n] = true;
+	}
+	else
+	{
+		/* Resuming output for a worker we've already emitted some data for */
+		es->str = &wstate->worker_str[n];
+
+		/* Restore formatting state saved by last ExplainCloseWorker() */
+		ExplainRestoreGroup(es, 2, &wstate->worker_state_save[n]);
+	}
+
+	/*
+	 * In TEXT format, prefix the first output line for this worker with
+	 * "Worker N:".  Then, any additional lines should be indented one more
+	 * stop than the "Worker N" line is.
+	 */
+	if (es->format == EXPLAIN_FORMAT_TEXT)
+	{
+		/* an empty buffer means no line has been started for this worker */
+		if (es->str->len == 0)
+		{
+			ExplainIndentText(es);
+			appendStringInfo(es->str, "Worker %d: ", n);
+		}
+
+		es->indent++;
+	}
+}
+
+/*
+ * End output for worker N --- must pair with previous ExplainOpenWorker call
+ */
+static void
+ExplainCloseWorker(int n, ExplainState *es)
+{
+	ExplainWorkersState *wstate = es->workers_state;
+
+	Assert(wstate);
+	Assert(n >= 0 && n < wstate->num_workers);
+	Assert(wstate->worker_inited[n]);
+
+	/*
+	 * Save formatting state in case we do another ExplainOpenWorker(), then
+	 * pop the formatting stack.
+	 */
+	ExplainSaveGroup(es, 2, &wstate->worker_state_save[n]);
+
+	/*
+	 * In TEXT format, if we didn't actually produce any output line(s) then
+	 * truncate off the partial line emitted by ExplainOpenWorker.  (This is
+	 * to avoid bogus output if, say, show_buffer_usage chooses not to print
+	 * anything for the worker.)  Also fix up the indent level.
+	 */
+	if (es->format == EXPLAIN_FORMAT_TEXT)
+	{
+		/* delete back to the most recent newline, which is retained */
+		while (es->str->len > 0 && es->str->data[es->str->len - 1] != '\n')
+			es->str->data[--(es->str->len)] = '\0';
+
+		es->indent--;
+	}
+
+	/* Restore prior output buffer pointer */
+	es->str = wstate->prev_str;
+}
+
+/*
+ * Print per-worker info for current node, then free the ExplainWorkersState.
+ */
+static void
+ExplainFlushWorkersState(ExplainState *es)
+{
+	ExplainWorkersState *wstate = es->workers_state;
+
+	/* Wrap all per-worker field groups in one "Workers" group */
+	ExplainOpenGroup("Workers", "Workers", false, es);
+	for (int i = 0; i < wstate->num_workers; i++)
+	{
+		/* workers that never produced any output are simply omitted */
+		if (wstate->worker_inited[i])
+		{
+			/* This must match previous ExplainOpenSetAsideGroup call */
+			ExplainOpenGroup("Worker", NULL, true, es);
+			appendStringInfoString(es->str, wstate->worker_str[i].data);
+			ExplainCloseGroup("Worker", NULL, true, es);
+
+			pfree(wstate->worker_str[i].data);
+		}
+	}
+	ExplainCloseGroup("Workers", "Workers", false, es);
+
+	/* release the workspace arrays and the workspace itself */
+	pfree(wstate->worker_inited);
+	pfree(wstate->worker_str);
+	pfree(wstate->worker_state_save);
+	pfree(wstate);
+}
+
+/*
+ * Explain a property, such as sort keys or targets, that takes the form of
+ * a list of unlabeled items. "data" is a list of C strings.
+ */
+void
+ExplainPropertyList(const char *qlabel, List *data, ExplainState *es)
+{
+	ListCell   *lc;
+	bool		first = true;
+
+	switch (es->format)
+	{
+		case EXPLAIN_FORMAT_TEXT:
+			/* "label: item1, item2, ..." on a single indented line */
+			ExplainIndentText(es);
+			appendStringInfo(es->str, "%s: ", qlabel);
+			foreach(lc, data)
+			{
+				if (!first)
+					appendStringInfoString(es->str, ", ");
+				appendStringInfoString(es->str, (const char *) lfirst(lc));
+				first = false;
+			}
+			appendStringInfoChar(es->str, '\n');
+			break;
+
+		case EXPLAIN_FORMAT_XML:
+			/* one <Item> child element per list member */
+			ExplainXMLTag(qlabel, X_OPENING, es);
+			foreach(lc, data)
+			{
+				char	   *str;
+
+				appendStringInfoSpaces(es->str, es->indent * 2 + 2);
+				appendStringInfoString(es->str, "<Item>");
+				str = escape_xml((const char *) lfirst(lc));
+				appendStringInfoString(es->str, str);
+				pfree(str);
+				appendStringInfoString(es->str, "</Item>\n");
+			}
+			ExplainXMLTag(qlabel, X_CLOSING, es);
+			break;
+
+		case EXPLAIN_FORMAT_JSON:
+			/* a JSON array of quoted strings */
+			ExplainJSONLineEnding(es);
+			appendStringInfoSpaces(es->str, es->indent * 2);
+			escape_json(es->str, qlabel);
+			appendStringInfoString(es->str, ": [");
+			foreach(lc, data)
+			{
+				if (!first)
+					appendStringInfoString(es->str, ", ");
+				escape_json(es->str, (const char *) lfirst(lc));
+				first = false;
+			}
+			appendStringInfoChar(es->str, ']');
+			break;
+
+		case EXPLAIN_FORMAT_YAML:
+			/* a YAML block sequence: one "- item" line per member */
+			ExplainYAMLLineStarting(es);
+			appendStringInfo(es->str, "%s: ", qlabel);
+			foreach(lc, data)
+			{
+				appendStringInfoChar(es->str, '\n');
+				appendStringInfoSpaces(es->str, es->indent * 2 + 2);
+				appendStringInfoString(es->str, "- ");
+				escape_yaml(es->str, (const char *) lfirst(lc));
+			}
+			break;
+	}
+}
+
+/*
+ * Explain a property that takes the form of a list of unlabeled items within
+ * another list. "data" is a list of C strings.
+ */
+void
+ExplainPropertyListNested(const char *qlabel, List *data, ExplainState *es)
+{
+	ListCell   *lc;
+	bool		first = true;
+
+	switch (es->format)
+	{
+		case EXPLAIN_FORMAT_TEXT:
+		case EXPLAIN_FORMAT_XML:
+			/* the non-nested rendering works fine for these formats */
+			ExplainPropertyList(qlabel, data, es);
+			return;
+
+		case EXPLAIN_FORMAT_JSON:
+			/* an unlabeled JSON array of strings */
+			ExplainJSONLineEnding(es);
+			appendStringInfoSpaces(es->str, es->indent * 2);
+			appendStringInfoChar(es->str, '[');
+			foreach(lc, data)
+			{
+				if (!first)
+					appendStringInfoString(es->str, ", ");
+				escape_json(es->str, (const char *) lfirst(lc));
+				first = false;
+			}
+			appendStringInfoChar(es->str, ']');
+			break;
+
+		case EXPLAIN_FORMAT_YAML:
+			/* a YAML flow sequence within an unlabeled list item */
+			ExplainYAMLLineStarting(es);
+			appendStringInfoString(es->str, "- [");
+			foreach(lc, data)
+			{
+				if (!first)
+					appendStringInfoString(es->str, ", ");
+				escape_yaml(es->str, (const char *) lfirst(lc));
+				first = false;
+			}
+			appendStringInfoChar(es->str, ']');
+			break;
+	}
+}
+
+/*
+ * Explain a simple property.
+ *
+ * If "numeric" is true, the value is a number (or other value that
+ * doesn't need quoting in JSON).
+ *
+ * If unit is non-NULL the text format will display it after the value.
+ *
+ * This usually should not be invoked directly, but via one of the datatype
+ * specific routines ExplainPropertyText, ExplainPropertyInteger, etc.
+ */
+static void
+ExplainProperty(const char *qlabel, const char *unit, const char *value,
+				bool numeric, ExplainState *es)
+{
+	switch (es->format)
+	{
+		case EXPLAIN_FORMAT_TEXT:
+			/* "label: value" or "label: value unit" on its own line */
+			ExplainIndentText(es);
+			if (unit)
+				appendStringInfo(es->str, "%s: %s %s\n", qlabel, value, unit);
+			else
+				appendStringInfo(es->str, "%s: %s\n", qlabel, value);
+			break;
+
+		case EXPLAIN_FORMAT_XML:
+			{
+				char	   *str;
+
+				/* <Label>value</Label>, all on one line */
+				appendStringInfoSpaces(es->str, es->indent * 2);
+				ExplainXMLTag(qlabel, X_OPENING | X_NOWHITESPACE, es);
+				str = escape_xml(value);
+				appendStringInfoString(es->str, str);
+				pfree(str);
+				ExplainXMLTag(qlabel, X_CLOSING | X_NOWHITESPACE, es);
+				appendStringInfoChar(es->str, '\n');
+			}
+			break;
+
+		case EXPLAIN_FORMAT_JSON:
+			ExplainJSONLineEnding(es);
+			appendStringInfoSpaces(es->str, es->indent * 2);
+			escape_json(es->str, qlabel);
+			appendStringInfoString(es->str, ": ");
+			/* numeric values are emitted unquoted */
+			if (numeric)
+				appendStringInfoString(es->str, value);
+			else
+				escape_json(es->str, value);
+			break;
+
+		case EXPLAIN_FORMAT_YAML:
+			ExplainYAMLLineStarting(es);
+			appendStringInfo(es->str, "%s: ", qlabel);
+			/* numeric values are emitted unquoted */
+			if (numeric)
+				appendStringInfoString(es->str, value);
+			else
+				escape_yaml(es->str, value);
+			break;
+	}
+}
+
+/*
+ * Explain a string-valued property.
+ */
+void
+ExplainPropertyText(const char *qlabel, const char *value, ExplainState *es)
+{
+	/* A string value: no unit, and it must be quoted in JSON/YAML. */
+	ExplainProperty(qlabel, NULL, value, false, es);
+}
+
+/*
+ * Explain an integer-valued property.
+ */
+void
+ExplainPropertyInteger(const char *qlabel, const char *unit, int64 value,
+					   ExplainState *es)
+{
+	char		numbuf[32];
+
+	/* Render the value in decimal, then emit it as a numeric property. */
+	snprintf(numbuf, sizeof(numbuf), INT64_FORMAT, value);
+	ExplainProperty(qlabel, unit, numbuf, true, es);
+}
+
+/*
+ * Explain an unsigned integer-valued property.
+ */
+void
+ExplainPropertyUInteger(const char *qlabel, const char *unit, uint64 value,
+						ExplainState *es)
+{
+	char		numbuf[32];
+
+	/* Render the value in decimal, then emit it as a numeric property. */
+	snprintf(numbuf, sizeof(numbuf), UINT64_FORMAT, value);
+	ExplainProperty(qlabel, unit, numbuf, true, es);
+}
+
+/*
+ * Explain a float-valued property, using the specified number of
+ * fractional digits.
+ */
+void
+ExplainPropertyFloat(const char *qlabel, const char *unit, double value,
+					 int ndigits, ExplainState *es)
+{
+	char	   *numstr = psprintf("%.*f", ndigits, value);
+
+	/* Emit as a numeric property, then release the formatted string. */
+	ExplainProperty(qlabel, unit, numstr, true, es);
+	pfree(numstr);
+}
+
+/*
+ * Explain a bool-valued property.
+ */
+void
+ExplainPropertyBool(const char *qlabel, bool value, ExplainState *es)
+{
+	const char *boolstr = value ? "true" : "false";
+
+	/* booleans need no quoting in any output format */
+	ExplainProperty(qlabel, NULL, boolstr, true, es);
+}
+
+/*
+ * Open a group of related objects.
+ *
+ * objtype is the type of the group object, labelname is its label within
+ * a containing object (if any).
+ *
+ * If labeled is true, the group members will be labeled properties,
+ * while if it's false, they'll be unlabeled objects.
+ */
+void
+ExplainOpenGroup(const char *objtype, const char *labelname,
+				 bool labeled, ExplainState *es)
+{
+	switch (es->format)
+	{
+		case EXPLAIN_FORMAT_TEXT:
+			/* nothing to do */
+			break;
+
+		case EXPLAIN_FORMAT_XML:
+			/* emit the opening tag; children are indented one more level */
+			ExplainXMLTag(objtype, X_OPENING, es);
+			es->indent++;
+			break;
+
+		case EXPLAIN_FORMAT_JSON:
+			ExplainJSONLineEnding(es);
+			appendStringInfoSpaces(es->str, 2 * es->indent);
+			if (labelname)
+			{
+				escape_json(es->str, labelname);
+				appendStringInfoString(es->str, ": ");
+			}
+			/* "{" opens an object of labeled members, "[" an array */
+			appendStringInfoChar(es->str, labeled ? '{' : '[');
+
+			/*
+			 * In JSON format, the grouping_stack is an integer list.  0 means
+			 * we've emitted nothing at this grouping level, 1 means we've
+			 * emitted something (and so the next item needs a comma).  See
+			 * ExplainJSONLineEnding().
+			 */
+			es->grouping_stack = lcons_int(0, es->grouping_stack);
+			es->indent++;
+			break;
+
+		case EXPLAIN_FORMAT_YAML:
+
+			/*
+			 * In YAML format, the grouping stack is an integer list.  0 means
+			 * we've emitted nothing at this grouping level AND this grouping
+			 * level is unlabeled and must be marked with "- ".  See
+			 * ExplainYAMLLineStarting().
+			 */
+			ExplainYAMLLineStarting(es);
+			if (labelname)
+			{
+				appendStringInfo(es->str, "%s: ", labelname);
+				es->grouping_stack = lcons_int(1, es->grouping_stack);
+			}
+			else
+			{
+				appendStringInfoString(es->str, "- ");
+				es->grouping_stack = lcons_int(0, es->grouping_stack);
+			}
+			es->indent++;
+			break;
+	}
+}
+
+/*
+ * Close a group of related objects.
+ * Parameters must match the corresponding ExplainOpenGroup call.
+ */
+void
+ExplainCloseGroup(const char *objtype, const char *labelname,
+				  bool labeled, ExplainState *es)
+{
+	switch (es->format)
+	{
+		case EXPLAIN_FORMAT_TEXT:
+			/* nothing to do */
+			break;
+
+		case EXPLAIN_FORMAT_XML:
+			/* outdent, then emit the matching closing tag */
+			es->indent--;
+			ExplainXMLTag(objtype, X_CLOSING, es);
+			break;
+
+		case EXPLAIN_FORMAT_JSON:
+			/* close bracket on a fresh line at the outer indent level */
+			es->indent--;
+			appendStringInfoChar(es->str, '\n');
+			appendStringInfoSpaces(es->str, 2 * es->indent);
+			appendStringInfoChar(es->str, labeled ? '}' : ']');
+			es->grouping_stack = list_delete_first(es->grouping_stack);
+			break;
+
+		case EXPLAIN_FORMAT_YAML:
+			/* no closing delimiter in YAML; just pop the stack entry */
+			es->indent--;
+			es->grouping_stack = list_delete_first(es->grouping_stack);
+			break;
+	}
+}
+
+/*
+ * Open a group of related objects, without emitting actual data.
+ *
+ * Prepare the formatting state as though we were beginning a group with
+ * the identified properties, but don't actually emit anything. Output
+ * subsequent to this call can be redirected into a separate output buffer,
+ * and then eventually appended to the main output buffer after doing a
+ * regular ExplainOpenGroup call (with the same parameters).
+ *
+ * The extra "depth" parameter is the new group's depth compared to current.
+ * It could be more than one, in case the eventual output will be enclosed
+ * in additional nesting group levels. We assume we don't need to track
+ * formatting state for those levels while preparing this group's output.
+ *
+ * There is no ExplainCloseSetAsideGroup --- in current usage, we always
+ * pop this state with ExplainSaveGroup.
+ */
+static void
+ExplainOpenSetAsideGroup(const char *objtype, const char *labelname,
+						 bool labeled, int depth, ExplainState *es)
+{
+	switch (es->format)
+	{
+		case EXPLAIN_FORMAT_TEXT:
+			/* nothing to do */
+			break;
+
+		case EXPLAIN_FORMAT_XML:
+			es->indent += depth;
+			break;
+
+		case EXPLAIN_FORMAT_JSON:
+			/* push the same stack entry ExplainOpenGroup would, sans output */
+			es->grouping_stack = lcons_int(0, es->grouping_stack);
+			es->indent += depth;
+			break;
+
+		case EXPLAIN_FORMAT_YAML:
+			/* labeled groups start at state 1, as in ExplainOpenGroup */
+			if (labelname)
+				es->grouping_stack = lcons_int(1, es->grouping_stack);
+			else
+				es->grouping_stack = lcons_int(0, es->grouping_stack);
+			es->indent += depth;
+			break;
+	}
+}
+
+/*
+ * Pop one level of grouping state, allowing for a re-push later.
+ *
+ * This is typically used after ExplainOpenSetAsideGroup; pass the
+ * same "depth" used for that.
+ *
+ * This should not emit any output. If state needs to be saved,
+ * save it at *state_save. Currently, an integer save area is sufficient
+ * for all formats, but we might need to revisit that someday.
+ */
+static void
+ExplainSaveGroup(ExplainState *es, int depth, int *state_save)
+{
+	switch (es->format)
+	{
+		case EXPLAIN_FORMAT_TEXT:
+			/* no formatting state to save */
+			break;
+
+		case EXPLAIN_FORMAT_XML:
+			es->indent -= depth;
+			break;
+
+		case EXPLAIN_FORMAT_JSON:
+		case EXPLAIN_FORMAT_YAML:
+			/* both formats keep their state atop the grouping stack */
+			es->indent -= depth;
+			*state_save = linitial_int(es->grouping_stack);
+			es->grouping_stack = list_delete_first(es->grouping_stack);
+			break;
+	}
+}
+
+/*
+ * Re-push one level of grouping state, undoing the effects of ExplainSaveGroup.
+ */
+static void
+ExplainRestoreGroup(ExplainState *es, int depth, int *state_save)
+{
+	switch (es->format)
+	{
+		case EXPLAIN_FORMAT_TEXT:
+			/* no formatting state to restore */
+			break;
+
+		case EXPLAIN_FORMAT_XML:
+			es->indent += depth;
+			break;
+
+		case EXPLAIN_FORMAT_JSON:
+		case EXPLAIN_FORMAT_YAML:
+			/* re-push the entry saved by ExplainSaveGroup */
+			es->grouping_stack = lcons_int(*state_save, es->grouping_stack);
+			es->indent += depth;
+			break;
+	}
+}
+
+/*
+ * Emit a "dummy" group that never has any members.
+ *
+ * objtype is the type of the group object, labelname is its label within
+ * a containing object (if any).
+ */
+static void
+ExplainDummyGroup(const char *objtype, const char *labelname, ExplainState *es)
+{
+	switch (es->format)
+	{
+		case EXPLAIN_FORMAT_TEXT:
+			/* nothing to do */
+			break;
+
+		case EXPLAIN_FORMAT_XML:
+			/* a self-closing tag suffices */
+			ExplainXMLTag(objtype, X_CLOSE_IMMEDIATE, es);
+			break;
+
+		case EXPLAIN_FORMAT_JSON:
+			/* emit the object type as a bare string value */
+			ExplainJSONLineEnding(es);
+			appendStringInfoSpaces(es->str, 2 * es->indent);
+			if (labelname)
+			{
+				escape_json(es->str, labelname);
+				appendStringInfoString(es->str, ": ");
+			}
+			escape_json(es->str, objtype);
+			break;
+
+		case EXPLAIN_FORMAT_YAML:
+			ExplainYAMLLineStarting(es);
+			if (labelname)
+			{
+				escape_yaml(es->str, labelname);
+				appendStringInfoString(es->str, ": ");
+			}
+			else
+			{
+				appendStringInfoString(es->str, "- ");
+			}
+			escape_yaml(es->str, objtype);
+			break;
+	}
+}
+
+/*
+ * Emit the start-of-output boilerplate.
+ *
+ * This is just enough different from processing a subgroup that we need
+ * a separate pair of subroutines.
+ */
+void
+ExplainBeginOutput(ExplainState *es)
+{
+	switch (es->format)
+	{
+		case EXPLAIN_FORMAT_TEXT:
+			/* nothing to do */
+			break;
+
+		case EXPLAIN_FORMAT_XML:
+			/* document root element with a fixed namespace */
+			appendStringInfoString(es->str,
+								   "<explain xmlns=\"http://www.postgresql.org/2009/explain\">\n");
+			es->indent++;
+			break;
+
+		case EXPLAIN_FORMAT_JSON:
+			/* top-level structure is an array of plans */
+			appendStringInfoChar(es->str, '[');
+			es->grouping_stack = lcons_int(0, es->grouping_stack);
+			es->indent++;
+			break;
+
+		case EXPLAIN_FORMAT_YAML:
+			/* no delimiter needed; just start the grouping stack */
+			es->grouping_stack = lcons_int(0, es->grouping_stack);
+			break;
+	}
+}
+
+/*
+ * Emit the end-of-output boilerplate.
+ */
+void
+ExplainEndOutput(ExplainState *es)
+{
+	switch (es->format)
+	{
+		case EXPLAIN_FORMAT_TEXT:
+			/* nothing to do */
+			break;
+
+		case EXPLAIN_FORMAT_XML:
+			/* close the document root element */
+			es->indent--;
+			appendStringInfoString(es->str, "</explain>");
+			break;
+
+		case EXPLAIN_FORMAT_JSON:
+			/* close the top-level array opened by ExplainBeginOutput */
+			es->indent--;
+			appendStringInfoString(es->str, "\n]");
+			es->grouping_stack = list_delete_first(es->grouping_stack);
+			break;
+
+		case EXPLAIN_FORMAT_YAML:
+			es->grouping_stack = list_delete_first(es->grouping_stack);
+			break;
+	}
+}
+
+/*
+ * Put an appropriate separator between multiple plans
+ */
+void
+ExplainSeparatePlans(ExplainState *es)
+{
+	/*
+	 * Only the text format needs an explicit separator (a blank line); the
+	 * structured formats delimit plans through their own syntax.
+	 */
+	if (es->format == EXPLAIN_FORMAT_TEXT)
+		appendStringInfoChar(es->str, '\n');
+}
+
+/*
+ * Emit opening or closing XML tag.
+ *
+ * "flags" must contain X_OPENING, X_CLOSING, or X_CLOSE_IMMEDIATE.
+ * Optionally, OR in X_NOWHITESPACE to suppress the whitespace we'd normally
+ * add.
+ *
+ * XML restricts tag names more than our other output formats, eg they can't
+ * contain white space or slashes. Replace invalid characters with dashes,
+ * so that for example "I/O Read Time" becomes "I-O-Read-Time".
+ */
+static void
+ExplainXMLTag(const char *tagname, int flags, ExplainState *es)
+{
+	const char *s;
+	const char *valid = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.";
+
+	if ((flags & X_NOWHITESPACE) == 0)
+		appendStringInfoSpaces(es->str, 2 * es->indent);
+	appendStringInfoCharMacro(es->str, '<');
+	if ((flags & X_CLOSING) != 0)
+		appendStringInfoCharMacro(es->str, '/');
+	/* copy the tag name, mapping characters outside "valid" to '-' */
+	for (s = tagname; *s; s++)
+		appendStringInfoChar(es->str, strchr(valid, *s) ? *s : '-');
+	if ((flags & X_CLOSE_IMMEDIATE) != 0)
+		appendStringInfoString(es->str, " /");
+	appendStringInfoCharMacro(es->str, '>');
+	if ((flags & X_NOWHITESPACE) == 0)
+		appendStringInfoCharMacro(es->str, '\n');
+}
+
+/*
+ * Indent a text-format line.
+ *
+ * We indent by two spaces per indentation level. However, when emitting
+ * data for a parallel worker there might already be data on the current line
+ * (cf. ExplainOpenWorker); in that case, don't indent any more.
+ */
+static void
+ExplainIndentText(ExplainState *es)
+{
+	bool		at_line_start;
+
+	Assert(es->format == EXPLAIN_FORMAT_TEXT);
+
+	/* Indent only when positioned at the start of a fresh output line. */
+	at_line_start = (es->str->len == 0 ||
+					 es->str->data[es->str->len - 1] == '\n');
+	if (at_line_start)
+		appendStringInfoSpaces(es->str, es->indent * 2);
+}
+
+/*
+ * Emit a JSON line ending.
+ *
+ * JSON requires a comma after each property but the last. To facilitate this,
+ * in JSON format, the text emitted for each property begins just prior to the
+ * preceding line-break (and comma, if applicable).
+ */
+static void
+ExplainJSONLineEnding(ExplainState *es)
+{
+	Assert(es->format == EXPLAIN_FORMAT_JSON);
+	/* stack top is nonzero once something has been emitted in this group */
+	if (linitial_int(es->grouping_stack) != 0)
+		appendStringInfoChar(es->str, ',');
+	else
+		linitial_int(es->grouping_stack) = 1;
+	appendStringInfoChar(es->str, '\n');
+}
+
+/*
+ * Indent a YAML line.
+ *
+ * YAML lines are ordinarily indented by two spaces per indentation level.
+ * The text emitted for each property begins just prior to the preceding
+ * line-break, except for the first property in an unlabeled group, for which
+ * it begins immediately after the "- " that introduces the group. The first
+ * property of the group appears on the same line as the opening "- ".
+ */
+static void
+ExplainYAMLLineStarting(ExplainState *es)
+{
+	Assert(es->format == EXPLAIN_FORMAT_YAML);
+
+	if (linitial_int(es->grouping_stack) != 0)
+	{
+		/* Not the first item of its group: start a fresh indented line. */
+		appendStringInfoChar(es->str, '\n');
+		appendStringInfoSpaces(es->str, es->indent * 2);
+	}
+	else
+	{
+		/* First item of an unlabeled group: stay on the "- " line. */
+		linitial_int(es->grouping_stack) = 1;
+	}
+}
+
+/*
+ * YAML is a superset of JSON; unfortunately, the YAML quoting rules are
+ * ridiculously complicated -- as documented in sections 5.3 and 7.3.3 of
+ * http://yaml.org/spec/1.2/spec.html -- so we chose to just quote everything.
+ * Empty strings, strings with leading or trailing whitespace, and strings
+ * containing a variety of special characters must certainly be quoted or the
+ * output is invalid; and other seemingly harmless strings like "0xa" or
+ * "true" must be quoted, lest they be interpreted as a hexadecimal or Boolean
+ * constant rather than a string.
+ */
+static void
+escape_yaml(StringInfo buf, const char *str)
+{
+	/* JSON-style double-quoting is also valid YAML, so simply reuse it */
+	escape_json(buf, str);
+}
diff --git a/src/backend/commands/extension.c b/src/backend/commands/extension.c
new file mode 100644
index 0000000..df6f021
--- /dev/null
+++ b/src/backend/commands/extension.c
@@ -0,0 +1,3417 @@
+/*-------------------------------------------------------------------------
+ *
+ * extension.c
+ * Commands to manipulate extensions
+ *
+ * Extensions in PostgreSQL allow management of collections of SQL objects.
+ *
+ * All we need internally to manage an extension is an OID so that the
+ * dependent objects can be associated with it. An extension is created by
+ * populating the pg_extension catalog from a "control" file.
+ * The extension control file is parsed with the same parser we use for
+ * postgresql.conf. An extension also has an installation script file,
+ * containing SQL commands to create the extension's objects.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/commands/extension.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <dirent.h>
+#include <limits.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "access/genam.h"
+#include "access/htup_details.h"
+#include "access/relation.h"
+#include "access/sysattr.h"
+#include "access/table.h"
+#include "access/xact.h"
+#include "catalog/catalog.h"
+#include "catalog/dependency.h"
+#include "catalog/indexing.h"
+#include "catalog/namespace.h"
+#include "catalog/objectaccess.h"
+#include "catalog/pg_authid.h"
+#include "catalog/pg_collation.h"
+#include "catalog/pg_depend.h"
+#include "catalog/pg_extension.h"
+#include "catalog/pg_namespace.h"
+#include "catalog/pg_type.h"
+#include "commands/alter.h"
+#include "commands/comment.h"
+#include "commands/defrem.h"
+#include "commands/extension.h"
+#include "commands/schemacmds.h"
+#include "funcapi.h"
+#include "mb/pg_wchar.h"
+#include "miscadmin.h"
+#include "nodes/makefuncs.h"
+#include "storage/fd.h"
+#include "tcop/utility.h"
+#include "utils/acl.h"
+#include "utils/builtins.h"
+#include "utils/fmgroids.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+#include "utils/snapmgr.h"
+#include "utils/varlena.h"
+
+
+/* Globally visible state variables */
+bool creating_extension = false;
+Oid CurrentExtensionObject = InvalidOid;
+
+/*
+ * Internal data structure to hold the results of parsing a control file
+ */
+typedef struct ExtensionControlFile
+{
+	/*
+	 * NOTE(review): string fields appear to be left NULL when the control
+	 * file does not set the corresponding parameter -- confirm in the
+	 * control-file parsing code.
+	 */
+	char	   *name;			/* name of the extension */
+	char	   *directory;		/* directory for script files */
+	char	   *default_version;	/* default install target version, if any */
+	char	   *module_pathname;	/* string to substitute for
+									 * MODULE_PATHNAME */
+	char	   *comment;		/* comment, if any */
+	char	   *schema;			/* target schema (allowed if !relocatable) */
+	bool		relocatable;	/* is ALTER EXTENSION SET SCHEMA supported? */
+	bool		superuser;		/* must be superuser to install? */
+	bool		trusted;		/* allow becoming superuser on the fly? */
+	int			encoding;		/* encoding of the script file, or -1 */
+	List	   *requires;		/* names of prerequisite extensions */
+} ExtensionControlFile;
+
+/*
+ * Internal data structure for update path information
+ */
+typedef struct ExtensionVersionInfo
+{
+	/* One node per version name; "reachable" edges form the update graph */
+	char	   *name;			/* name of the starting version */
+	List	   *reachable;		/* List of ExtensionVersionInfo's */
+	bool		installable;	/* does this version have an install script? */
+	/* working state for Dijkstra's algorithm: */
+	bool		distance_known; /* is distance from start known yet? */
+	int			distance;		/* current worst-case distance estimate */
+	struct ExtensionVersionInfo *previous;	/* current best predecessor */
+} ExtensionVersionInfo;
+
+/* Local functions */
+static List *find_update_path(List *evi_list,
+ ExtensionVersionInfo *evi_start,
+ ExtensionVersionInfo *evi_target,
+ bool reject_indirect,
+ bool reinitialize);
+static Oid get_required_extension(char *reqExtensionName,
+ char *extensionName,
+ char *origSchemaName,
+ bool cascade,
+ List *parents,
+ bool is_create);
+static void get_available_versions_for_extension(ExtensionControlFile *pcontrol,
+ Tuplestorestate *tupstore,
+ TupleDesc tupdesc);
+static Datum convert_requires_to_datum(List *requires);
+static void ApplyExtensionUpdates(Oid extensionOid,
+ ExtensionControlFile *pcontrol,
+ const char *initialVersion,
+ List *updateVersions,
+ char *origSchemaName,
+ bool cascade,
+ bool is_create);
+static char *read_whole_file(const char *filename, int *length);
+
+
+/*
+ * get_extension_oid - given an extension name, look up the OID
+ *
+ * If missing_ok is false, throw an error if extension name not found. If
+ * true, just return InvalidOid.
+ */
+Oid
+get_extension_oid(const char *extname, bool missing_ok)
+{
+	Oid			result;
+	Relation	rel;
+	SysScanDesc scandesc;
+	HeapTuple	tuple;
+	ScanKeyData entry[1];
+
+	/* scan pg_extension via its name index for an exact match */
+	rel = table_open(ExtensionRelationId, AccessShareLock);
+
+	ScanKeyInit(&entry[0],
+				Anum_pg_extension_extname,
+				BTEqualStrategyNumber, F_NAMEEQ,
+				CStringGetDatum(extname));
+
+	scandesc = systable_beginscan(rel, ExtensionNameIndexId, true,
+								  NULL, 1, entry);
+
+	tuple = systable_getnext(scandesc);
+
+	/* We assume that there can be at most one matching tuple */
+	if (HeapTupleIsValid(tuple))
+		result = ((Form_pg_extension) GETSTRUCT(tuple))->oid;
+	else
+		result = InvalidOid;
+
+	systable_endscan(scandesc);
+
+	table_close(rel, AccessShareLock);
+
+	/* report the failure only after releasing the scan and lock */
+	if (!OidIsValid(result) && !missing_ok)
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("extension \"%s\" does not exist",
+						extname)));
+
+	return result;
+}
+
+/*
+ * get_extension_name - given an extension OID, look up the name
+ *
+ * Returns a palloc'd string, or NULL if no such extension.
+ */
+char *
+get_extension_name(Oid ext_oid)
+{
+	char	   *extname = NULL;
+	Relation	extRel;
+	SysScanDesc scan;
+	HeapTuple	tup;
+	ScanKeyData key[1];
+
+	/* look up the pg_extension row by OID via its OID index */
+	extRel = table_open(ExtensionRelationId, AccessShareLock);
+
+	ScanKeyInit(&key[0],
+				Anum_pg_extension_oid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(ext_oid));
+
+	scan = systable_beginscan(extRel, ExtensionOidIndexId, true,
+							  NULL, 1, key);
+
+	/* at most one tuple can match a unique OID */
+	tup = systable_getnext(scan);
+	if (HeapTupleIsValid(tup))
+		extname = pstrdup(NameStr(((Form_pg_extension) GETSTRUCT(tup))->extname));
+
+	systable_endscan(scan);
+	table_close(extRel, AccessShareLock);
+
+	return extname;
+}
+
+/*
+ * get_extension_schema - given an extension OID, fetch its extnamespace
+ *
+ * Returns InvalidOid if no such extension.
+ */
+static Oid
+get_extension_schema(Oid ext_oid)
+{
+	Oid			nspid = InvalidOid;
+	Relation	extRel;
+	SysScanDesc scan;
+	HeapTuple	tup;
+	ScanKeyData key[1];
+
+	/* look up the pg_extension row by OID via its OID index */
+	extRel = table_open(ExtensionRelationId, AccessShareLock);
+
+	ScanKeyInit(&key[0],
+				Anum_pg_extension_oid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(ext_oid));
+
+	scan = systable_beginscan(extRel, ExtensionOidIndexId, true,
+							  NULL, 1, key);
+
+	/* at most one tuple can match a unique OID */
+	tup = systable_getnext(scan);
+	if (HeapTupleIsValid(tup))
+		nspid = ((Form_pg_extension) GETSTRUCT(tup))->extnamespace;
+
+	systable_endscan(scan);
+	table_close(extRel, AccessShareLock);
+
+	return nspid;
+}
+
+/*
+ * Utility functions to check validity of extension and version names
+ */
+static void
+check_valid_extension_name(const char *extensionname)
+{
+ int namelen = strlen(extensionname);
+
+ /*
+ * Disallow empty names (the parser rejects empty identifiers anyway, but
+ * let's check).
+ */
+ if (namelen == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid extension name: \"%s\"", extensionname),
+ errdetail("Extension names must not be empty.")));
+
+ /*
+ * No double dashes, since that would make script filenames ambiguous.
+ */
+ if (strstr(extensionname, "--"))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid extension name: \"%s\"", extensionname),
+ errdetail("Extension names must not contain \"--\".")));
+
+ /*
+ * No leading or trailing dash either. (We could probably allow this, but
+ * it would require much care in filename parsing and would make filenames
+ * visually if not formally ambiguous. Since there's no real-world use
+ * case, let's just forbid it.)
+ */
+ if (extensionname[0] == '-' || extensionname[namelen - 1] == '-')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid extension name: \"%s\"", extensionname),
+ errdetail("Extension names must not begin or end with \"-\".")));
+
+ /*
+ * No directory separators either (this is sufficient to prevent ".."
+ * style attacks).
+ */
+ if (first_dir_separator(extensionname) != NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid extension name: \"%s\"", extensionname),
+ errdetail("Extension names must not contain directory separator characters.")));
+}
+
+static void
+check_valid_version_name(const char *versionname)
+{
+ int namelen = strlen(versionname);
+
+ /*
+ * Disallow empty names (we could possibly allow this, but there seems
+ * little point).
+ */
+ if (namelen == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid extension version name: \"%s\"", versionname),
+ errdetail("Version names must not be empty.")));
+
+ /*
+ * No double dashes, since that would make script filenames ambiguous.
+ */
+ if (strstr(versionname, "--"))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid extension version name: \"%s\"", versionname),
+ errdetail("Version names must not contain \"--\".")));
+
+ /*
+ * No leading or trailing dash either.
+ */
+ if (versionname[0] == '-' || versionname[namelen - 1] == '-')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid extension version name: \"%s\"", versionname),
+ errdetail("Version names must not begin or end with \"-\".")));
+
+ /*
+ * No directory separators either (this is sufficient to prevent ".."
+ * style attacks).
+ */
+ if (first_dir_separator(versionname) != NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid extension version name: \"%s\"", versionname),
+ errdetail("Version names must not contain directory separator characters.")));
+}
+
/*
 * Utility functions to handle extension-related path names
 */
static bool
is_extension_control_filename(const char *filename)
{
	/* A control file ends in ".control" (last dot starts the suffix) */
	const char *suffix = strrchr(filename, '.');

	if (suffix == NULL)
		return false;
	return strcmp(suffix, ".control") == 0;
}
+
static bool
is_extension_script_filename(const char *filename)
{
	/* A script file ends in ".sql" (last dot starts the suffix) */
	const char *suffix = strrchr(filename, '.');

	if (suffix == NULL)
		return false;
	return strcmp(suffix, ".sql") == 0;
}
+
+static char *
+get_extension_control_directory(void)
+{
+ char sharepath[MAXPGPATH];
+ char *result;
+
+ get_share_path(my_exec_path, sharepath);
+ result = (char *) palloc(MAXPGPATH);
+ snprintf(result, MAXPGPATH, "%s/extension", sharepath);
+
+ return result;
+}
+
+static char *
+get_extension_control_filename(const char *extname)
+{
+ char sharepath[MAXPGPATH];
+ char *result;
+
+ get_share_path(my_exec_path, sharepath);
+ result = (char *) palloc(MAXPGPATH);
+ snprintf(result, MAXPGPATH, "%s/extension/%s.control",
+ sharepath, extname);
+
+ return result;
+}
+
+static char *
+get_extension_script_directory(ExtensionControlFile *control)
+{
+ char sharepath[MAXPGPATH];
+ char *result;
+
+ /*
+ * The directory parameter can be omitted, absolute, or relative to the
+ * installation's share directory.
+ */
+ if (!control->directory)
+ return get_extension_control_directory();
+
+ if (is_absolute_path(control->directory))
+ return pstrdup(control->directory);
+
+ get_share_path(my_exec_path, sharepath);
+ result = (char *) palloc(MAXPGPATH);
+ snprintf(result, MAXPGPATH, "%s/%s", sharepath, control->directory);
+
+ return result;
+}
+
+static char *
+get_extension_aux_control_filename(ExtensionControlFile *control,
+ const char *version)
+{
+ char *result;
+ char *scriptdir;
+
+ scriptdir = get_extension_script_directory(control);
+
+ result = (char *) palloc(MAXPGPATH);
+ snprintf(result, MAXPGPATH, "%s/%s--%s.control",
+ scriptdir, control->name, version);
+
+ pfree(scriptdir);
+
+ return result;
+}
+
+static char *
+get_extension_script_filename(ExtensionControlFile *control,
+ const char *from_version, const char *version)
+{
+ char *result;
+ char *scriptdir;
+
+ scriptdir = get_extension_script_directory(control);
+
+ result = (char *) palloc(MAXPGPATH);
+ if (from_version)
+ snprintf(result, MAXPGPATH, "%s/%s--%s--%s.sql",
+ scriptdir, control->name, from_version, version);
+ else
+ snprintf(result, MAXPGPATH, "%s/%s--%s.sql",
+ scriptdir, control->name, version);
+
+ pfree(scriptdir);
+
+ return result;
+}
+
+
/*
 * Parse contents of primary or auxiliary control file, and fill in
 * fields of *control.  We parse primary file if version == NULL,
 * else the optional auxiliary file for that version.
 *
 * Control files are supposed to be very short, half a dozen lines,
 * so we don't worry about memory allocation risks here.  Also we don't
 * worry about what encoding it's in; all values are expected to be ASCII.
 *
 * Errors out for an unreadable or syntactically invalid file, for
 * parameters not allowed in a secondary (auxiliary) file, and for a
 * missing primary control file (which means the extension isn't
 * installed on this server).  A missing auxiliary file is silently
 * ignored, leaving *control unmodified.
 */
static void
parse_extension_control_file(ExtensionControlFile *control,
							 const char *version)
{
	char	   *filename;
	FILE	   *file;
	ConfigVariable *item,
			   *head = NULL,
			   *tail = NULL;

	/*
	 * Locate the file to read.  Auxiliary files are optional.
	 */
	if (version)
		filename = get_extension_aux_control_filename(control, version);
	else
		filename = get_extension_control_filename(control->name);

	if ((file = AllocateFile(filename, "r")) == NULL)
	{
		if (errno == ENOENT)
		{
			/* no complaint for missing auxiliary file */
			if (version)
			{
				pfree(filename);
				return;
			}

			/* missing control file indicates extension is not installed */
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("extension \"%s\" is not available", control->name),
					 errdetail("Could not open extension control file \"%s\": %m.",
							   filename),
					 errhint("The extension must first be installed on the system where PostgreSQL is running.")));
		}
		/* any other open failure is reported as a plain file-access error */
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not open extension control file \"%s\": %m",
						filename)));
	}

	/*
	 * Parse the file content, using GUC's file parsing code.  We need not
	 * check the return value since any errors will be thrown at ERROR level.
	 */
	(void) ParseConfigFp(file, filename, 0, ERROR, &head, &tail);

	FreeFile(file);

	/*
	 * Convert the ConfigVariable list into ExtensionControlFile entries.
	 * Each recognized keyword overwrites the corresponding struct field;
	 * "directory" and "default_version" are rejected in auxiliary files
	 * because they would have no sensible meaning there.
	 */
	for (item = head; item != NULL; item = item->next)
	{
		if (strcmp(item->name, "directory") == 0)
		{
			if (version)
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
						 errmsg("parameter \"%s\" cannot be set in a secondary extension control file",
								item->name)));

			control->directory = pstrdup(item->value);
		}
		else if (strcmp(item->name, "default_version") == 0)
		{
			if (version)
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
						 errmsg("parameter \"%s\" cannot be set in a secondary extension control file",
								item->name)));

			control->default_version = pstrdup(item->value);
		}
		else if (strcmp(item->name, "module_pathname") == 0)
		{
			control->module_pathname = pstrdup(item->value);
		}
		else if (strcmp(item->name, "comment") == 0)
		{
			control->comment = pstrdup(item->value);
		}
		else if (strcmp(item->name, "schema") == 0)
		{
			control->schema = pstrdup(item->value);
		}
		else if (strcmp(item->name, "relocatable") == 0)
		{
			if (!parse_bool(item->value, &control->relocatable))
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
						 errmsg("parameter \"%s\" requires a Boolean value",
								item->name)));
		}
		else if (strcmp(item->name, "superuser") == 0)
		{
			if (!parse_bool(item->value, &control->superuser))
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
						 errmsg("parameter \"%s\" requires a Boolean value",
								item->name)));
		}
		else if (strcmp(item->name, "trusted") == 0)
		{
			if (!parse_bool(item->value, &control->trusted))
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
						 errmsg("parameter \"%s\" requires a Boolean value",
								item->name)));
		}
		else if (strcmp(item->name, "encoding") == 0)
		{
			/* stored as a server encoding ID, or -1 if never set */
			control->encoding = pg_valid_server_encoding(item->value);
			if (control->encoding < 0)
				ereport(ERROR,
						(errcode(ERRCODE_UNDEFINED_OBJECT),
						 errmsg("\"%s\" is not a valid encoding name",
								item->value)));
		}
		else if (strcmp(item->name, "requires") == 0)
		{
			/* Need a modifiable copy of string */
			char	   *rawnames = pstrdup(item->value);

			/* Parse string into list of identifiers */
			if (!SplitIdentifierString(rawnames, ',', &control->requires))
			{
				/* syntax error in name list */
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
						 errmsg("parameter \"%s\" must be a list of extension names",
								item->name)));
			}
		}
		else
			ereport(ERROR,
					(errcode(ERRCODE_SYNTAX_ERROR),
					 errmsg("unrecognized parameter \"%s\" in file \"%s\"",
							item->name, filename)));
	}

	FreeConfigVariables(head);

	/* a fixed schema makes no sense for a relocatable extension */
	if (control->relocatable && control->schema != NULL)
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 errmsg("parameter \"schema\" cannot be specified when \"relocatable\" is true")));

	pfree(filename);
}
+
+/*
+ * Read the primary control file for the specified extension.
+ */
+static ExtensionControlFile *
+read_extension_control_file(const char *extname)
+{
+ ExtensionControlFile *control;
+
+ /*
+ * Set up default values. Pointer fields are initially null.
+ */
+ control = (ExtensionControlFile *) palloc0(sizeof(ExtensionControlFile));
+ control->name = pstrdup(extname);
+ control->relocatable = false;
+ control->superuser = true;
+ control->trusted = false;
+ control->encoding = -1;
+
+ /*
+ * Parse the primary control file.
+ */
+ parse_extension_control_file(control, NULL);
+
+ return control;
+}
+
+/*
+ * Read the auxiliary control file for the specified extension and version.
+ *
+ * Returns a new modified ExtensionControlFile struct; the original struct
+ * (reflecting just the primary control file) is not modified.
+ */
+static ExtensionControlFile *
+read_extension_aux_control_file(const ExtensionControlFile *pcontrol,
+ const char *version)
+{
+ ExtensionControlFile *acontrol;
+
+ /*
+ * Flat-copy the struct. Pointer fields share values with original.
+ */
+ acontrol = (ExtensionControlFile *) palloc(sizeof(ExtensionControlFile));
+ memcpy(acontrol, pcontrol, sizeof(ExtensionControlFile));
+
+ /*
+ * Parse the auxiliary control file, overwriting struct fields
+ */
+ parse_extension_control_file(acontrol, version);
+
+ return acontrol;
+}
+
+/*
+ * Read an SQL script file into a string, and convert to database encoding
+ */
+static char *
+read_extension_script_file(const ExtensionControlFile *control,
+ const char *filename)
+{
+ int src_encoding;
+ char *src_str;
+ char *dest_str;
+ int len;
+
+ src_str = read_whole_file(filename, &len);
+
+ /* use database encoding if not given */
+ if (control->encoding < 0)
+ src_encoding = GetDatabaseEncoding();
+ else
+ src_encoding = control->encoding;
+
+ /* make sure that source string is valid in the expected encoding */
+ (void) pg_verify_mbstr(src_encoding, src_str, len, false);
+
+ /*
+ * Convert the encoding to the database encoding. read_whole_file
+ * null-terminated the string, so if no conversion happens the string is
+ * valid as is.
+ */
+ dest_str = pg_any_to_server(src_str, len, src_encoding);
+
+ return dest_str;
+}
+
/*
 * Execute given SQL string.
 *
 * Note: it's tempting to just use SPI to execute the string, but that does
 * not work very well.  The really serious problem is that SPI will parse,
 * analyze, and plan the whole string before executing any of it; of course
 * this fails if there are any plannable statements referring to objects
 * created earlier in the script.  A lesser annoyance is that SPI insists
 * on printing the whole string as errcontext in case of any error, and that
 * could be very long.
 */
static void
execute_sql_string(const char *sql)
{
	List	   *raw_parsetree_list;
	DestReceiver *dest;
	ListCell   *lc1;

	/*
	 * Parse the SQL string into a list of raw parse trees.
	 */
	raw_parsetree_list = pg_parse_query(sql);

	/* All output from SELECTs goes to the bit bucket */
	dest = CreateDestReceiver(DestNone);

	/*
	 * Do parse analysis, rule rewrite, planning, and execution for each raw
	 * parsetree.  We must fully execute each query before beginning parse
	 * analysis on the next one, since there may be interdependencies.
	 */
	foreach(lc1, raw_parsetree_list)
	{
		RawStmt    *parsetree = lfirst_node(RawStmt, lc1);
		MemoryContext per_parsetree_context,
					oldcontext;
		List	   *stmt_list;
		ListCell   *lc2;

		/*
		 * We do the work for each parsetree in a short-lived context, to
		 * limit the memory used when there are many commands in the string.
		 */
		per_parsetree_context =
			AllocSetContextCreate(CurrentMemoryContext,
								  "execute_sql_string per-statement context",
								  ALLOCSET_DEFAULT_SIZES);
		oldcontext = MemoryContextSwitchTo(per_parsetree_context);

		/* Be sure parser can see any DDL done so far */
		CommandCounterIncrement();

		/* analyze/rewrite and plan this statement only */
		stmt_list = pg_analyze_and_rewrite_fixedparams(parsetree,
													   sql,
													   NULL,
													   0,
													   NULL);
		stmt_list = pg_plan_queries(stmt_list, sql, CURSOR_OPT_PARALLEL_OK, NULL);

		foreach(lc2, stmt_list)
		{
			PlannedStmt *stmt = lfirst_node(PlannedStmt, lc2);

			/* make effects of the previous statement visible */
			CommandCounterIncrement();

			/* each statement runs under a fresh snapshot */
			PushActiveSnapshot(GetTransactionSnapshot());

			if (stmt->utilityStmt == NULL)
			{
				/* plannable statement: run it through the executor */
				QueryDesc  *qdesc;

				qdesc = CreateQueryDesc(stmt,
										sql,
										GetActiveSnapshot(), NULL,
										dest, NULL, NULL, 0);

				ExecutorStart(qdesc, 0);
				ExecutorRun(qdesc, ForwardScanDirection, 0, true);
				ExecutorFinish(qdesc);
				ExecutorEnd(qdesc);

				FreeQueryDesc(qdesc);
			}
			else
			{
				/*
				 * Utility statement: BEGIN/COMMIT etc. are forbidden because
				 * the script runs inside the caller's transaction.
				 */
				if (IsA(stmt->utilityStmt, TransactionStmt))
					ereport(ERROR,
							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
							 errmsg("transaction control statements are not allowed within an extension script")));

				ProcessUtility(stmt,
							   sql,
							   false,
							   PROCESS_UTILITY_QUERY,
							   NULL,
							   NULL,
							   dest,
							   NULL);
			}

			PopActiveSnapshot();
		}

		/* Clean up per-parsetree context. */
		MemoryContextSwitchTo(oldcontext);
		MemoryContextDelete(per_parsetree_context);
	}

	/* Be sure to advance the command counter after the last script command */
	CommandCounterIncrement();
}
+
+/*
+ * Policy function: is the given extension trusted for installation by a
+ * non-superuser?
+ *
+ * (Update the errhint logic below if you change this.)
+ */
+static bool
+extension_is_trusted(ExtensionControlFile *control)
+{
+ AclResult aclresult;
+
+ /* Never trust unless extension's control file says it's okay */
+ if (!control->trusted)
+ return false;
+ /* Allow if user has CREATE privilege on current database */
+ aclresult = pg_database_aclcheck(MyDatabaseId, GetUserId(), ACL_CREATE);
+ if (aclresult == ACLCHECK_OK)
+ return true;
+ return false;
+}
+
/*
 * Execute the appropriate script file for installing or updating the extension
 *
 * If from_version isn't NULL, it's an update
 *
 * extensionOid: OID of the already-created pg_extension row
 * control: control-file data governing this script
 * version: target version whose script is run
 * requiredSchemas: schema OIDs of prerequisite extensions, added to search_path
 * schemaName/schemaOid: target schema for the extension's objects
 *
 * Runs the script with a restricted search_path and temporarily adjusted
 * GUCs; may temporarily become the bootstrap superuser for a trusted
 * extension installed by a non-superuser.
 */
static void
execute_extension_script(Oid extensionOid, ExtensionControlFile *control,
						 const char *from_version,
						 const char *version,
						 List *requiredSchemas,
						 const char *schemaName, Oid schemaOid)
{
	bool		switch_to_superuser = false;
	char	   *filename;
	Oid			save_userid = 0;
	int			save_sec_context = 0;
	int			save_nestlevel;
	StringInfoData pathbuf;
	ListCell   *lc;

	/*
	 * Enforce superuser-ness if appropriate.  We postpone these checks until
	 * here so that the control flags are correctly associated with the right
	 * script(s) if they happen to be set in secondary control files.
	 */
	if (control->superuser && !superuser())
	{
		if (extension_is_trusted(control))
			switch_to_superuser = true;
		else if (from_version == NULL)
			ereport(ERROR,
					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
					 errmsg("permission denied to create extension \"%s\"",
							control->name),
					 control->trusted
					 ? errhint("Must have CREATE privilege on current database to create this extension.")
					 : errhint("Must be superuser to create this extension.")));
		else
			ereport(ERROR,
					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
					 errmsg("permission denied to update extension \"%s\"",
							control->name),
					 control->trusted
					 ? errhint("Must have CREATE privilege on current database to update this extension.")
					 : errhint("Must be superuser to update this extension.")));
	}

	filename = get_extension_script_filename(control, from_version, version);

	/*
	 * If installing a trusted extension on behalf of a non-superuser, become
	 * the bootstrap superuser.  (This switch will be cleaned up automatically
	 * if the transaction aborts, as will the GUC changes below.)
	 */
	if (switch_to_superuser)
	{
		GetUserIdAndSecContext(&save_userid, &save_sec_context);
		SetUserIdAndSecContext(BOOTSTRAP_SUPERUSERID,
							   save_sec_context | SECURITY_LOCAL_USERID_CHANGE);
	}

	/*
	 * Force client_min_messages and log_min_messages to be at least WARNING,
	 * so that we won't spam the user with useless NOTICE messages from common
	 * script actions like creating shell types.
	 *
	 * We use the equivalent of a function SET option to allow the setting to
	 * persist for exactly the duration of the script execution.  guc.c also
	 * takes care of undoing the setting on error.
	 *
	 * log_min_messages can't be set by ordinary users, so for that one we
	 * pretend to be superuser.
	 */
	save_nestlevel = NewGUCNestLevel();

	if (client_min_messages < WARNING)
		(void) set_config_option("client_min_messages", "warning",
								 PGC_USERSET, PGC_S_SESSION,
								 GUC_ACTION_SAVE, true, 0, false);
	if (log_min_messages < WARNING)
		(void) set_config_option_ext("log_min_messages", "warning",
									 PGC_SUSET, PGC_S_SESSION,
									 BOOTSTRAP_SUPERUSERID,
									 GUC_ACTION_SAVE, true, 0, false);

	/*
	 * Similarly disable check_function_bodies, to ensure that SQL functions
	 * won't be parsed during creation.
	 */
	if (check_function_bodies)
		(void) set_config_option("check_function_bodies", "off",
								 PGC_USERSET, PGC_S_SESSION,
								 GUC_ACTION_SAVE, true, 0, false);

	/*
	 * Set up the search path to have the target schema first, making it be
	 * the default creation target namespace.  Then add the schemas of any
	 * prerequisite extensions, unless they are in pg_catalog which would be
	 * searched anyway.  (Listing pg_catalog explicitly in a non-first
	 * position would be bad for security.)  Finally add pg_temp to ensure
	 * that temp objects can't take precedence over others.
	 *
	 * Note: it might look tempting to use PushOverrideSearchPath for this,
	 * but we cannot do that.  We have to actually set the search_path GUC in
	 * case the extension script examines or changes it.  In any case, the
	 * GUC_ACTION_SAVE method is just as convenient.
	 */
	initStringInfo(&pathbuf);
	appendStringInfoString(&pathbuf, quote_identifier(schemaName));
	foreach(lc, requiredSchemas)
	{
		Oid			reqschema = lfirst_oid(lc);
		char	   *reqname = get_namespace_name(reqschema);

		if (reqname && strcmp(reqname, "pg_catalog") != 0)
			appendStringInfo(&pathbuf, ", %s", quote_identifier(reqname));
	}
	appendStringInfoString(&pathbuf, ", pg_temp");

	(void) set_config_option("search_path", pathbuf.data,
							 PGC_USERSET, PGC_S_SESSION,
							 GUC_ACTION_SAVE, true, 0, false);

	/*
	 * Set creating_extension and related variables so that
	 * recordDependencyOnCurrentExtension and other functions do the right
	 * things.  On failure, ensure we reset these variables.
	 */
	creating_extension = true;
	CurrentExtensionObject = extensionOid;
	PG_TRY();
	{
		char	   *c_sql = read_extension_script_file(control, filename);
		Datum		t_sql;

		/*
		 * We filter each substitution through quote_identifier().  When the
		 * arg contains one of the following characters, no one collection of
		 * quoting can work inside $$dollar-quoted string literals$$,
		 * 'single-quoted string literals', and outside of any literal.  To
		 * avoid a security snare for extension authors, error on substitution
		 * for arguments containing these.
		 */
		const char *quoting_relevant_chars = "\"$'\\";

		/* We use various functions that want to operate on text datums */
		t_sql = CStringGetTextDatum(c_sql);

		/*
		 * Reduce any lines beginning with "\echo" to empty.  This allows
		 * scripts to contain messages telling people not to run them via
		 * psql, which has been found to be necessary due to old habits.
		 */
		t_sql = DirectFunctionCall4Coll(textregexreplace,
										C_COLLATION_OID,
										t_sql,
										CStringGetTextDatum("^\\\\echo.*$"),
										CStringGetTextDatum(""),
										CStringGetTextDatum("ng"));

		/*
		 * If the script uses @extowner@, substitute the calling username.
		 * (When running as switched superuser, the "owner" is the original
		 * caller, not the bootstrap superuser.)
		 */
		if (strstr(c_sql, "@extowner@"))
		{
			Oid			uid = switch_to_superuser ? save_userid : GetUserId();
			const char *userName = GetUserNameFromId(uid, false);
			const char *qUserName = quote_identifier(userName);

			t_sql = DirectFunctionCall3Coll(replace_text,
											C_COLLATION_OID,
											t_sql,
											CStringGetTextDatum("@extowner@"),
											CStringGetTextDatum(qUserName));
			if (strpbrk(userName, quoting_relevant_chars))
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
						 errmsg("invalid character in extension owner: must not contain any of \"%s\"",
								quoting_relevant_chars)));
		}

		/*
		 * If it's not relocatable, substitute the target schema name for
		 * occurrences of @extschema@.
		 *
		 * For a relocatable extension, we needn't do this.  There cannot be
		 * any need for @extschema@, else it wouldn't be relocatable.
		 */
		if (!control->relocatable)
		{
			Datum		old = t_sql;
			const char *qSchemaName = quote_identifier(schemaName);

			t_sql = DirectFunctionCall3Coll(replace_text,
											C_COLLATION_OID,
											t_sql,
											CStringGetTextDatum("@extschema@"),
											CStringGetTextDatum(qSchemaName));
			/* only complain if a substitution actually occurred */
			if (t_sql != old && strpbrk(schemaName, quoting_relevant_chars))
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
						 errmsg("invalid character in extension \"%s\" schema: must not contain any of \"%s\"",
								control->name, quoting_relevant_chars)));
		}

		/*
		 * If module_pathname was set in the control file, substitute its
		 * value for occurrences of MODULE_PATHNAME.
		 */
		if (control->module_pathname)
		{
			t_sql = DirectFunctionCall3Coll(replace_text,
											C_COLLATION_OID,
											t_sql,
											CStringGetTextDatum("MODULE_PATHNAME"),
											CStringGetTextDatum(control->module_pathname));
		}

		/* And now back to C string */
		c_sql = text_to_cstring(DatumGetTextPP(t_sql));

		execute_sql_string(c_sql);
	}
	PG_FINALLY();
	{
		creating_extension = false;
		CurrentExtensionObject = InvalidOid;
	}
	PG_END_TRY();

	/*
	 * Restore the GUC variables we set above.
	 */
	AtEOXact_GUC(true, save_nestlevel);

	/*
	 * Restore authentication state if needed.
	 */
	if (switch_to_superuser)
		SetUserIdAndSecContext(save_userid, save_sec_context);
}
+
+/*
+ * Find or create an ExtensionVersionInfo for the specified version name
+ *
+ * Currently, we just use a List of the ExtensionVersionInfo's. Searching
+ * for them therefore uses about O(N^2) time when there are N versions of
+ * the extension. We could change the data structure to a hash table if
+ * this ever becomes a bottleneck.
+ */
+static ExtensionVersionInfo *
+get_ext_ver_info(const char *versionname, List **evi_list)
+{
+ ExtensionVersionInfo *evi;
+ ListCell *lc;
+
+ foreach(lc, *evi_list)
+ {
+ evi = (ExtensionVersionInfo *) lfirst(lc);
+ if (strcmp(evi->name, versionname) == 0)
+ return evi;
+ }
+
+ evi = (ExtensionVersionInfo *) palloc(sizeof(ExtensionVersionInfo));
+ evi->name = pstrdup(versionname);
+ evi->reachable = NIL;
+ evi->installable = false;
+ /* initialize for later application of Dijkstra's algorithm */
+ evi->distance_known = false;
+ evi->distance = INT_MAX;
+ evi->previous = NULL;
+
+ *evi_list = lappend(*evi_list, evi);
+
+ return evi;
+}
+
+/*
+ * Locate the nearest unprocessed ExtensionVersionInfo
+ *
+ * This part of the algorithm is also about O(N^2). A priority queue would
+ * make it much faster, but for now there's no need.
+ */
+static ExtensionVersionInfo *
+get_nearest_unprocessed_vertex(List *evi_list)
+{
+ ExtensionVersionInfo *evi = NULL;
+ ListCell *lc;
+
+ foreach(lc, evi_list)
+ {
+ ExtensionVersionInfo *evi2 = (ExtensionVersionInfo *) lfirst(lc);
+
+ /* only vertices whose distance is still uncertain are candidates */
+ if (evi2->distance_known)
+ continue;
+ /* remember the closest such vertex */
+ if (evi == NULL ||
+ evi->distance > evi2->distance)
+ evi = evi2;
+ }
+
+ return evi;
+}
+
/*
 * Obtain information about the set of update scripts available for the
 * specified extension.  The result is a List of ExtensionVersionInfo
 * structs, each with a subsidiary list of the ExtensionVersionInfos for
 * the versions that can be reached in one step from that version.
 *
 * Scans the extension's script directory for files named either
 * "NAME--VERSION.sql" (install script; marks VERSION installable) or
 * "NAME--OLD--NEW.sql" (update script; adds an OLD -> NEW graph edge).
 */
static List *
get_ext_ver_list(ExtensionControlFile *control)
{
	List	   *evi_list = NIL;
	int			extnamelen = strlen(control->name);
	char	   *location;
	DIR		   *dir;
	struct dirent *de;

	location = get_extension_script_directory(control);
	dir = AllocateDir(location);
	while ((de = ReadDir(dir, location)) != NULL)
	{
		char	   *vername;
		char	   *vername2;
		ExtensionVersionInfo *evi;
		ExtensionVersionInfo *evi2;

		/* must be a .sql file ... */
		if (!is_extension_script_filename(de->d_name))
			continue;

		/* ... matching extension name followed by separator */
		if (strncmp(de->d_name, control->name, extnamelen) != 0 ||
			de->d_name[extnamelen] != '-' ||
			de->d_name[extnamelen + 1] != '-')
			continue;

		/* extract version name(s) from 'extname--something.sql' filename */
		vername = pstrdup(de->d_name + extnamelen + 2);
		/* chop off the ".sql" suffix (filename check guarantees a dot) */
		*strrchr(vername, '.') = '\0';
		vername2 = strstr(vername, "--");
		if (!vername2)
		{
			/* It's an install, not update, script; record its version name */
			evi = get_ext_ver_info(vername, &evi_list);
			evi->installable = true;
			continue;
		}
		*vername2 = '\0';		/* terminate first version */
		vername2 += 2;			/* and point to second */

		/* if there's a third --, it's bogus, ignore it */
		if (strstr(vername2, "--"))
			continue;

		/* Create ExtensionVersionInfos and link them together */
		evi = get_ext_ver_info(vername, &evi_list);
		evi2 = get_ext_ver_info(vername2, &evi_list);
		evi->reachable = lappend(evi->reachable, evi2);
	}
	FreeDir(dir);

	return evi_list;
}
+
+/*
+ * Given an initial and final version name, identify the sequence of update
+ * scripts that have to be applied to perform that update.
+ *
+ * Result is a List of names of versions to transition through (the initial
+ * version is *not* included).
+ */
+static List *
+identify_update_path(ExtensionControlFile *control,
+ const char *oldVersion, const char *newVersion)
+{
+ List *result;
+ List *evi_list;
+ ExtensionVersionInfo *evi_start;
+ ExtensionVersionInfo *evi_target;
+
+ /* Extract the version update graph from the script directory */
+ evi_list = get_ext_ver_list(control);
+
+ /* Initialize start and end vertices */
+ evi_start = get_ext_ver_info(oldVersion, &evi_list);
+ evi_target = get_ext_ver_info(newVersion, &evi_list);
+
+ /* Find shortest path */
+ result = find_update_path(evi_list, evi_start, evi_target, false, false);
+
+ if (result == NIL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("extension \"%s\" has no update path from version \"%s\" to version \"%s\"",
+ control->name, oldVersion, newVersion)));
+
+ return result;
+}
+
/*
 * Apply Dijkstra's algorithm to find the shortest path from evi_start to
 * evi_target.
 *
 * If reject_indirect is true, ignore paths that go through installable
 * versions.  This saves work when the caller will consider starting from
 * all installable versions anyway.
 *
 * If reinitialize is false, assume the ExtensionVersionInfo list has not
 * been used for this before, and the initialization done by get_ext_ver_info
 * is still good.  Otherwise, reinitialize all transient fields used here.
 *
 * Result is a List of names of versions to transition through (the initial
 * version is *not* included).  Returns NIL if no such path.
 */
static List *
find_update_path(List *evi_list,
				 ExtensionVersionInfo *evi_start,
				 ExtensionVersionInfo *evi_target,
				 bool reject_indirect,
				 bool reinitialize)
{
	List	   *result;
	ExtensionVersionInfo *evi;
	ListCell   *lc;

	/* Caller error if start == target */
	Assert(evi_start != evi_target);
	/* Caller error if reject_indirect and target is installable */
	Assert(!(reject_indirect && evi_target->installable));

	if (reinitialize)
	{
		/* reset the per-run Dijkstra bookkeeping on every vertex */
		foreach(lc, evi_list)
		{
			evi = (ExtensionVersionInfo *) lfirst(lc);
			evi->distance_known = false;
			evi->distance = INT_MAX;
			evi->previous = NULL;
		}
	}

	evi_start->distance = 0;

	while ((evi = get_nearest_unprocessed_vertex(evi_list)) != NULL)
	{
		if (evi->distance == INT_MAX)
			break;				/* all remaining vertices are unreachable */
		evi->distance_known = true;
		if (evi == evi_target)
			break;				/* found shortest path to target */
		/* relax all edges out of the newly-finalized vertex */
		foreach(lc, evi->reachable)
		{
			ExtensionVersionInfo *evi2 = (ExtensionVersionInfo *) lfirst(lc);
			int			newdist;

			/* if reject_indirect, treat installable versions as unreachable */
			if (reject_indirect && evi2->installable)
				continue;
			newdist = evi->distance + 1;
			if (newdist < evi2->distance)
			{
				evi2->distance = newdist;
				evi2->previous = evi;
			}
			else if (newdist == evi2->distance &&
					 evi2->previous != NULL &&
					 strcmp(evi->name, evi2->previous->name) < 0)
			{
				/*
				 * Break ties in favor of the version name that comes first
				 * according to strcmp().  This behavior is undocumented and
				 * users shouldn't rely on it.  We do it just to ensure that
				 * if there is a tie, the update path that is chosen does not
				 * depend on random factors like the order in which directory
				 * entries get visited.
				 */
				evi2->previous = evi;
			}
		}
	}

	/* Return NIL if target is not reachable from start */
	if (!evi_target->distance_known)
		return NIL;

	/* Build and return list of version names representing the update path */
	result = NIL;
	/* walk the predecessor chain backwards, prepending each name */
	for (evi = evi_target; evi != evi_start; evi = evi->previous)
		result = lcons(evi->name, result);

	return result;
}
+
/*
 * Given a target version that is not directly installable, find the
 * best installation sequence starting from a directly-installable version.
 *
 * evi_list: previously-collected version update graph
 * evi_target: member of that list that we want to reach
 *
 * Returns the best starting-point version, or NULL if there is none.
 * On success, *best_path is set to the path from the start point.
 *
 * If there's more than one possible start point, prefer shorter update paths,
 * and break any ties arbitrarily on the basis of strcmp'ing the starting
 * versions' names.
 */
static ExtensionVersionInfo *
find_install_path(List *evi_list, ExtensionVersionInfo *evi_target,
				  List **best_path)
{
	ExtensionVersionInfo *evi_start = NULL;
	ListCell   *lc;

	*best_path = NIL;

	/*
	 * We don't expect to be called for an installable target, but if we are,
	 * the answer is easy: just start from there, with an empty update path.
	 */
	if (evi_target->installable)
		return evi_target;

	/* Consider all installable versions as start points */
	foreach(lc, evi_list)
	{
		ExtensionVersionInfo *evi1 = (ExtensionVersionInfo *) lfirst(lc);
		List	   *path;

		if (!evi1->installable)
			continue;

		/*
		 * Find shortest path from evi1 to evi_target; but no need to consider
		 * paths going through other installable versions.
		 */
		path = find_update_path(evi_list, evi1, evi_target, true, true);
		if (path == NIL)
			continue;

		/* Remember best path: shorter wins, then lower strcmp of start name */
		if (evi_start == NULL ||
			list_length(path) < list_length(*best_path) ||
			(list_length(path) == list_length(*best_path) &&
			 strcmp(evi_start->name, evi1->name) < 0))
		{
			evi_start = evi1;
			*best_path = path;
		}
	}

	return evi_start;
}
+
/*
 * CREATE EXTENSION worker
 *
 * When CASCADE is specified, CreateExtensionInternal() recurses if required
 * extensions need to be installed.  To sanely handle cyclic dependencies,
 * the "parents" list contains a list of names of extensions already being
 * installed, allowing us to error out if we recurse to one of those.
 *
 * extensionName: name of the extension to create
 * schemaName: schema requested by the user, or NULL if none was given
 * versionName: version requested by the user, or NULL to use the control
 *		file's default version
 * cascade: if true, install missing required extensions automatically
 * parents: names of extensions being installed further up the recursion
 *		stack (NIL at top level); used for cycle detection
 * is_create: passed down to prerequisite handling, where it selects the
 *		CREATE EXTENSION ... CASCADE error hint
 *
 * Returns the ObjectAddress of the new pg_extension entry.
 */
static ObjectAddress
CreateExtensionInternal(char *extensionName,
						char *schemaName,
						const char *versionName,
						bool cascade,
						List *parents,
						bool is_create)
{
	char	   *origSchemaName = schemaName;	/* remember caller's schema
												 * choice for recursion */
	Oid			schemaOid = InvalidOid;
	Oid			extowner = GetUserId();
	ExtensionControlFile *pcontrol;
	ExtensionControlFile *control;
	char	   *filename;
	struct stat fst;
	List	   *updateVersions;
	List	   *requiredExtensions;
	List	   *requiredSchemas;
	Oid			extensionOid;
	ObjectAddress address;
	ListCell   *lc;

	/*
	 * Read the primary control file.  Note we assume that it does not contain
	 * any non-ASCII data, so there is no need to worry about encoding at this
	 * point.
	 */
	pcontrol = read_extension_control_file(extensionName);

	/*
	 * Determine the version to install
	 */
	if (versionName == NULL)
	{
		if (pcontrol->default_version)
			versionName = pcontrol->default_version;
		else
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("version to install must be specified")));
	}
	check_valid_version_name(versionName);

	/*
	 * Figure out which script(s) we need to run to install the desired
	 * version of the extension.  If we do not have a script that directly
	 * does what is needed, we try to find a sequence of update scripts that
	 * will get us there.
	 */
	filename = get_extension_script_filename(pcontrol, NULL, versionName);
	if (stat(filename, &fst) == 0)
	{
		/* Easy, no extra scripts */
		updateVersions = NIL;
	}
	else
	{
		/* Look for best way to install this version */
		List	   *evi_list;
		ExtensionVersionInfo *evi_start;
		ExtensionVersionInfo *evi_target;

		/* Extract the version update graph from the script directory */
		evi_list = get_ext_ver_list(pcontrol);

		/* Identify the target version */
		evi_target = get_ext_ver_info(versionName, &evi_list);

		/* Identify best path to reach target */
		evi_start = find_install_path(evi_list, evi_target,
									  &updateVersions);

		/* Fail if no path ... */
		if (evi_start == NULL)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("extension \"%s\" has no installation script nor update path for version \"%s\"",
							pcontrol->name, versionName)));

		/* Otherwise, install best starting point and then upgrade */
		versionName = evi_start->name;
	}

	/*
	 * Fetch control parameters for installation target version
	 */
	control = read_extension_aux_control_file(pcontrol, versionName);

	/*
	 * Determine the target schema to install the extension into
	 */
	if (schemaName)
	{
		/* If the user is giving us the schema name, it must exist already. */
		schemaOid = get_namespace_oid(schemaName, false);
	}

	if (control->schema != NULL)
	{
		/*
		 * The extension is not relocatable and the author gave us a schema
		 * for it.
		 *
		 * Unless CASCADE parameter was given, it's an error to give a schema
		 * different from control->schema if control->schema is specified.
		 */
		if (schemaName && strcmp(control->schema, schemaName) != 0 &&
			!cascade)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("extension \"%s\" must be installed in schema \"%s\"",
							control->name,
							control->schema)));

		/* Always use the schema from control file for current extension. */
		schemaName = control->schema;

		/* Find or create the schema in case it does not exist. */
		schemaOid = get_namespace_oid(schemaName, true);

		if (!OidIsValid(schemaOid))
		{
			CreateSchemaStmt *csstmt = makeNode(CreateSchemaStmt);

			csstmt->schemaname = schemaName;
			csstmt->authrole = NULL;	/* will be created by current user */
			csstmt->schemaElts = NIL;
			csstmt->if_not_exists = false;
			CreateSchemaCommand(csstmt, "(generated CREATE SCHEMA command)",
								-1, -1);

			/*
			 * CreateSchemaCommand includes CommandCounterIncrement, so new
			 * schema is now visible.
			 */
			schemaOid = get_namespace_oid(schemaName, false);
		}
	}
	else if (!OidIsValid(schemaOid))
	{
		/*
		 * Neither user nor author of the extension specified schema; use the
		 * current default creation namespace, which is the first explicit
		 * entry in the search_path.
		 */
		List	   *search_path = fetch_search_path(false);

		if (search_path == NIL) /* nothing valid in search_path? */
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_SCHEMA),
					 errmsg("no schema has been selected to create in")));
		schemaOid = linitial_oid(search_path);
		schemaName = get_namespace_name(schemaOid);
		if (schemaName == NULL) /* recently-deleted namespace? */
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_SCHEMA),
					 errmsg("no schema has been selected to create in")));

		list_free(search_path);
	}

	/*
	 * Make note if a temporary namespace has been accessed in this
	 * transaction.
	 */
	if (isTempNamespace(schemaOid))
		MyXactFlags |= XACT_FLAGS_ACCESSEDTEMPNAMESPACE;

	/*
	 * We don't check creation rights on the target namespace here.  If the
	 * extension script actually creates any objects there, it will fail if
	 * the user doesn't have such permissions.  But there are cases such as
	 * procedural languages where it's convenient to set schema = pg_catalog
	 * yet we don't want to restrict the command to users with ACL_CREATE for
	 * pg_catalog.
	 */

	/*
	 * Look up the prerequisite extensions, install them if necessary, and
	 * build lists of their OIDs and the OIDs of their target schemas.
	 */
	requiredExtensions = NIL;
	requiredSchemas = NIL;
	foreach(lc, control->requires)
	{
		char	   *curreq = (char *) lfirst(lc);
		Oid			reqext;
		Oid			reqschema;

		reqext = get_required_extension(curreq,
										extensionName,
										origSchemaName,
										cascade,
										parents,
										is_create);
		reqschema = get_extension_schema(reqext);
		requiredExtensions = lappend_oid(requiredExtensions, reqext);
		requiredSchemas = lappend_oid(requiredSchemas, reqschema);
	}

	/*
	 * Insert new tuple into pg_extension, and create dependency entries.
	 * Config/condition arrays start out NULL.
	 */
	address = InsertExtensionTuple(control->name, extowner,
								   schemaOid, control->relocatable,
								   versionName,
								   PointerGetDatum(NULL),
								   PointerGetDatum(NULL),
								   requiredExtensions);
	extensionOid = address.objectId;

	/*
	 * Apply any control-file comment on extension
	 */
	if (control->comment != NULL)
		CreateComments(extensionOid, ExtensionRelationId, 0, control->comment);

	/*
	 * Execute the installation script file
	 */
	execute_extension_script(extensionOid, control,
							 NULL, versionName,
							 requiredSchemas,
							 schemaName, schemaOid);

	/*
	 * If additional update scripts have to be executed, apply the updates as
	 * though a series of ALTER EXTENSION UPDATE commands were given
	 */
	ApplyExtensionUpdates(extensionOid, pcontrol,
						  versionName, updateVersions,
						  origSchemaName, cascade, is_create);

	return address;
}
+
+/*
+ * Get the OID of an extension listed in "requires", possibly creating it.
+ */
+static Oid
+get_required_extension(char *reqExtensionName,
+ char *extensionName,
+ char *origSchemaName,
+ bool cascade,
+ List *parents,
+ bool is_create)
+{
+ Oid reqExtensionOid;
+
+ reqExtensionOid = get_extension_oid(reqExtensionName, true);
+ if (!OidIsValid(reqExtensionOid))
+ {
+ if (cascade)
+ {
+ /* Must install it. */
+ ObjectAddress addr;
+ List *cascade_parents;
+ ListCell *lc;
+
+ /* Check extension name validity before trying to cascade. */
+ check_valid_extension_name(reqExtensionName);
+
+ /* Check for cyclic dependency between extensions. */
+ foreach(lc, parents)
+ {
+ char *pname = (char *) lfirst(lc);
+
+ if (strcmp(pname, reqExtensionName) == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_RECURSION),
+ errmsg("cyclic dependency detected between extensions \"%s\" and \"%s\"",
+ reqExtensionName, extensionName)));
+ }
+
+ ereport(NOTICE,
+ (errmsg("installing required extension \"%s\"",
+ reqExtensionName)));
+
+ /* Add current extension to list of parents to pass down. */
+ cascade_parents = lappend(list_copy(parents), extensionName);
+
+ /*
+ * Create the required extension. We propagate the SCHEMA option
+ * if any, and CASCADE, but no other options.
+ */
+ addr = CreateExtensionInternal(reqExtensionName,
+ origSchemaName,
+ NULL,
+ cascade,
+ cascade_parents,
+ is_create);
+
+ /* Get its newly-assigned OID. */
+ reqExtensionOid = addr.objectId;
+ }
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("required extension \"%s\" is not installed",
+ reqExtensionName),
+ is_create ?
+ errhint("Use CREATE EXTENSION ... CASCADE to install required extensions too.") : 0));
+ }
+
+ return reqExtensionOid;
+}
+
+/*
+ * CREATE EXTENSION
+ */
+ObjectAddress
+CreateExtension(ParseState *pstate, CreateExtensionStmt *stmt)
+{
+ DefElem *d_schema = NULL;
+ DefElem *d_new_version = NULL;
+ DefElem *d_cascade = NULL;
+ char *schemaName = NULL;
+ char *versionName = NULL;
+ bool cascade = false;
+ ListCell *lc;
+
+ /* Check extension name validity before any filesystem access */
+ check_valid_extension_name(stmt->extname);
+
+ /*
+ * Check for duplicate extension name. The unique index on
+ * pg_extension.extname would catch this anyway, and serves as a backstop
+ * in case of race conditions; but this is a friendlier error message, and
+ * besides we need a check to support IF NOT EXISTS.
+ */
+ if (get_extension_oid(stmt->extname, true) != InvalidOid)
+ {
+ if (stmt->if_not_exists)
+ {
+ ereport(NOTICE,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg("extension \"%s\" already exists, skipping",
+ stmt->extname)));
+ return InvalidObjectAddress;
+ }
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg("extension \"%s\" already exists",
+ stmt->extname)));
+ }
+
+ /*
+ * We use global variables to track the extension being created, so we can
+ * create only one extension at the same time.
+ */
+ if (creating_extension)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("nested CREATE EXTENSION is not supported")));
+
+ /* Deconstruct the statement option list */
+ foreach(lc, stmt->options)
+ {
+ DefElem *defel = (DefElem *) lfirst(lc);
+
+ if (strcmp(defel->defname, "schema") == 0)
+ {
+ if (d_schema)
+ errorConflictingDefElem(defel, pstate);
+ d_schema = defel;
+ schemaName = defGetString(d_schema);
+ }
+ else if (strcmp(defel->defname, "new_version") == 0)
+ {
+ if (d_new_version)
+ errorConflictingDefElem(defel, pstate);
+ d_new_version = defel;
+ versionName = defGetString(d_new_version);
+ }
+ else if (strcmp(defel->defname, "cascade") == 0)
+ {
+ if (d_cascade)
+ errorConflictingDefElem(defel, pstate);
+ d_cascade = defel;
+ cascade = defGetBoolean(d_cascade);
+ }
+ else
+ elog(ERROR, "unrecognized option: %s", defel->defname);
+ }
+
+ /* Call CreateExtensionInternal to do the real work. */
+ return CreateExtensionInternal(stmt->extname,
+ schemaName,
+ versionName,
+ cascade,
+ NIL,
+ true);
+}
+
+/*
+ * InsertExtensionTuple
+ *
+ * Insert the new pg_extension row, and create extension's dependency entries.
+ * Return the OID assigned to the new row.
+ *
+ * This is exported for the benefit of pg_upgrade, which has to create a
+ * pg_extension entry (and the extension-level dependencies) without
+ * actually running the extension's script.
+ *
+ * extConfig and extCondition should be arrays or PointerGetDatum(NULL).
+ * We declare them as plain Datum to avoid needing array.h in extension.h.
+ */
+ObjectAddress
+InsertExtensionTuple(const char *extName, Oid extOwner,
+ Oid schemaOid, bool relocatable, const char *extVersion,
+ Datum extConfig, Datum extCondition,
+ List *requiredExtensions)
+{
+ Oid extensionOid;
+ Relation rel;
+ Datum values[Natts_pg_extension];
+ bool nulls[Natts_pg_extension];
+ HeapTuple tuple;
+ ObjectAddress myself;
+ ObjectAddress nsp;
+ ObjectAddresses *refobjs;
+ ListCell *lc;
+
+ /*
+ * Build and insert the pg_extension tuple
+ */
+ rel = table_open(ExtensionRelationId, RowExclusiveLock);
+
+ memset(values, 0, sizeof(values));
+ memset(nulls, 0, sizeof(nulls));
+
+ extensionOid = GetNewOidWithIndex(rel, ExtensionOidIndexId,
+ Anum_pg_extension_oid);
+ values[Anum_pg_extension_oid - 1] = ObjectIdGetDatum(extensionOid);
+ values[Anum_pg_extension_extname - 1] =
+ DirectFunctionCall1(namein, CStringGetDatum(extName));
+ values[Anum_pg_extension_extowner - 1] = ObjectIdGetDatum(extOwner);
+ values[Anum_pg_extension_extnamespace - 1] = ObjectIdGetDatum(schemaOid);
+ values[Anum_pg_extension_extrelocatable - 1] = BoolGetDatum(relocatable);
+ values[Anum_pg_extension_extversion - 1] = CStringGetTextDatum(extVersion);
+
+ if (extConfig == PointerGetDatum(NULL))
+ nulls[Anum_pg_extension_extconfig - 1] = true;
+ else
+ values[Anum_pg_extension_extconfig - 1] = extConfig;
+
+ if (extCondition == PointerGetDatum(NULL))
+ nulls[Anum_pg_extension_extcondition - 1] = true;
+ else
+ values[Anum_pg_extension_extcondition - 1] = extCondition;
+
+ tuple = heap_form_tuple(rel->rd_att, values, nulls);
+
+ CatalogTupleInsert(rel, tuple);
+
+ heap_freetuple(tuple);
+ table_close(rel, RowExclusiveLock);
+
+ /*
+ * Record dependencies on owner, schema, and prerequisite extensions
+ */
+ recordDependencyOnOwner(ExtensionRelationId, extensionOid, extOwner);
+
+ refobjs = new_object_addresses();
+
+ ObjectAddressSet(myself, ExtensionRelationId, extensionOid);
+
+ ObjectAddressSet(nsp, NamespaceRelationId, schemaOid);
+ add_exact_object_address(&nsp, refobjs);
+
+ foreach(lc, requiredExtensions)
+ {
+ Oid reqext = lfirst_oid(lc);
+ ObjectAddress otherext;
+
+ ObjectAddressSet(otherext, ExtensionRelationId, reqext);
+ add_exact_object_address(&otherext, refobjs);
+ }
+
+ /* Record all of them (this includes duplicate elimination) */
+ record_object_address_dependencies(&myself, refobjs, DEPENDENCY_NORMAL);
+ free_object_addresses(refobjs);
+
+ /* Post creation hook for new extension */
+ InvokeObjectPostCreateHook(ExtensionRelationId, extensionOid, 0);
+
+ return myself;
+}
+
+/*
+ * Guts of extension deletion.
+ *
+ * All we need do here is remove the pg_extension tuple itself. Everything
+ * else is taken care of by the dependency infrastructure.
+ */
+void
+RemoveExtensionById(Oid extId)
+{
+ Relation rel;
+ SysScanDesc scandesc;
+ HeapTuple tuple;
+ ScanKeyData entry[1];
+
+ /*
+ * Disallow deletion of any extension that's currently open for insertion;
+ * else subsequent executions of recordDependencyOnCurrentExtension()
+ * could create dangling pg_depend records that refer to a no-longer-valid
+ * pg_extension OID. This is needed not so much because we think people
+ * might write "DROP EXTENSION foo" in foo's own script files, as because
+ * errors in dependency management in extension script files could give
+ * rise to cases where an extension is dropped as a result of recursing
+ * from some contained object. Because of that, we must test for the case
+ * here, not at some higher level of the DROP EXTENSION command.
+ */
+ if (extId == CurrentExtensionObject)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("cannot drop extension \"%s\" because it is being modified",
+ get_extension_name(extId))));
+
+ rel = table_open(ExtensionRelationId, RowExclusiveLock);
+
+ ScanKeyInit(&entry[0],
+ Anum_pg_extension_oid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(extId));
+ scandesc = systable_beginscan(rel, ExtensionOidIndexId, true,
+ NULL, 1, entry);
+
+ tuple = systable_getnext(scandesc);
+
+ /* We assume that there can be at most one matching tuple */
+ if (HeapTupleIsValid(tuple))
+ CatalogTupleDelete(rel, &tuple->t_self);
+
+ systable_endscan(scandesc);
+
+ table_close(rel, RowExclusiveLock);
+}
+
+/*
+ * This function lists the available extensions (one row per primary control
+ * file in the control directory). We parse each control file and report the
+ * interesting fields.
+ *
+ * The system view pg_available_extensions provides a user interface to this
+ * SRF, adding information about whether the extensions are installed in the
+ * current DB.
+ */
+Datum
+pg_available_extensions(PG_FUNCTION_ARGS)
+{
+ ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+ char *location;
+ DIR *dir;
+ struct dirent *de;
+
+ /* Build tuplestore to hold the result rows */
+ InitMaterializedSRF(fcinfo, 0);
+
+ location = get_extension_control_directory();
+ dir = AllocateDir(location);
+
+ /*
+ * If the control directory doesn't exist, we want to silently return an
+ * empty set. Any other error will be reported by ReadDir.
+ */
+ if (dir == NULL && errno == ENOENT)
+ {
+ /* do nothing */
+ }
+ else
+ {
+ while ((de = ReadDir(dir, location)) != NULL)
+ {
+ ExtensionControlFile *control;
+ char *extname;
+ Datum values[3];
+ bool nulls[3];
+
+ if (!is_extension_control_filename(de->d_name))
+ continue;
+
+ /* extract extension name from 'name.control' filename */
+ extname = pstrdup(de->d_name);
+ *strrchr(extname, '.') = '\0';
+
+ /* ignore it if it's an auxiliary control file */
+ if (strstr(extname, "--"))
+ continue;
+
+ control = read_extension_control_file(extname);
+
+ memset(values, 0, sizeof(values));
+ memset(nulls, 0, sizeof(nulls));
+
+ /* name */
+ values[0] = DirectFunctionCall1(namein,
+ CStringGetDatum(control->name));
+ /* default_version */
+ if (control->default_version == NULL)
+ nulls[1] = true;
+ else
+ values[1] = CStringGetTextDatum(control->default_version);
+ /* comment */
+ if (control->comment == NULL)
+ nulls[2] = true;
+ else
+ values[2] = CStringGetTextDatum(control->comment);
+
+ tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
+ values, nulls);
+ }
+
+ FreeDir(dir);
+ }
+
+ return (Datum) 0;
+}
+
+/*
+ * This function lists the available extension versions (one row per
+ * extension installation script). For each version, we parse the related
+ * control file(s) and report the interesting fields.
+ *
+ * The system view pg_available_extension_versions provides a user interface
+ * to this SRF, adding information about which versions are installed in the
+ * current DB.
+ */
+Datum
+pg_available_extension_versions(PG_FUNCTION_ARGS)
+{
+ ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+ char *location;
+ DIR *dir;
+ struct dirent *de;
+
+ /* Build tuplestore to hold the result rows */
+ InitMaterializedSRF(fcinfo, 0);
+
+ location = get_extension_control_directory();
+ dir = AllocateDir(location);
+
+ /*
+ * If the control directory doesn't exist, we want to silently return an
+ * empty set. Any other error will be reported by ReadDir.
+ */
+ if (dir == NULL && errno == ENOENT)
+ {
+ /* do nothing */
+ }
+ else
+ {
+ while ((de = ReadDir(dir, location)) != NULL)
+ {
+ ExtensionControlFile *control;
+ char *extname;
+
+ if (!is_extension_control_filename(de->d_name))
+ continue;
+
+ /* extract extension name from 'name.control' filename */
+ extname = pstrdup(de->d_name);
+ *strrchr(extname, '.') = '\0';
+
+ /* ignore it if it's an auxiliary control file */
+ if (strstr(extname, "--"))
+ continue;
+
+ /* read the control file */
+ control = read_extension_control_file(extname);
+
+ /* scan extension's script directory for install scripts */
+ get_available_versions_for_extension(control, rsinfo->setResult,
+ rsinfo->setDesc);
+ }
+
+ FreeDir(dir);
+ }
+
+ return (Datum) 0;
+}
+
/*
 * Inner loop for pg_available_extension_versions:
 * read versions of one extension, add rows to tupstore
 *
 * pcontrol: the extension's primary control file data (not modified here)
 * tupstore/tupdesc: where to put the result rows
 *
 * Emits one row per directly-installable version, plus one row for each
 * non-installable version that would be reached by installing from that
 * installable version.
 */
static void
get_available_versions_for_extension(ExtensionControlFile *pcontrol,
									 Tuplestorestate *tupstore,
									 TupleDesc tupdesc)
{
	List	   *evi_list;
	ListCell   *lc;

	/* Extract the version update graph from the script directory */
	evi_list = get_ext_ver_list(pcontrol);

	/* For each installable version ... */
	foreach(lc, evi_list)
	{
		ExtensionVersionInfo *evi = (ExtensionVersionInfo *) lfirst(lc);
		ExtensionControlFile *control;
		Datum		values[8];
		bool		nulls[8];
		ListCell   *lc2;

		if (!evi->installable)
			continue;

		/*
		 * Fetch parameters for specific version (pcontrol is not changed)
		 */
		control = read_extension_aux_control_file(pcontrol, evi->name);

		memset(values, 0, sizeof(values));
		memset(nulls, 0, sizeof(nulls));

		/* name */
		values[0] = DirectFunctionCall1(namein,
										CStringGetDatum(control->name));
		/* version */
		values[1] = CStringGetTextDatum(evi->name);
		/* superuser */
		values[2] = BoolGetDatum(control->superuser);
		/* trusted */
		values[3] = BoolGetDatum(control->trusted);
		/* relocatable */
		values[4] = BoolGetDatum(control->relocatable);
		/* schema */
		if (control->schema == NULL)
			nulls[5] = true;
		else
			values[5] = DirectFunctionCall1(namein,
											CStringGetDatum(control->schema));
		/* requires */
		if (control->requires == NIL)
			nulls[6] = true;
		else
			values[6] = convert_requires_to_datum(control->requires);
		/* comment */
		if (control->comment == NULL)
			nulls[7] = true;
		else
			values[7] = CStringGetTextDatum(control->comment);

		tuplestore_putvalues(tupstore, tupdesc, values, nulls);

		/*
		 * Find all non-directly-installable versions that would be installed
		 * starting from this version, and report them, inheriting the
		 * parameters that aren't changed in updates from this version.
		 *
		 * Note: the values[]/nulls[] arrays are deliberately NOT reset here;
		 * the slots for name (0), schema (5), and comment (7) retain the base
		 * version's values, while the others are overwritten below.
		 */
		foreach(lc2, evi_list)
		{
			ExtensionVersionInfo *evi2 = (ExtensionVersionInfo *) lfirst(lc2);
			List	   *best_path;

			if (evi2->installable)
				continue;
			/* report evi2 only if this version is its best starting point */
			if (find_install_path(evi_list, evi2, &best_path) == evi)
			{
				/*
				 * Fetch parameters for this version (pcontrol is not changed)
				 */
				control = read_extension_aux_control_file(pcontrol, evi2->name);

				/* name stays the same */
				/* version */
				values[1] = CStringGetTextDatum(evi2->name);
				/* superuser */
				values[2] = BoolGetDatum(control->superuser);
				/* trusted */
				values[3] = BoolGetDatum(control->trusted);
				/* relocatable */
				values[4] = BoolGetDatum(control->relocatable);
				/* schema stays the same */
				/* requires */
				if (control->requires == NIL)
					nulls[6] = true;
				else
				{
					values[6] = convert_requires_to_datum(control->requires);
					/* must clear the flag in case a previous row set it */
					nulls[6] = false;
				}
				/* comment stays the same */

				tuplestore_putvalues(tupstore, tupdesc, values, nulls);
			}
		}
	}
}
+
+/*
+ * Test whether the given extension exists (not whether it's installed)
+ *
+ * This checks for the existence of a matching control file in the extension
+ * directory. That's not a bulletproof check, since the file might be
+ * invalid, but this is only used for hints so it doesn't have to be 100%
+ * right.
+ */
+bool
+extension_file_exists(const char *extensionName)
+{
+ bool result = false;
+ char *location;
+ DIR *dir;
+ struct dirent *de;
+
+ location = get_extension_control_directory();
+ dir = AllocateDir(location);
+
+ /*
+ * If the control directory doesn't exist, we want to silently return
+ * false. Any other error will be reported by ReadDir.
+ */
+ if (dir == NULL && errno == ENOENT)
+ {
+ /* do nothing */
+ }
+ else
+ {
+ while ((de = ReadDir(dir, location)) != NULL)
+ {
+ char *extname;
+
+ if (!is_extension_control_filename(de->d_name))
+ continue;
+
+ /* extract extension name from 'name.control' filename */
+ extname = pstrdup(de->d_name);
+ *strrchr(extname, '.') = '\0';
+
+ /* ignore it if it's an auxiliary control file */
+ if (strstr(extname, "--"))
+ continue;
+
+ /* done if it matches request */
+ if (strcmp(extname, extensionName) == 0)
+ {
+ result = true;
+ break;
+ }
+ }
+
+ FreeDir(dir);
+ }
+
+ return result;
+}
+
+/*
+ * Convert a list of extension names to a name[] Datum
+ */
+static Datum
+convert_requires_to_datum(List *requires)
+{
+ Datum *datums;
+ int ndatums;
+ ArrayType *a;
+ ListCell *lc;
+
+ ndatums = list_length(requires);
+ datums = (Datum *) palloc(ndatums * sizeof(Datum));
+ ndatums = 0;
+ foreach(lc, requires)
+ {
+ char *curreq = (char *) lfirst(lc);
+
+ datums[ndatums++] =
+ DirectFunctionCall1(namein, CStringGetDatum(curreq));
+ }
+ a = construct_array(datums, ndatums,
+ NAMEOID,
+ NAMEDATALEN, false, TYPALIGN_CHAR);
+ return PointerGetDatum(a);
+}
+
+/*
+ * This function reports the version update paths that exist for the
+ * specified extension.
+ */
+Datum
+pg_extension_update_paths(PG_FUNCTION_ARGS)
+{
+ Name extname = PG_GETARG_NAME(0);
+ ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+ List *evi_list;
+ ExtensionControlFile *control;
+ ListCell *lc1;
+
+ /* Check extension name validity before any filesystem access */
+ check_valid_extension_name(NameStr(*extname));
+
+ /* Build tuplestore to hold the result rows */
+ InitMaterializedSRF(fcinfo, 0);
+
+ /* Read the extension's control file */
+ control = read_extension_control_file(NameStr(*extname));
+
+ /* Extract the version update graph from the script directory */
+ evi_list = get_ext_ver_list(control);
+
+ /* Iterate over all pairs of versions */
+ foreach(lc1, evi_list)
+ {
+ ExtensionVersionInfo *evi1 = (ExtensionVersionInfo *) lfirst(lc1);
+ ListCell *lc2;
+
+ foreach(lc2, evi_list)
+ {
+ ExtensionVersionInfo *evi2 = (ExtensionVersionInfo *) lfirst(lc2);
+ List *path;
+ Datum values[3];
+ bool nulls[3];
+
+ if (evi1 == evi2)
+ continue;
+
+ /* Find shortest path from evi1 to evi2 */
+ path = find_update_path(evi_list, evi1, evi2, false, true);
+
+ /* Emit result row */
+ memset(values, 0, sizeof(values));
+ memset(nulls, 0, sizeof(nulls));
+
+ /* source */
+ values[0] = CStringGetTextDatum(evi1->name);
+ /* target */
+ values[1] = CStringGetTextDatum(evi2->name);
+ /* path */
+ if (path == NIL)
+ nulls[2] = true;
+ else
+ {
+ StringInfoData pathbuf;
+ ListCell *lcv;
+
+ initStringInfo(&pathbuf);
+ /* The path doesn't include start vertex, but show it */
+ appendStringInfoString(&pathbuf, evi1->name);
+ foreach(lcv, path)
+ {
+ char *versionName = (char *) lfirst(lcv);
+
+ appendStringInfoString(&pathbuf, "--");
+ appendStringInfoString(&pathbuf, versionName);
+ }
+ values[2] = CStringGetTextDatum(pathbuf.data);
+ pfree(pathbuf.data);
+ }
+
+ tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
+ values, nulls);
+ }
+ }
+
+ return (Datum) 0;
+}
+
/*
 * pg_extension_config_dump
 *
 * Record information about a configuration table that belongs to an
 * extension being created, but whose contents should be dumped in whole
 * or in part during pg_dump.
 *
 * SQL arguments: the table's OID, and a WHERE condition (possibly empty)
 * selecting the rows to dump.  The table OID is appended to (or replaced in)
 * the extension's extconfig array, and the condition is stored at the same
 * position in the parallel extcondition array.
 */
Datum
pg_extension_config_dump(PG_FUNCTION_ARGS)
{
	Oid			tableoid = PG_GETARG_OID(0);
	text	   *wherecond = PG_GETARG_TEXT_PP(1);
	char	   *tablename;
	Relation	extRel;
	ScanKeyData key[1];
	SysScanDesc extScan;
	HeapTuple	extTup;
	Datum		arrayDatum;
	Datum		elementDatum;
	int			arrayLength;
	int			arrayIndex;		/* 1-based subscript, as array_set expects */
	bool		isnull;
	Datum		repl_val[Natts_pg_extension];
	bool		repl_null[Natts_pg_extension];
	bool		repl_repl[Natts_pg_extension];
	ArrayType  *a;

	/*
	 * We only allow this to be called from an extension's SQL script. We
	 * shouldn't need any permissions check beyond that.
	 */
	if (!creating_extension)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("%s can only be called from an SQL script executed by CREATE EXTENSION",
						"pg_extension_config_dump()")));

	/*
	 * Check that the table exists and is a member of the extension being
	 * created.  This ensures that we don't need to register an additional
	 * dependency to protect the extconfig entry.
	 */
	tablename = get_rel_name(tableoid);
	if (tablename == NULL)
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_TABLE),
				 errmsg("OID %u does not refer to a table", tableoid)));
	if (getExtensionOfObject(RelationRelationId, tableoid) !=
		CurrentExtensionObject)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("table \"%s\" is not a member of the extension being created",
						tablename)));

	/*
	 * Add the table OID and WHERE condition to the extension's extconfig and
	 * extcondition arrays.
	 *
	 * If the table is already in extconfig, treat this as an update of the
	 * WHERE condition.
	 */

	/* Find the pg_extension tuple */
	extRel = table_open(ExtensionRelationId, RowExclusiveLock);

	ScanKeyInit(&key[0],
				Anum_pg_extension_oid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(CurrentExtensionObject));

	extScan = systable_beginscan(extRel, ExtensionOidIndexId, true,
								 NULL, 1, key);

	extTup = systable_getnext(extScan);

	if (!HeapTupleIsValid(extTup))	/* should not happen */
		elog(ERROR, "could not find tuple for extension %u",
			 CurrentExtensionObject);

	memset(repl_val, 0, sizeof(repl_val));
	memset(repl_null, false, sizeof(repl_null));
	memset(repl_repl, false, sizeof(repl_repl));

	/* Build or modify the extconfig value */
	elementDatum = ObjectIdGetDatum(tableoid);

	arrayDatum = heap_getattr(extTup, Anum_pg_extension_extconfig,
							  RelationGetDescr(extRel), &isnull);
	if (isnull)
	{
		/* Previously empty extconfig, so build 1-element array */
		arrayLength = 0;
		arrayIndex = 1;

		a = construct_array(&elementDatum, 1,
							OIDOID,
							sizeof(Oid), true, TYPALIGN_INT);
	}
	else
	{
		/* Modify or extend existing extconfig array */
		Oid		   *arrayData;
		int			i;

		a = DatumGetArrayTypeP(arrayDatum);

		/* sanity-check the stored array before touching it */
		arrayLength = ARR_DIMS(a)[0];
		if (ARR_NDIM(a) != 1 ||
			ARR_LBOUND(a)[0] != 1 ||
			arrayLength < 0 ||
			ARR_HASNULL(a) ||
			ARR_ELEMTYPE(a) != OIDOID)
			elog(ERROR, "extconfig is not a 1-D Oid array");
		arrayData = (Oid *) ARR_DATA_PTR(a);

		arrayIndex = arrayLength + 1;	/* set up to add after end */

		/* if the table is already listed, replace its entry instead */
		for (i = 0; i < arrayLength; i++)
		{
			if (arrayData[i] == tableoid)
			{
				arrayIndex = i + 1; /* replace this element instead */
				break;
			}
		}

		a = array_set(a, 1, &arrayIndex,
					  elementDatum,
					  false,
					  -1 /* varlena array */ ,
					  sizeof(Oid) /* OID's typlen */ ,
					  true /* OID's typbyval */ ,
					  TYPALIGN_INT /* OID's typalign */ );
	}
	repl_val[Anum_pg_extension_extconfig - 1] = PointerGetDatum(a);
	repl_repl[Anum_pg_extension_extconfig - 1] = true;

	/*
	 * Build or modify the extcondition value; it is kept strictly parallel to
	 * extconfig, so the same arrayIndex computed above is used here.
	 */
	elementDatum = PointerGetDatum(wherecond);

	arrayDatum = heap_getattr(extTup, Anum_pg_extension_extcondition,
							  RelationGetDescr(extRel), &isnull);
	if (isnull)
	{
		if (arrayLength != 0)
			elog(ERROR, "extconfig and extcondition arrays do not match");

		a = construct_array(&elementDatum, 1,
							TEXTOID,
							-1, false, TYPALIGN_INT);
	}
	else
	{
		a = DatumGetArrayTypeP(arrayDatum);

		if (ARR_NDIM(a) != 1 ||
			ARR_LBOUND(a)[0] != 1 ||
			ARR_HASNULL(a) ||
			ARR_ELEMTYPE(a) != TEXTOID)
			elog(ERROR, "extcondition is not a 1-D text array");
		if (ARR_DIMS(a)[0] != arrayLength)
			elog(ERROR, "extconfig and extcondition arrays do not match");

		/* Add or replace at same index as in extconfig */
		a = array_set(a, 1, &arrayIndex,
					  elementDatum,
					  false,
					  -1 /* varlena array */ ,
					  -1 /* TEXT's typlen */ ,
					  false /* TEXT's typbyval */ ,
					  TYPALIGN_INT /* TEXT's typalign */ );
	}
	repl_val[Anum_pg_extension_extcondition - 1] = PointerGetDatum(a);
	repl_repl[Anum_pg_extension_extcondition - 1] = true;

	extTup = heap_modify_tuple(extTup, RelationGetDescr(extRel),
							   repl_val, repl_null, repl_repl);

	CatalogTupleUpdate(extRel, &extTup->t_self, extTup);

	systable_endscan(extScan);

	table_close(extRel, RowExclusiveLock);

	PG_RETURN_VOID();
}
+
+/*
+ * extension_config_remove
+ *
+ * Remove the specified table OID from extension's extconfig, if present.
+ * This is not currently exposed as a function, but it could be;
+ * for now, we just invoke it from ALTER EXTENSION DROP.
+ */
static void
extension_config_remove(Oid extensionoid, Oid tableoid)
{
	Relation	extRel;
	ScanKeyData key[1];
	SysScanDesc extScan;
	HeapTuple	extTup;
	Datum		arrayDatum;
	int			arrayLength;	/* number of entries in extconfig */
	int			arrayIndex;		/* 0-based position of tableoid, or -1 */
	bool		isnull;
	Datum		repl_val[Natts_pg_extension];
	bool		repl_null[Natts_pg_extension];
	bool		repl_repl[Natts_pg_extension];
	ArrayType  *a;

	/* Find the pg_extension tuple */
	extRel = table_open(ExtensionRelationId, RowExclusiveLock);

	ScanKeyInit(&key[0],
				Anum_pg_extension_oid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(extensionoid));

	extScan = systable_beginscan(extRel, ExtensionOidIndexId, true,
								 NULL, 1, key);

	extTup = systable_getnext(extScan);

	if (!HeapTupleIsValid(extTup)) /* should not happen */
		elog(ERROR, "could not find tuple for extension %u",
			 extensionoid);

	/* Search extconfig for the tableoid */
	arrayDatum = heap_getattr(extTup, Anum_pg_extension_extconfig,
							  RelationGetDescr(extRel), &isnull);
	if (isnull)
	{
		/* nothing to do: a null extconfig certainly doesn't list the table */
		a = NULL;
		arrayLength = 0;
		arrayIndex = -1;
	}
	else
	{
		Oid		   *arrayData;
		int			i;

		a = DatumGetArrayTypeP(arrayDatum);

		arrayLength = ARR_DIMS(a)[0];
		if (ARR_NDIM(a) != 1 ||
			ARR_LBOUND(a)[0] != 1 ||
			arrayLength < 0 ||
			ARR_HASNULL(a) ||
			ARR_ELEMTYPE(a) != OIDOID)
			elog(ERROR, "extconfig is not a 1-D Oid array");
		arrayData = (Oid *) ARR_DATA_PTR(a);

		arrayIndex = -1;		/* flag for no deletion needed */

		/* linear search; extconfig arrays are expected to be short */
		for (i = 0; i < arrayLength; i++)
		{
			if (arrayData[i] == tableoid)
			{
				arrayIndex = i; /* index to remove */
				break;
			}
		}
	}

	/* If tableoid is not in extconfig, nothing to do */
	if (arrayIndex < 0)
	{
		systable_endscan(extScan);
		table_close(extRel, RowExclusiveLock);
		return;
	}

	/* Modify or delete the extconfig value */
	memset(repl_val, 0, sizeof(repl_val));
	memset(repl_null, false, sizeof(repl_null));
	memset(repl_repl, false, sizeof(repl_repl));

	if (arrayLength <= 1)
	{
		/* removing only element, just set array to null */
		repl_null[Anum_pg_extension_extconfig - 1] = true;
	}
	else
	{
		/* squeeze out the target element */
		Datum	   *dvalues;
		int			nelems;
		int			i;

		/* We already checked there are no nulls */
		deconstruct_array(a, OIDOID, sizeof(Oid), true, TYPALIGN_INT,
						  &dvalues, NULL, &nelems);

		/* shift the following elements left by one to fill the hole */
		for (i = arrayIndex; i < arrayLength - 1; i++)
			dvalues[i] = dvalues[i + 1];

		a = construct_array(dvalues, arrayLength - 1,
							OIDOID, sizeof(Oid), true, TYPALIGN_INT);

		repl_val[Anum_pg_extension_extconfig - 1] = PointerGetDatum(a);
	}
	repl_repl[Anum_pg_extension_extconfig - 1] = true;

	/*
	 * Modify or delete the extcondition value.  extcondition must be kept
	 * exactly parallel to extconfig: same length, and the entry removed here
	 * is at the same arrayIndex found above.
	 */
	arrayDatum = heap_getattr(extTup, Anum_pg_extension_extcondition,
							  RelationGetDescr(extRel), &isnull);
	if (isnull)
	{
		/* can't be null if extconfig wasn't; the catalogs are corrupt */
		elog(ERROR, "extconfig and extcondition arrays do not match");
	}
	else
	{
		a = DatumGetArrayTypeP(arrayDatum);

		if (ARR_NDIM(a) != 1 ||
			ARR_LBOUND(a)[0] != 1 ||
			ARR_HASNULL(a) ||
			ARR_ELEMTYPE(a) != TEXTOID)
			elog(ERROR, "extcondition is not a 1-D text array");
		if (ARR_DIMS(a)[0] != arrayLength)
			elog(ERROR, "extconfig and extcondition arrays do not match");
	}

	if (arrayLength <= 1)
	{
		/* removing only element, just set array to null */
		repl_null[Anum_pg_extension_extcondition - 1] = true;
	}
	else
	{
		/* squeeze out the target element, mirroring the extconfig edit */
		Datum	   *dvalues;
		int			nelems;
		int			i;

		/* We already checked there are no nulls */
		deconstruct_array(a, TEXTOID, -1, false, TYPALIGN_INT,
						  &dvalues, NULL, &nelems);

		for (i = arrayIndex; i < arrayLength - 1; i++)
			dvalues[i] = dvalues[i + 1];

		a = construct_array(dvalues, arrayLength - 1,
							TEXTOID, -1, false, TYPALIGN_INT);

		repl_val[Anum_pg_extension_extcondition - 1] = PointerGetDatum(a);
	}
	repl_repl[Anum_pg_extension_extcondition - 1] = true;

	/* Write back the updated pg_extension row */
	extTup = heap_modify_tuple(extTup, RelationGetDescr(extRel),
							   repl_val, repl_null, repl_repl);

	CatalogTupleUpdate(extRel, &extTup->t_self, extTup);

	systable_endscan(extScan);

	table_close(extRel, RowExclusiveLock);
}
+
+/*
+ * Execute ALTER EXTENSION SET SCHEMA
+ */
ObjectAddress
AlterExtensionNamespace(const char *extensionName, const char *newschema, Oid *oldschema)
{
	Oid			extensionOid;
	Oid			nspOid;			/* target namespace */
	Oid			oldNspOid;		/* namespace we are moving away from */
	AclResult	aclresult;
	Relation	extRel;
	ScanKeyData key[2];			/* key[0] for pg_extension, both for pg_depend */
	SysScanDesc extScan;
	HeapTuple	extTup;
	Form_pg_extension extForm;
	Relation	depRel;
	SysScanDesc depScan;
	HeapTuple	depTup;
	ObjectAddresses *objsMoved;
	ObjectAddress extAddr;

	/* missing_ok = false: error out if the extension doesn't exist */
	extensionOid = get_extension_oid(extensionName, false);

	nspOid = LookupCreationNamespace(newschema);

	/*
	 * Permission check: must own extension.  Note that we don't bother to
	 * check ownership of the individual member objects ...
	 */
	if (!pg_extension_ownercheck(extensionOid, GetUserId()))
		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_EXTENSION,
					   extensionName);

	/* Permission check: must have creation rights in target namespace */
	aclresult = pg_namespace_aclcheck(nspOid, GetUserId(), ACL_CREATE);
	if (aclresult != ACLCHECK_OK)
		aclcheck_error(aclresult, OBJECT_SCHEMA, newschema);

	/*
	 * If the schema is currently a member of the extension, disallow moving
	 * the extension into the schema.  That would create a dependency loop.
	 */
	if (getExtensionOfObject(NamespaceRelationId, nspOid) == extensionOid)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("cannot move extension \"%s\" into schema \"%s\" "
						"because the extension contains the schema",
						extensionName, newschema)));

	/* Locate the pg_extension tuple */
	extRel = table_open(ExtensionRelationId, RowExclusiveLock);

	ScanKeyInit(&key[0],
				Anum_pg_extension_oid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(extensionOid));

	extScan = systable_beginscan(extRel, ExtensionOidIndexId, true,
								 NULL, 1, key);

	extTup = systable_getnext(extScan);

	if (!HeapTupleIsValid(extTup)) /* should not happen */
		elog(ERROR, "could not find tuple for extension %u",
			 extensionOid);

	/*
	 * Copy tuple so we can modify it below; the scan's tuple is only valid
	 * until systable_endscan.
	 */
	extTup = heap_copytuple(extTup);
	extForm = (Form_pg_extension) GETSTRUCT(extTup);

	systable_endscan(extScan);

	/*
	 * If the extension is already in the target schema, just silently do
	 * nothing.
	 */
	if (extForm->extnamespace == nspOid)
	{
		table_close(extRel, RowExclusiveLock);
		return InvalidObjectAddress;
	}

	/* Check extension is supposed to be relocatable */
	if (!extForm->extrelocatable)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("extension \"%s\" does not support SET SCHEMA",
						NameStr(extForm->extname))));

	objsMoved = new_object_addresses();

	/* store the OID of the namespace to-be-changed */
	oldNspOid = extForm->extnamespace;

	/*
	 * Scan pg_depend to find objects that depend directly on the extension,
	 * and alter each one's schema.
	 */
	depRel = table_open(DependRelationId, AccessShareLock);

	ScanKeyInit(&key[0],
				Anum_pg_depend_refclassid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(ExtensionRelationId));
	ScanKeyInit(&key[1],
				Anum_pg_depend_refobjid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(extensionOid));

	depScan = systable_beginscan(depRel, DependReferenceIndexId, true,
								 NULL, 2, key);

	while (HeapTupleIsValid(depTup = systable_getnext(depScan)))
	{
		Form_pg_depend pg_depend = (Form_pg_depend) GETSTRUCT(depTup);
		ObjectAddress dep;
		Oid			dep_oldNspOid;

		/*
		 * Ignore non-membership dependencies.  (Currently, the only other
		 * case we could see here is a normal dependency from another
		 * extension.)
		 */
		if (pg_depend->deptype != DEPENDENCY_EXTENSION)
			continue;

		dep.classId = pg_depend->classid;
		dep.objectId = pg_depend->objid;
		dep.objectSubId = pg_depend->objsubid;

		if (dep.objectSubId != 0)	/* should not happen */
			elog(ERROR, "extension should not have a sub-object dependency");

		/* Relocate the object; objsMoved tracks what's been done already */
		dep_oldNspOid = AlterObjectNamespace_oid(dep.classId,
												 dep.objectId,
												 nspOid,
												 objsMoved);

		/*
		 * If not all the objects had the same old namespace (ignoring any
		 * that are not in namespaces), complain.
		 */
		if (dep_oldNspOid != InvalidOid && dep_oldNspOid != oldNspOid)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("extension \"%s\" does not support SET SCHEMA",
							NameStr(extForm->extname)),
					 errdetail("%s is not in the extension's schema \"%s\"",
							   getObjectDescription(&dep, false),
							   get_namespace_name(oldNspOid))));
	}

	/* report old schema, if caller wants it */
	if (oldschema)
		*oldschema = oldNspOid;

	systable_endscan(depScan);

	relation_close(depRel, AccessShareLock);

	/* Now adjust pg_extension.extnamespace (in our copied tuple) */
	extForm->extnamespace = nspOid;

	CatalogTupleUpdate(extRel, &extTup->t_self, extTup);

	table_close(extRel, RowExclusiveLock);

	/* update dependencies to point to the new schema */
	changeDependencyFor(ExtensionRelationId, extensionOid,
						NamespaceRelationId, oldNspOid, nspOid);

	InvokeObjectPostAlterHook(ExtensionRelationId, extensionOid, 0);

	ObjectAddressSet(extAddr, ExtensionRelationId, extensionOid);

	return extAddr;
}
+
+/*
+ * Execute ALTER EXTENSION UPDATE
+ */
ObjectAddress
ExecAlterExtensionStmt(ParseState *pstate, AlterExtensionStmt *stmt)
{
	DefElem    *d_new_version = NULL;
	char	   *versionName;	/* version we are updating to */
	char	   *oldVersionName; /* currently-installed version */
	ExtensionControlFile *control;
	Oid			extensionOid;
	Relation	extRel;
	ScanKeyData key[1];
	SysScanDesc extScan;
	HeapTuple	extTup;
	List	   *updateVersions; /* chain of versions to step through */
	Datum		datum;
	bool		isnull;
	ListCell   *lc;
	ObjectAddress address;

	/*
	 * We use global variables to track the extension being created, so we can
	 * create/update only one extension at the same time.
	 */
	if (creating_extension)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("nested ALTER EXTENSION is not supported")));

	/*
	 * Look up the extension --- it must already exist in pg_extension
	 */
	extRel = table_open(ExtensionRelationId, AccessShareLock);

	ScanKeyInit(&key[0],
				Anum_pg_extension_extname,
				BTEqualStrategyNumber, F_NAMEEQ,
				CStringGetDatum(stmt->extname));

	extScan = systable_beginscan(extRel, ExtensionNameIndexId, true,
								 NULL, 1, key);

	extTup = systable_getnext(extScan);

	if (!HeapTupleIsValid(extTup))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("extension \"%s\" does not exist",
						stmt->extname)));

	extensionOid = ((Form_pg_extension) GETSTRUCT(extTup))->oid;

	/*
	 * Determine the existing version we are updating from
	 */
	datum = heap_getattr(extTup, Anum_pg_extension_extversion,
						 RelationGetDescr(extRel), &isnull);
	if (isnull)
		elog(ERROR, "extversion is null");
	oldVersionName = text_to_cstring(DatumGetTextPP(datum));

	systable_endscan(extScan);

	table_close(extRel, AccessShareLock);

	/* Permission check: must own extension */
	if (!pg_extension_ownercheck(extensionOid, GetUserId()))
		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_EXTENSION,
					   stmt->extname);

	/*
	 * Read the primary control file.  Note we assume that it does not contain
	 * any non-ASCII data, so there is no need to worry about encoding at this
	 * point.
	 */
	control = read_extension_control_file(stmt->extname);

	/*
	 * Read the statement option list; "new_version" is currently the only
	 * recognized option.
	 */
	foreach(lc, stmt->options)
	{
		DefElem    *defel = (DefElem *) lfirst(lc);

		if (strcmp(defel->defname, "new_version") == 0)
		{
			if (d_new_version)
				errorConflictingDefElem(defel, pstate);
			d_new_version = defel;
		}
		else
			elog(ERROR, "unrecognized option: %s", defel->defname);
	}

	/*
	 * Determine the version to update to: explicit TO clause wins, else the
	 * control file's default_version, else it's an error.
	 */
	if (d_new_version && d_new_version->arg)
		versionName = strVal(d_new_version->arg);
	else if (control->default_version)
		versionName = control->default_version;
	else
	{
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("version to install must be specified")));
		versionName = NULL;		/* keep compiler quiet */
	}
	check_valid_version_name(versionName);

	/*
	 * If we're already at that version, just say so (NOTICE, not an error)
	 */
	if (strcmp(oldVersionName, versionName) == 0)
	{
		ereport(NOTICE,
				(errmsg("version \"%s\" of extension \"%s\" is already installed",
						versionName, stmt->extname)));
		return InvalidObjectAddress;
	}

	/*
	 * Identify the series of update script files we need to execute
	 */
	updateVersions = identify_update_path(control,
										  oldVersionName,
										  versionName);

	/*
	 * Update the pg_extension row and execute the update scripts, one at a
	 * time
	 */
	ApplyExtensionUpdates(extensionOid, control,
						  oldVersionName, updateVersions,
						  NULL, false, false);

	ObjectAddressSet(address, ExtensionRelationId, extensionOid);

	return address;
}
+
+/*
+ * Apply a series of update scripts as though individual ALTER EXTENSION
+ * UPDATE commands had been given, including altering the pg_extension row
+ * and dependencies each time.
+ *
+ * This might be more work than necessary, but it ensures that old update
+ * scripts don't break if newer versions have different control parameters.
+ */
static void
ApplyExtensionUpdates(Oid extensionOid,
					  ExtensionControlFile *pcontrol,
					  const char *initialVersion,
					  List *updateVersions,
					  char *origSchemaName,
					  bool cascade,
					  bool is_create)
{
	const char *oldVersionName = initialVersion;
	ListCell   *lcv;

	/* Each loop iteration performs one version-to-version update step */
	foreach(lcv, updateVersions)
	{
		char	   *versionName = (char *) lfirst(lcv);
		ExtensionControlFile *control;
		char	   *schemaName;
		Oid			schemaOid;
		List	   *requiredExtensions;
		List	   *requiredSchemas;
		Relation	extRel;
		ScanKeyData key[1];
		SysScanDesc extScan;
		HeapTuple	extTup;
		Form_pg_extension extForm;
		Datum		values[Natts_pg_extension];
		bool		nulls[Natts_pg_extension];
		bool		repl[Natts_pg_extension];
		ObjectAddress myself;
		ListCell   *lc;

		/*
		 * Fetch parameters for specific version (pcontrol is not changed)
		 */
		control = read_extension_aux_control_file(pcontrol, versionName);

		/* Find the pg_extension tuple */
		extRel = table_open(ExtensionRelationId, RowExclusiveLock);

		ScanKeyInit(&key[0],
					Anum_pg_extension_oid,
					BTEqualStrategyNumber, F_OIDEQ,
					ObjectIdGetDatum(extensionOid));

		extScan = systable_beginscan(extRel, ExtensionOidIndexId, true,
									 NULL, 1, key);

		extTup = systable_getnext(extScan);

		if (!HeapTupleIsValid(extTup))	/* should not happen */
			elog(ERROR, "could not find tuple for extension %u",
				 extensionOid);

		extForm = (Form_pg_extension) GETSTRUCT(extTup);

		/*
		 * Determine the target schema (set by original install)
		 */
		schemaOid = extForm->extnamespace;
		schemaName = get_namespace_name(schemaOid);

		/*
		 * Modify extrelocatable and extversion in the pg_extension tuple
		 */
		memset(values, 0, sizeof(values));
		memset(nulls, 0, sizeof(nulls));
		memset(repl, 0, sizeof(repl));

		values[Anum_pg_extension_extrelocatable - 1] =
			BoolGetDatum(control->relocatable);
		repl[Anum_pg_extension_extrelocatable - 1] = true;
		values[Anum_pg_extension_extversion - 1] =
			CStringGetTextDatum(versionName);
		repl[Anum_pg_extension_extversion - 1] = true;

		extTup = heap_modify_tuple(extTup, RelationGetDescr(extRel),
								   values, nulls, repl);

		CatalogTupleUpdate(extRel, &extTup->t_self, extTup);

		systable_endscan(extScan);

		table_close(extRel, RowExclusiveLock);

		/*
		 * Look up the prerequisite extensions for this version, install them
		 * if necessary, and build lists of their OIDs and the OIDs of their
		 * target schemas.
		 */
		requiredExtensions = NIL;
		requiredSchemas = NIL;
		foreach(lc, control->requires)
		{
			char	   *curreq = (char *) lfirst(lc);
			Oid			reqext;
			Oid			reqschema;

			/* get_required_extension may CREATE the extension if cascading */
			reqext = get_required_extension(curreq,
											control->name,
											origSchemaName,
											cascade,
											NIL,
											is_create);
			reqschema = get_extension_schema(reqext);
			requiredExtensions = lappend_oid(requiredExtensions, reqext);
			requiredSchemas = lappend_oid(requiredSchemas, reqschema);
		}

		/*
		 * Remove and recreate dependencies on prerequisite extensions, since
		 * this version's requires list may differ from the previous one's
		 */
		deleteDependencyRecordsForClass(ExtensionRelationId, extensionOid,
										ExtensionRelationId,
										DEPENDENCY_NORMAL);

		myself.classId = ExtensionRelationId;
		myself.objectId = extensionOid;
		myself.objectSubId = 0;

		foreach(lc, requiredExtensions)
		{
			Oid			reqext = lfirst_oid(lc);
			ObjectAddress otherext;

			otherext.classId = ExtensionRelationId;
			otherext.objectId = reqext;
			otherext.objectSubId = 0;

			recordDependencyOn(&myself, &otherext, DEPENDENCY_NORMAL);
		}

		InvokeObjectPostAlterHook(ExtensionRelationId, extensionOid, 0);

		/*
		 * Finally, execute the update script file
		 */
		execute_extension_script(extensionOid, control,
								 oldVersionName, versionName,
								 requiredSchemas,
								 schemaName, schemaOid);

		/*
		 * Update prior-version name and loop around.  Since
		 * execute_sql_string did a final CommandCounterIncrement, we can
		 * update the pg_extension row again.
		 */
		oldVersionName = versionName;
	}
}
+
+/*
+ * Execute ALTER EXTENSION ADD/DROP
+ *
+ * Return value is the address of the altered extension.
+ *
+ * objAddr is an output argument which, if not NULL, is set to the address of
+ * the added/dropped object.
+ */
ObjectAddress
ExecAlterExtensionContentsStmt(AlterExtensionContentsStmt *stmt,
							   ObjectAddress *objAddr)
{
	ObjectAddress extension;
	ObjectAddress object;
	Relation	relation;
	Oid			oldExtension;	/* extension the object currently belongs to */

	/* Object types that can never be extension members are rejected up front */
	switch (stmt->objtype)
	{
		case OBJECT_DATABASE:
		case OBJECT_EXTENSION:
		case OBJECT_INDEX:
		case OBJECT_PUBLICATION:
		case OBJECT_ROLE:
		case OBJECT_STATISTIC_EXT:
		case OBJECT_SUBSCRIPTION:
		case OBJECT_TABLESPACE:
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
					 errmsg("cannot add an object of this type to an extension")));
			break;
		default:
			/* OK */
			break;
	}

	/*
	 * Find the extension and acquire a lock on it, to ensure it doesn't get
	 * dropped concurrently.  A sharable lock seems sufficient: there's no
	 * reason not to allow other sorts of manipulations, such as add/drop of
	 * other objects, to occur concurrently.  Concurrently adding/dropping the
	 * *same* object would be bad, but we prevent that by using a non-sharable
	 * lock on the individual object, below.
	 */
	extension = get_object_address(OBJECT_EXTENSION,
								   (Node *) makeString(stmt->extname),
								   &relation, AccessShareLock, false);

	/* Permission check: must own extension */
	if (!pg_extension_ownercheck(extension.objectId, GetUserId()))
		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_EXTENSION,
					   stmt->extname);

	/*
	 * Translate the parser representation that identifies the object into an
	 * ObjectAddress.  get_object_address() will throw an error if the object
	 * does not exist, and will also acquire a lock on the object to guard
	 * against concurrent DROP and ALTER EXTENSION ADD/DROP operations.
	 */
	object = get_object_address(stmt->objtype, stmt->object,
								&relation, ShareUpdateExclusiveLock, false);

	Assert(object.objectSubId == 0);
	if (objAddr)
		*objAddr = object;

	/* Permission check: must own target object, too */
	check_object_ownership(GetUserId(), stmt->objtype, object,
						   stmt->object, relation);

	/*
	 * Check existing extension membership.
	 */
	oldExtension = getExtensionOfObject(object.classId, object.objectId);

	/* stmt->action > 0 means ADD, otherwise DROP */
	if (stmt->action > 0)
	{
		/*
		 * ADD, so complain if object is already attached to some extension.
		 */
		if (OidIsValid(oldExtension))
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("%s is already a member of extension \"%s\"",
							getObjectDescription(&object, false),
							get_extension_name(oldExtension))));

		/*
		 * Prevent a schema from being added to an extension if the schema
		 * contains the extension.  That would create a dependency loop.
		 */
		if (object.classId == NamespaceRelationId &&
			object.objectId == get_extension_schema(extension.objectId))
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("cannot add schema \"%s\" to extension \"%s\" "
							"because the schema contains the extension",
							get_namespace_name(object.objectId),
							stmt->extname)));

		/*
		 * OK, add the dependency.
		 */
		recordDependencyOn(&object, &extension, DEPENDENCY_EXTENSION);

		/*
		 * Also record the initial ACL on the object, if any.
		 *
		 * Note that this will handle the object's ACLs, as well as any ACLs
		 * on object subIds.  (In other words, when the object is a table,
		 * this will record the table's ACL and the ACLs for the columns on
		 * the table, if any).
		 */
		recordExtObjInitPriv(object.objectId, object.classId);
	}
	else
	{
		/*
		 * DROP, so complain if it's not a member.
		 */
		if (oldExtension != extension.objectId)
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("%s is not a member of extension \"%s\"",
							getObjectDescription(&object, false),
							stmt->extname)));

		/*
		 * OK, drop the dependency.  Exactly one membership record must exist.
		 */
		if (deleteDependencyRecordsForClass(object.classId, object.objectId,
											ExtensionRelationId,
											DEPENDENCY_EXTENSION) != 1)
			elog(ERROR, "unexpected number of extension dependency records");

		/*
		 * If it's a relation, it might have an entry in the extension's
		 * extconfig array, which we must remove.
		 */
		if (object.classId == RelationRelationId)
			extension_config_remove(extension.objectId, object.objectId);

		/*
		 * Remove all the initial ACLs, if any.
		 *
		 * Note that this will remove the object's ACLs, as well as any ACLs
		 * on object subIds.  (In other words, when the object is a table,
		 * this will remove the table's ACL and the ACLs for the columns on
		 * the table, if any).
		 */
		removeExtObjInitPriv(object.objectId, object.classId);
	}

	InvokeObjectPostAlterHook(ExtensionRelationId, extension.objectId, 0);

	/*
	 * If get_object_address() opened the relation for us, we close it to keep
	 * the reference count correct - but we retain any locks acquired by
	 * get_object_address() until commit time, to guard against concurrent
	 * activity.
	 */
	if (relation != NULL)
		relation_close(relation, NoLock);

	return extension;
}
+
+/*
+ * Read the whole of file into memory.
+ *
+ * The file contents are returned as a single palloc'd chunk. For convenience
+ * of the callers, an extra \0 byte is added to the end.
+ */
+static char *
+read_whole_file(const char *filename, int *length)
+{
+ char *buf;
+ FILE *file;
+ size_t bytes_to_read;
+ struct stat fst;
+
+ if (stat(filename, &fst) < 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not stat file \"%s\": %m", filename)));
+
+ if (fst.st_size > (MaxAllocSize - 1))
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("file \"%s\" is too large", filename)));
+ bytes_to_read = (size_t) fst.st_size;
+
+ if ((file = AllocateFile(filename, PG_BINARY_R)) == NULL)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not open file \"%s\" for reading: %m",
+ filename)));
+
+ buf = (char *) palloc(bytes_to_read + 1);
+
+ *length = fread(buf, 1, bytes_to_read, file);
+
+ if (ferror(file))
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not read file \"%s\": %m", filename)));
+
+ FreeFile(file);
+
+ buf[*length] = '\0';
+ return buf;
+}
diff --git a/src/backend/commands/foreigncmds.c b/src/backend/commands/foreigncmds.c
new file mode 100644
index 0000000..91f4dd3
--- /dev/null
+++ b/src/backend/commands/foreigncmds.c
@@ -0,0 +1,1617 @@
+/*-------------------------------------------------------------------------
+ *
+ * foreigncmds.c
+ * foreign-data wrapper/server creation/manipulation commands
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/commands/foreigncmds.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/htup_details.h"
+#include "access/reloptions.h"
+#include "access/table.h"
+#include "access/xact.h"
+#include "catalog/catalog.h"
+#include "catalog/dependency.h"
+#include "catalog/indexing.h"
+#include "catalog/objectaccess.h"
+#include "catalog/pg_foreign_data_wrapper.h"
+#include "catalog/pg_foreign_server.h"
+#include "catalog/pg_foreign_table.h"
+#include "catalog/pg_proc.h"
+#include "catalog/pg_type.h"
+#include "catalog/pg_user_mapping.h"
+#include "commands/defrem.h"
+#include "foreign/fdwapi.h"
+#include "foreign/foreign.h"
+#include "miscadmin.h"
+#include "parser/parse_func.h"
+#include "tcop/utility.h"
+#include "utils/acl.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
+
+
/*
 * Context passed to import_error_callback while executing commands generated
 * by IMPORT FOREIGN SCHEMA.
 */
typedef struct
{
	char	   *tablename;		/* name of table being processed, for error
								 * context; NOTE(review): presumably the
								 * foreign table named in cmd — confirm at
								 * the callback's call site */
	char	   *cmd;			/* text of command being executed */
} import_error_callback_arg;

/* Internal functions */
static void import_error_callback(void *arg);
+
+
+/*
+ * Convert a DefElem list to the text array format that is used in
+ * pg_foreign_data_wrapper, pg_foreign_server, pg_user_mapping, and
+ * pg_foreign_table.
+ *
+ * Returns the array in the form of a Datum, or PointerGetDatum(NULL)
+ * if the list is empty.
+ *
+ * Note: The array is usually stored to database without further
+ * processing, hence any validation should be done before this
+ * conversion.
+ */
+static Datum
+optionListToArray(List *options)
+{
+ ArrayBuildState *astate = NULL;
+ ListCell *cell;
+
+ foreach(cell, options)
+ {
+ DefElem *def = lfirst(cell);
+ const char *value;
+ Size len;
+ text *t;
+
+ value = defGetString(def);
+ len = VARHDRSZ + strlen(def->defname) + 1 + strlen(value);
+ t = palloc(len + 1);
+ SET_VARSIZE(t, len);
+ sprintf(VARDATA(t), "%s=%s", def->defname, value);
+
+ astate = accumArrayResult(astate, PointerGetDatum(t),
+ false, TEXTOID,
+ CurrentMemoryContext);
+ }
+
+ if (astate)
+ return makeArrayResult(astate, CurrentMemoryContext);
+
+ return PointerGetDatum(NULL);
+}
+
+
+/*
+ * Transform a list of DefElem into text array format. This is substantially
+ * the same thing as optionListToArray(), except we recognize SET/ADD/DROP
+ * actions for modifying an existing list of options, which is passed in
+ * Datum form as oldOptions. Also, if fdwvalidator isn't InvalidOid
+ * it specifies a validator function to call on the result.
+ *
+ * Returns the array in the form of a Datum, or PointerGetDatum(NULL)
+ * if the list is empty.
+ *
+ * This is used by CREATE/ALTER of FOREIGN DATA WRAPPER/SERVER/USER MAPPING/
+ * FOREIGN TABLE.
+ */
Datum
transformGenericOptions(Oid catalogId,
						Datum oldOptions,
						List *options,
						Oid fdwvalidator)
{
	/* Start from the existing options, decoded back into a DefElem list */
	List	   *resultOptions = untransformRelOptions(oldOptions);
	ListCell   *optcell;
	Datum		result;

	foreach(optcell, options)
	{
		DefElem    *od = lfirst(optcell);
		ListCell   *cell;

		/*
		 * Find the element in resultOptions.  We need this for validation in
		 * all cases.  Note: after the loop, cell is NULL iff no match was
		 * found; the switch below relies on that.
		 */
		foreach(cell, resultOptions)
		{
			DefElem    *def = lfirst(cell);

			if (strcmp(def->defname, od->defname) == 0)
				break;
		}

		/*
		 * It is possible to perform multiple SET/DROP actions on the same
		 * option.  The standard permits this, as long as the options to be
		 * added are unique.  Note that an unspecified action is taken to be
		 * ADD.
		 */
		switch (od->defaction)
		{
			case DEFELEM_DROP:
				/* DROP requires the option to exist */
				if (!cell)
					ereport(ERROR,
							(errcode(ERRCODE_UNDEFINED_OBJECT),
							 errmsg("option \"%s\" not found",
									od->defname)));
				resultOptions = list_delete_cell(resultOptions, cell);
				break;

			case DEFELEM_SET:
				/* SET replaces an existing option in place */
				if (!cell)
					ereport(ERROR,
							(errcode(ERRCODE_UNDEFINED_OBJECT),
							 errmsg("option \"%s\" not found",
									od->defname)));
				lfirst(cell) = od;
				break;

			case DEFELEM_ADD:
			case DEFELEM_UNSPEC:
				/* ADD requires the option to not already exist */
				if (cell)
					ereport(ERROR,
							(errcode(ERRCODE_DUPLICATE_OBJECT),
							 errmsg("option \"%s\" provided more than once",
									od->defname)));
				resultOptions = lappend(resultOptions, od);
				break;

			default:
				elog(ERROR, "unrecognized action %d on option \"%s\"",
					 (int) od->defaction, od->defname);
				break;
		}
	}

	/* Re-encode the merged list into text-array form */
	result = optionListToArray(resultOptions);

	if (OidIsValid(fdwvalidator))
	{
		Datum		valarg = result;

		/*
		 * Pass a null options list as an empty array, so that validators
		 * don't have to be declared non-strict to handle the case.
		 */
		if (DatumGetPointer(valarg) == NULL)
			valarg = PointerGetDatum(construct_empty_array(TEXTOID));
		OidFunctionCall2(fdwvalidator, valarg, ObjectIdGetDatum(catalogId));
	}

	return result;
}
+
+
+/*
+ * Internal workhorse for changing a data wrapper's owner.
+ *
+ * Allow this only for superusers; also the new owner must be a
+ * superuser.
+ */
static void
AlterForeignDataWrapperOwner_internal(Relation rel, HeapTuple tup, Oid newOwnerId)
{
	Form_pg_foreign_data_wrapper form;
	Datum		repl_val[Natts_pg_foreign_data_wrapper];
	bool		repl_null[Natts_pg_foreign_data_wrapper];
	bool		repl_repl[Natts_pg_foreign_data_wrapper];
	Acl		   *newAcl;
	Datum		aclDatum;
	bool		isNull;

	form = (Form_pg_foreign_data_wrapper) GETSTRUCT(tup);

	/* Must be a superuser to change a FDW owner */
	if (!superuser())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied to change owner of foreign-data wrapper \"%s\"",
						NameStr(form->fdwname)),
				 errhint("Must be superuser to change owner of a foreign-data wrapper.")));

	/* New owner must also be a superuser */
	if (!superuser_arg(newOwnerId))
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied to change owner of foreign-data wrapper \"%s\"",
						NameStr(form->fdwname)),
				 errhint("The owner of a foreign-data wrapper must be a superuser.")));

	/* Skip the catalog update if ownership isn't actually changing */
	if (form->fdwowner != newOwnerId)
	{
		memset(repl_null, false, sizeof(repl_null));
		memset(repl_repl, false, sizeof(repl_repl));

		repl_repl[Anum_pg_foreign_data_wrapper_fdwowner - 1] = true;
		repl_val[Anum_pg_foreign_data_wrapper_fdwowner - 1] = ObjectIdGetDatum(newOwnerId);

		aclDatum = heap_getattr(tup,
								Anum_pg_foreign_data_wrapper_fdwacl,
								RelationGetDescr(rel),
								&isNull);
		/* Null ACLs do not require changes */
		if (!isNull)
		{
			/* Transfer any privileges held by the old owner to the new one */
			newAcl = aclnewowner(DatumGetAclP(aclDatum),
								 form->fdwowner, newOwnerId);
			repl_repl[Anum_pg_foreign_data_wrapper_fdwacl - 1] = true;
			repl_val[Anum_pg_foreign_data_wrapper_fdwacl - 1] = PointerGetDatum(newAcl);
		}

		tup = heap_modify_tuple(tup, RelationGetDescr(rel), repl_val, repl_null,
								repl_repl);

		CatalogTupleUpdate(rel, &tup->t_self, tup);

		/* Update owner dependency reference */
		changeDependencyOnOwner(ForeignDataWrapperRelationId,
								form->oid,
								newOwnerId);
	}

	/* Fire the post-alter hook even for the no-op case */
	InvokeObjectPostAlterHook(ForeignDataWrapperRelationId,
							  form->oid, 0);
}
+
+/*
+ * Change foreign-data wrapper owner -- by name
+ *
+ * Note restrictions in the "_internal" function, above.
+ */
+ObjectAddress
+AlterForeignDataWrapperOwner(const char *name, Oid newOwnerId)
+{
+ Oid fdwId;
+ HeapTuple tup;
+ Relation rel;
+ ObjectAddress address;
+ Form_pg_foreign_data_wrapper form;
+
+
+ rel = table_open(ForeignDataWrapperRelationId, RowExclusiveLock);
+
+ tup = SearchSysCacheCopy1(FOREIGNDATAWRAPPERNAME, CStringGetDatum(name));
+
+ if (!HeapTupleIsValid(tup))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("foreign-data wrapper \"%s\" does not exist", name)));
+
+ form = (Form_pg_foreign_data_wrapper) GETSTRUCT(tup);
+ fdwId = form->oid;
+
+ AlterForeignDataWrapperOwner_internal(rel, tup, newOwnerId);
+
+ ObjectAddressSet(address, ForeignDataWrapperRelationId, fdwId);
+
+ heap_freetuple(tup);
+
+ table_close(rel, RowExclusiveLock);
+
+ return address;
+}
+
+/*
+ * Change foreign-data wrapper owner -- by OID
+ *
+ * Note restrictions in the "_internal" function, above.
+ */
+void
+AlterForeignDataWrapperOwner_oid(Oid fwdId, Oid newOwnerId)
+{
+ HeapTuple tup;
+ Relation rel;
+
+ rel = table_open(ForeignDataWrapperRelationId, RowExclusiveLock);
+
+ /* Fetch a modifiable copy of the wrapper's catalog tuple by OID. */
+ tup = SearchSysCacheCopy1(FOREIGNDATAWRAPPEROID, ObjectIdGetDatum(fwdId));
+
+ if (!HeapTupleIsValid(tup))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("foreign-data wrapper with OID %u does not exist", fwdId)));
+
+ /* Permission checks and the actual catalog update happen here. */
+ AlterForeignDataWrapperOwner_internal(rel, tup, newOwnerId);
+
+ heap_freetuple(tup);
+
+ table_close(rel, RowExclusiveLock);
+}
+
+/*
+ * Internal workhorse for changing a foreign server's owner
+ *
+ * Caller must hold a suitable lock on pg_foreign_server ("rel") and supply
+ * the server's catalog tuple ("tup").  The update is skipped entirely when
+ * the server already has the requested owner, but the post-alter hook is
+ * invoked either way.
+ */
+static void
+AlterForeignServerOwner_internal(Relation rel, HeapTuple tup, Oid newOwnerId)
+{
+ Form_pg_foreign_server form;
+ Datum repl_val[Natts_pg_foreign_server];
+ bool repl_null[Natts_pg_foreign_server];
+ bool repl_repl[Natts_pg_foreign_server];
+ Acl *newAcl;
+ Datum aclDatum;
+ bool isNull;
+
+ form = (Form_pg_foreign_server) GETSTRUCT(tup);
+
+ if (form->srvowner != newOwnerId)
+ {
+ /* Superusers can always do it */
+ if (!superuser())
+ {
+ Oid srvId;
+ AclResult aclresult;
+
+ srvId = form->oid;
+
+ /* Must be owner */
+ if (!pg_foreign_server_ownercheck(srvId, GetUserId()))
+ aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FOREIGN_SERVER,
+ NameStr(form->srvname));
+
+ /* Must be able to become new owner */
+ check_is_member_of_role(GetUserId(), newOwnerId);
+
+ /* New owner must have USAGE privilege on foreign-data wrapper */
+ aclresult = pg_foreign_data_wrapper_aclcheck(form->srvfdw, newOwnerId, ACL_USAGE);
+ if (aclresult != ACLCHECK_OK)
+ {
+ ForeignDataWrapper *fdw = GetForeignDataWrapper(form->srvfdw);
+
+ aclcheck_error(aclresult, OBJECT_FDW, fdw->fdwname);
+ }
+ }
+
+ /* Build the replacement tuple: new owner, plus a rewritten ACL. */
+ memset(repl_null, false, sizeof(repl_null));
+ memset(repl_repl, false, sizeof(repl_repl));
+
+ repl_repl[Anum_pg_foreign_server_srvowner - 1] = true;
+ repl_val[Anum_pg_foreign_server_srvowner - 1] = ObjectIdGetDatum(newOwnerId);
+
+ aclDatum = heap_getattr(tup,
+ Anum_pg_foreign_server_srvacl,
+ RelationGetDescr(rel),
+ &isNull);
+ /* Null ACLs do not require changes */
+ if (!isNull)
+ {
+ /* Transfer the old owner's grants/ownership within the ACL. */
+ newAcl = aclnewowner(DatumGetAclP(aclDatum),
+ form->srvowner, newOwnerId);
+ repl_repl[Anum_pg_foreign_server_srvacl - 1] = true;
+ repl_val[Anum_pg_foreign_server_srvacl - 1] = PointerGetDatum(newAcl);
+ }
+
+ tup = heap_modify_tuple(tup, RelationGetDescr(rel), repl_val, repl_null,
+ repl_repl);
+
+ CatalogTupleUpdate(rel, &tup->t_self, tup);
+
+ /* Update owner dependency reference */
+ changeDependencyOnOwner(ForeignServerRelationId, form->oid,
+ newOwnerId);
+ }
+
+ /* Fired even when ownership was already as requested. */
+ InvokeObjectPostAlterHook(ForeignServerRelationId,
+ form->oid, 0);
+}
+
+/*
+ * Change foreign server owner -- by name
+ *
+ * Returns the ObjectAddress of the altered server.
+ */
+ObjectAddress
+AlterForeignServerOwner(const char *name, Oid newOwnerId)
+{
+ Oid servOid;
+ HeapTuple tup;
+ Relation rel;
+ ObjectAddress address;
+ Form_pg_foreign_server form;
+
+ rel = table_open(ForeignServerRelationId, RowExclusiveLock);
+
+ /* Fetch a modifiable copy of the server's catalog tuple by name. */
+ tup = SearchSysCacheCopy1(FOREIGNSERVERNAME, CStringGetDatum(name));
+
+ if (!HeapTupleIsValid(tup))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("server \"%s\" does not exist", name)));
+
+ form = (Form_pg_foreign_server) GETSTRUCT(tup);
+ servOid = form->oid;
+
+ /* Permission checks and the actual catalog update happen here. */
+ AlterForeignServerOwner_internal(rel, tup, newOwnerId);
+
+ ObjectAddressSet(address, ForeignServerRelationId, servOid);
+
+ heap_freetuple(tup);
+
+ table_close(rel, RowExclusiveLock);
+
+ return address;
+}
+
+/*
+ * Change foreign server owner -- by OID
+ */
+void
+AlterForeignServerOwner_oid(Oid srvId, Oid newOwnerId)
+{
+ HeapTuple tup;
+ Relation rel;
+
+ rel = table_open(ForeignServerRelationId, RowExclusiveLock);
+
+ /* Fetch a modifiable copy of the server's catalog tuple by OID. */
+ tup = SearchSysCacheCopy1(FOREIGNSERVEROID, ObjectIdGetDatum(srvId));
+
+ if (!HeapTupleIsValid(tup))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("foreign server with OID %u does not exist", srvId)));
+
+ /* Permission checks and the actual catalog update happen here. */
+ AlterForeignServerOwner_internal(rel, tup, newOwnerId);
+
+ heap_freetuple(tup);
+
+ table_close(rel, RowExclusiveLock);
+}
+
+/*
+ * Convert a handler function name passed from the parser to an Oid.
+ *
+ * Returns InvalidOid when no handler (or an empty argument) was given.
+ * Errors out if the function doesn't exist or has the wrong return type.
+ */
+static Oid
+lookup_fdw_handler_func(DefElem *handler)
+{
+ Oid handlerOid;
+
+ if (handler == NULL || handler->arg == NULL)
+ return InvalidOid;
+
+ /* handlers have no arguments */
+ handlerOid = LookupFuncName((List *) handler->arg, 0, NULL, false);
+
+ /* check that handler has correct return type */
+ if (get_func_rettype(handlerOid) != FDW_HANDLEROID)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("function %s must return type %s",
+ NameListToString((List *) handler->arg), "fdw_handler")));
+
+ return handlerOid;
+}
+
+/*
+ * Convert a validator function name passed from the parser to an Oid.
+ *
+ * Returns InvalidOid when no validator (or an empty argument) was given.
+ * The function must accept (text[], oid); its return type is not checked.
+ */
+static Oid
+lookup_fdw_validator_func(DefElem *validator)
+{
+ Oid funcargtypes[2];
+
+ if (validator == NULL || validator->arg == NULL)
+ return InvalidOid;
+
+ /* validators take text[], oid */
+ funcargtypes[0] = TEXTARRAYOID;
+ funcargtypes[1] = OIDOID;
+
+ return LookupFuncName((List *) validator->arg, 2, funcargtypes, false);
+ /* validator's return value is ignored, so we don't check the type */
+}
+
+/*
+ * Process function options of CREATE/ALTER FDW
+ *
+ * On return, *handler_given/*validator_given record whether each option
+ * appeared in the list at all; *fdwhandler/*fdwvalidator are the resolved
+ * function OIDs, or InvalidOid when the option was absent or its argument
+ * was empty.  Duplicate options and unrecognized option names raise errors.
+ */
+static void
+parse_func_options(ParseState *pstate, List *func_options,
+ bool *handler_given, Oid *fdwhandler,
+ bool *validator_given, Oid *fdwvalidator)
+{
+ ListCell *cell;
+
+ *handler_given = false;
+ *validator_given = false;
+ /* return InvalidOid if not given */
+ *fdwhandler = InvalidOid;
+ *fdwvalidator = InvalidOid;
+
+ foreach(cell, func_options)
+ {
+ DefElem *def = (DefElem *) lfirst(cell);
+
+ if (strcmp(def->defname, "handler") == 0)
+ {
+ /* Reject a second HANDLER clause. */
+ if (*handler_given)
+ errorConflictingDefElem(def, pstate);
+ *handler_given = true;
+ *fdwhandler = lookup_fdw_handler_func(def);
+ }
+ else if (strcmp(def->defname, "validator") == 0)
+ {
+ /* Reject a second VALIDATOR clause. */
+ if (*validator_given)
+ errorConflictingDefElem(def, pstate);
+ *validator_given = true;
+ *fdwvalidator = lookup_fdw_validator_func(def);
+ }
+ else
+ elog(ERROR, "option \"%s\" not recognized",
+ def->defname);
+ }
+}
+
+/*
+ * Create a foreign-data wrapper
+ *
+ * Superuser-only.  Inserts the pg_foreign_data_wrapper row, records
+ * dependencies on the handler/validator functions, the owner, and the
+ * current extension (if any), and returns the new wrapper's address.
+ */
+ObjectAddress
+CreateForeignDataWrapper(ParseState *pstate, CreateFdwStmt *stmt)
+{
+ Relation rel;
+ Datum values[Natts_pg_foreign_data_wrapper];
+ bool nulls[Natts_pg_foreign_data_wrapper];
+ HeapTuple tuple;
+ Oid fdwId;
+ bool handler_given;
+ bool validator_given;
+ Oid fdwhandler;
+ Oid fdwvalidator;
+ Datum fdwoptions;
+ Oid ownerId;
+ ObjectAddress myself;
+ ObjectAddress referenced;
+
+ rel = table_open(ForeignDataWrapperRelationId, RowExclusiveLock);
+
+ /* Must be superuser */
+ if (!superuser())
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("permission denied to create foreign-data wrapper \"%s\"",
+ stmt->fdwname),
+ errhint("Must be superuser to create a foreign-data wrapper.")));
+
+ /* For now the owner cannot be specified on create. Use effective user ID. */
+ ownerId = GetUserId();
+
+ /*
+ * Check that there is no other foreign-data wrapper by this name.
+ */
+ if (GetForeignDataWrapperByName(stmt->fdwname, true) != NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg("foreign-data wrapper \"%s\" already exists",
+ stmt->fdwname)));
+
+ /*
+ * Insert tuple into pg_foreign_data_wrapper.
+ */
+ memset(values, 0, sizeof(values));
+ memset(nulls, false, sizeof(nulls));
+
+ fdwId = GetNewOidWithIndex(rel, ForeignDataWrapperOidIndexId,
+ Anum_pg_foreign_data_wrapper_oid);
+ values[Anum_pg_foreign_data_wrapper_oid - 1] = ObjectIdGetDatum(fdwId);
+ values[Anum_pg_foreign_data_wrapper_fdwname - 1] =
+ DirectFunctionCall1(namein, CStringGetDatum(stmt->fdwname));
+ values[Anum_pg_foreign_data_wrapper_fdwowner - 1] = ObjectIdGetDatum(ownerId);
+
+ /* Lookup handler and validator functions, if given */
+ parse_func_options(pstate, stmt->func_options,
+ &handler_given, &fdwhandler,
+ &validator_given, &fdwvalidator);
+
+ values[Anum_pg_foreign_data_wrapper_fdwhandler - 1] = ObjectIdGetDatum(fdwhandler);
+ values[Anum_pg_foreign_data_wrapper_fdwvalidator - 1] = ObjectIdGetDatum(fdwvalidator);
+
+ /* The ACL starts out null (no explicit grants). */
+ nulls[Anum_pg_foreign_data_wrapper_fdwacl - 1] = true;
+
+ /* Validate and transform the generic OPTIONS list, if any. */
+ fdwoptions = transformGenericOptions(ForeignDataWrapperRelationId,
+ PointerGetDatum(NULL),
+ stmt->options,
+ fdwvalidator);
+
+ if (PointerIsValid(DatumGetPointer(fdwoptions)))
+ values[Anum_pg_foreign_data_wrapper_fdwoptions - 1] = fdwoptions;
+ else
+ nulls[Anum_pg_foreign_data_wrapper_fdwoptions - 1] = true;
+
+ tuple = heap_form_tuple(rel->rd_att, values, nulls);
+
+ CatalogTupleInsert(rel, tuple);
+
+ heap_freetuple(tuple);
+
+ /* record dependencies */
+ myself.classId = ForeignDataWrapperRelationId;
+ myself.objectId = fdwId;
+ myself.objectSubId = 0;
+
+ if (OidIsValid(fdwhandler))
+ {
+ referenced.classId = ProcedureRelationId;
+ referenced.objectId = fdwhandler;
+ referenced.objectSubId = 0;
+ recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
+ }
+
+ if (OidIsValid(fdwvalidator))
+ {
+ referenced.classId = ProcedureRelationId;
+ referenced.objectId = fdwvalidator;
+ referenced.objectSubId = 0;
+ recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
+ }
+
+ recordDependencyOnOwner(ForeignDataWrapperRelationId, fdwId, ownerId);
+
+ /* dependency on extension */
+ recordDependencyOnCurrentExtension(&myself, false);
+
+ /* Post creation hook for new foreign data wrapper */
+ InvokeObjectPostCreateHook(ForeignDataWrapperRelationId, fdwId, 0);
+
+ table_close(rel, RowExclusiveLock);
+
+ return myself;
+}
+
+
+/*
+ * Alter foreign-data wrapper
+ *
+ * Superuser-only.  Updates the handler, validator, and/or generic options
+ * of an existing wrapper, refreshing function dependency records when the
+ * handler or validator changed.  Returns the wrapper's ObjectAddress.
+ */
+ObjectAddress
+AlterForeignDataWrapper(ParseState *pstate, AlterFdwStmt *stmt)
+{
+ Relation rel;
+ HeapTuple tp;
+ Form_pg_foreign_data_wrapper fdwForm;
+ Datum repl_val[Natts_pg_foreign_data_wrapper];
+ bool repl_null[Natts_pg_foreign_data_wrapper];
+ bool repl_repl[Natts_pg_foreign_data_wrapper];
+ Oid fdwId;
+ bool isnull;
+ Datum datum;
+ bool handler_given;
+ bool validator_given;
+ Oid fdwhandler;
+ Oid fdwvalidator;
+ ObjectAddress myself;
+
+ rel = table_open(ForeignDataWrapperRelationId, RowExclusiveLock);
+
+ /* Must be superuser */
+ if (!superuser())
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("permission denied to alter foreign-data wrapper \"%s\"",
+ stmt->fdwname),
+ errhint("Must be superuser to alter a foreign-data wrapper.")));
+
+ tp = SearchSysCacheCopy1(FOREIGNDATAWRAPPERNAME,
+ CStringGetDatum(stmt->fdwname));
+
+ if (!HeapTupleIsValid(tp))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("foreign-data wrapper \"%s\" does not exist", stmt->fdwname)));
+
+ fdwForm = (Form_pg_foreign_data_wrapper) GETSTRUCT(tp);
+ fdwId = fdwForm->oid;
+
+ memset(repl_val, 0, sizeof(repl_val));
+ memset(repl_null, false, sizeof(repl_null));
+ memset(repl_repl, false, sizeof(repl_repl));
+
+ parse_func_options(pstate, stmt->func_options,
+ &handler_given, &fdwhandler,
+ &validator_given, &fdwvalidator);
+
+ if (handler_given)
+ {
+ repl_val[Anum_pg_foreign_data_wrapper_fdwhandler - 1] = ObjectIdGetDatum(fdwhandler);
+ repl_repl[Anum_pg_foreign_data_wrapper_fdwhandler - 1] = true;
+
+ /*
+ * It could be that the behavior of accessing foreign table changes
+ * with the new handler. Warn about this.
+ */
+ ereport(WARNING,
+ (errmsg("changing the foreign-data wrapper handler can change behavior of existing foreign tables")));
+ }
+
+ if (validator_given)
+ {
+ repl_val[Anum_pg_foreign_data_wrapper_fdwvalidator - 1] = ObjectIdGetDatum(fdwvalidator);
+ repl_repl[Anum_pg_foreign_data_wrapper_fdwvalidator - 1] = true;
+
+ /*
+ * It could be that existing options for the FDW or dependent SERVER,
+ * USER MAPPING or FOREIGN TABLE objects are no longer valid according
+ * to the new validator. Warn about this.
+ */
+ if (OidIsValid(fdwvalidator))
+ ereport(WARNING,
+ (errmsg("changing the foreign-data wrapper validator can cause "
+ "the options for dependent objects to become invalid")));
+ }
+ else
+ {
+ /*
+ * Validator is not changed, but we need it for validating options.
+ */
+ fdwvalidator = fdwForm->fdwvalidator;
+ }
+
+ /*
+ * If options specified, validate and update.
+ */
+ if (stmt->options)
+ {
+ /* Extract the current options */
+ datum = SysCacheGetAttr(FOREIGNDATAWRAPPEROID,
+ tp,
+ Anum_pg_foreign_data_wrapper_fdwoptions,
+ &isnull);
+ if (isnull)
+ datum = PointerGetDatum(NULL);
+
+ /* Transform the options */
+ datum = transformGenericOptions(ForeignDataWrapperRelationId,
+ datum,
+ stmt->options,
+ fdwvalidator);
+
+ /* A NULL result means the option list became empty. */
+ if (PointerIsValid(DatumGetPointer(datum)))
+ repl_val[Anum_pg_foreign_data_wrapper_fdwoptions - 1] = datum;
+ else
+ repl_null[Anum_pg_foreign_data_wrapper_fdwoptions - 1] = true;
+
+ repl_repl[Anum_pg_foreign_data_wrapper_fdwoptions - 1] = true;
+ }
+
+ /* Everything looks good - update the tuple */
+ tp = heap_modify_tuple(tp, RelationGetDescr(rel),
+ repl_val, repl_null, repl_repl);
+
+ CatalogTupleUpdate(rel, &tp->t_self, tp);
+
+ heap_freetuple(tp);
+
+ ObjectAddressSet(myself, ForeignDataWrapperRelationId, fdwId);
+
+ /* Update function dependencies if we changed them */
+ if (handler_given || validator_given)
+ {
+ ObjectAddress referenced;
+
+ /*
+ * Flush all existing dependency records of this FDW on functions; we
+ * assume there can be none other than the ones we are fixing.
+ */
+ deleteDependencyRecordsForClass(ForeignDataWrapperRelationId,
+ fdwId,
+ ProcedureRelationId,
+ DEPENDENCY_NORMAL);
+
+ /* And build new ones. */
+
+ if (OidIsValid(fdwhandler))
+ {
+ referenced.classId = ProcedureRelationId;
+ referenced.objectId = fdwhandler;
+ referenced.objectSubId = 0;
+ recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
+ }
+
+ if (OidIsValid(fdwvalidator))
+ {
+ referenced.classId = ProcedureRelationId;
+ referenced.objectId = fdwvalidator;
+ referenced.objectSubId = 0;
+ recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
+ }
+ }
+
+ InvokeObjectPostAlterHook(ForeignDataWrapperRelationId, fdwId, 0);
+
+ table_close(rel, RowExclusiveLock);
+
+ return myself;
+}
+
+
+/*
+ * Create a foreign server
+ *
+ * The caller must have USAGE on the underlying FDW.  Honors IF NOT EXISTS
+ * (returning InvalidObjectAddress when the server already exists); records
+ * dependencies on the FDW, the owner, and the current extension.
+ */
+ObjectAddress
+CreateForeignServer(CreateForeignServerStmt *stmt)
+{
+ Relation rel;
+ Datum srvoptions;
+ Datum values[Natts_pg_foreign_server];
+ bool nulls[Natts_pg_foreign_server];
+ HeapTuple tuple;
+ Oid srvId;
+ Oid ownerId;
+ AclResult aclresult;
+ ObjectAddress myself;
+ ObjectAddress referenced;
+ ForeignDataWrapper *fdw;
+
+ rel = table_open(ForeignServerRelationId, RowExclusiveLock);
+
+ /* For now the owner cannot be specified on create. Use effective user ID. */
+ ownerId = GetUserId();
+
+ /*
+ * Check that there is no other foreign server by this name. If there is
+ * one, do nothing if IF NOT EXISTS was specified.
+ */
+ srvId = get_foreign_server_oid(stmt->servername, true);
+ if (OidIsValid(srvId))
+ {
+ if (stmt->if_not_exists)
+ {
+ /*
+ * If we are in an extension script, insist that the pre-existing
+ * object be a member of the extension, to avoid security risks.
+ */
+ ObjectAddressSet(myself, ForeignServerRelationId, srvId);
+ checkMembershipInCurrentExtension(&myself);
+
+ /* OK to skip */
+ ereport(NOTICE,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg("server \"%s\" already exists, skipping",
+ stmt->servername)));
+ table_close(rel, RowExclusiveLock);
+ return InvalidObjectAddress;
+ }
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg("server \"%s\" already exists",
+ stmt->servername)));
+ }
+
+ /*
+ * Check that the FDW exists and that we have USAGE on it. Also get the
+ * actual FDW for option validation etc.
+ */
+ fdw = GetForeignDataWrapperByName(stmt->fdwname, false);
+
+ aclresult = pg_foreign_data_wrapper_aclcheck(fdw->fdwid, ownerId, ACL_USAGE);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error(aclresult, OBJECT_FDW, fdw->fdwname);
+
+ /*
+ * Insert tuple into pg_foreign_server.
+ */
+ memset(values, 0, sizeof(values));
+ memset(nulls, false, sizeof(nulls));
+
+ srvId = GetNewOidWithIndex(rel, ForeignServerOidIndexId,
+ Anum_pg_foreign_server_oid);
+ values[Anum_pg_foreign_server_oid - 1] = ObjectIdGetDatum(srvId);
+ values[Anum_pg_foreign_server_srvname - 1] =
+ DirectFunctionCall1(namein, CStringGetDatum(stmt->servername));
+ values[Anum_pg_foreign_server_srvowner - 1] = ObjectIdGetDatum(ownerId);
+ values[Anum_pg_foreign_server_srvfdw - 1] = ObjectIdGetDatum(fdw->fdwid);
+
+ /* Add server type if supplied */
+ if (stmt->servertype)
+ values[Anum_pg_foreign_server_srvtype - 1] =
+ CStringGetTextDatum(stmt->servertype);
+ else
+ nulls[Anum_pg_foreign_server_srvtype - 1] = true;
+
+ /* Add server version if supplied */
+ if (stmt->version)
+ values[Anum_pg_foreign_server_srvversion - 1] =
+ CStringGetTextDatum(stmt->version);
+ else
+ nulls[Anum_pg_foreign_server_srvversion - 1] = true;
+
+ /* Start with a blank acl */
+ nulls[Anum_pg_foreign_server_srvacl - 1] = true;
+
+ /* Add server options */
+ srvoptions = transformGenericOptions(ForeignServerRelationId,
+ PointerGetDatum(NULL),
+ stmt->options,
+ fdw->fdwvalidator);
+
+ if (PointerIsValid(DatumGetPointer(srvoptions)))
+ values[Anum_pg_foreign_server_srvoptions - 1] = srvoptions;
+ else
+ nulls[Anum_pg_foreign_server_srvoptions - 1] = true;
+
+ tuple = heap_form_tuple(rel->rd_att, values, nulls);
+
+ CatalogTupleInsert(rel, tuple);
+
+ heap_freetuple(tuple);
+
+ /* record dependencies */
+ myself.classId = ForeignServerRelationId;
+ myself.objectId = srvId;
+ myself.objectSubId = 0;
+
+ referenced.classId = ForeignDataWrapperRelationId;
+ referenced.objectId = fdw->fdwid;
+ referenced.objectSubId = 0;
+ recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
+
+ recordDependencyOnOwner(ForeignServerRelationId, srvId, ownerId);
+
+ /* dependency on extension */
+ recordDependencyOnCurrentExtension(&myself, false);
+
+ /* Post creation hook for new foreign server */
+ InvokeObjectPostCreateHook(ForeignServerRelationId, srvId, 0);
+
+ table_close(rel, RowExclusiveLock);
+
+ return myself;
+}
+
+
+/*
+ * Alter foreign server
+ *
+ * Only the server's owner (or a superuser) may run this.  Updates the
+ * VERSION string and/or the generic options, validating the latter with
+ * the owning FDW's validator.  Returns the server's ObjectAddress.
+ */
+ObjectAddress
+AlterForeignServer(AlterForeignServerStmt *stmt)
+{
+ Relation rel;
+ HeapTuple tp;
+ Datum repl_val[Natts_pg_foreign_server];
+ bool repl_null[Natts_pg_foreign_server];
+ bool repl_repl[Natts_pg_foreign_server];
+ Oid srvId;
+ Form_pg_foreign_server srvForm;
+ ObjectAddress address;
+
+ rel = table_open(ForeignServerRelationId, RowExclusiveLock);
+
+ tp = SearchSysCacheCopy1(FOREIGNSERVERNAME,
+ CStringGetDatum(stmt->servername));
+
+ if (!HeapTupleIsValid(tp))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("server \"%s\" does not exist", stmt->servername)));
+
+ srvForm = (Form_pg_foreign_server) GETSTRUCT(tp);
+ srvId = srvForm->oid;
+
+ /*
+ * Only owner or a superuser can ALTER a SERVER.
+ */
+ if (!pg_foreign_server_ownercheck(srvId, GetUserId()))
+ aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FOREIGN_SERVER,
+ stmt->servername);
+
+ memset(repl_val, 0, sizeof(repl_val));
+ memset(repl_null, false, sizeof(repl_null));
+ memset(repl_repl, false, sizeof(repl_repl));
+
+ if (stmt->has_version)
+ {
+ /*
+ * Change the server VERSION string.
+ */
+ if (stmt->version)
+ repl_val[Anum_pg_foreign_server_srvversion - 1] =
+ CStringGetTextDatum(stmt->version);
+ else
+ repl_null[Anum_pg_foreign_server_srvversion - 1] = true;
+
+ repl_repl[Anum_pg_foreign_server_srvversion - 1] = true;
+ }
+
+ if (stmt->options)
+ {
+ ForeignDataWrapper *fdw = GetForeignDataWrapper(srvForm->srvfdw);
+ Datum datum;
+ bool isnull;
+
+ /* Extract the current srvoptions */
+ datum = SysCacheGetAttr(FOREIGNSERVEROID,
+ tp,
+ Anum_pg_foreign_server_srvoptions,
+ &isnull);
+ if (isnull)
+ datum = PointerGetDatum(NULL);
+
+ /* Prepare the options array */
+ datum = transformGenericOptions(ForeignServerRelationId,
+ datum,
+ stmt->options,
+ fdw->fdwvalidator);
+
+ /* A NULL result means the option list became empty. */
+ if (PointerIsValid(DatumGetPointer(datum)))
+ repl_val[Anum_pg_foreign_server_srvoptions - 1] = datum;
+ else
+ repl_null[Anum_pg_foreign_server_srvoptions - 1] = true;
+
+ repl_repl[Anum_pg_foreign_server_srvoptions - 1] = true;
+ }
+
+ /* Everything looks good - update the tuple */
+ tp = heap_modify_tuple(tp, RelationGetDescr(rel),
+ repl_val, repl_null, repl_repl);
+
+ CatalogTupleUpdate(rel, &tp->t_self, tp);
+
+ InvokeObjectPostAlterHook(ForeignServerRelationId, srvId, 0);
+
+ ObjectAddressSet(address, ForeignServerRelationId, srvId);
+
+ heap_freetuple(tp);
+
+ table_close(rel, RowExclusiveLock);
+
+ return address;
+}
+
+
+/*
+ * Common routine to check permission for user-mapping-related DDL
+ * commands. We allow server owners to operate on any mapping, and
+ * users to operate on their own mapping.
+ *
+ * A user operating on their own mapping must additionally hold USAGE on
+ * the server.  Errors out (via aclcheck_error) when neither case applies.
+ */
+static void
+user_mapping_ddl_aclcheck(Oid umuserid, Oid serverid, const char *servername)
+{
+ Oid curuserid = GetUserId();
+
+ if (!pg_foreign_server_ownercheck(serverid, curuserid))
+ {
+ if (umuserid == curuserid)
+ {
+ AclResult aclresult;
+
+ /* Own mapping: still require USAGE on the server. */
+ aclresult = pg_foreign_server_aclcheck(serverid, curuserid, ACL_USAGE);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error(aclresult, OBJECT_FOREIGN_SERVER, servername);
+ }
+ else
+ aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FOREIGN_SERVER,
+ servername);
+ }
+}
+
+
+/*
+ * Create user mapping
+ *
+ * Maps a role (or PUBLIC) to a foreign server.  Honors IF NOT EXISTS
+ * (returning InvalidObjectAddress when the mapping already exists).
+ * Options are validated with the server's FDW validator.
+ */
+ObjectAddress
+CreateUserMapping(CreateUserMappingStmt *stmt)
+{
+ Relation rel;
+ Datum useoptions;
+ Datum values[Natts_pg_user_mapping];
+ bool nulls[Natts_pg_user_mapping];
+ HeapTuple tuple;
+ Oid useId;
+ Oid umId;
+ ObjectAddress myself;
+ ObjectAddress referenced;
+ ForeignServer *srv;
+ ForeignDataWrapper *fdw;
+ RoleSpec *role = (RoleSpec *) stmt->user;
+
+ rel = table_open(UserMappingRelationId, RowExclusiveLock);
+
+ /* PUBLIC mappings use the sentinel ACL_ID_PUBLIC instead of a role OID. */
+ if (role->roletype == ROLESPEC_PUBLIC)
+ useId = ACL_ID_PUBLIC;
+ else
+ useId = get_rolespec_oid(stmt->user, false);
+
+ /* Check that the server exists. */
+ srv = GetForeignServerByName(stmt->servername, false);
+
+ user_mapping_ddl_aclcheck(useId, srv->serverid, stmt->servername);
+
+ /*
+ * Check that the user mapping is unique within server.
+ */
+ umId = GetSysCacheOid2(USERMAPPINGUSERSERVER, Anum_pg_user_mapping_oid,
+ ObjectIdGetDatum(useId),
+ ObjectIdGetDatum(srv->serverid));
+
+ if (OidIsValid(umId))
+ {
+ if (stmt->if_not_exists)
+ {
+ /*
+ * Since user mappings aren't members of extensions (see comments
+ * below), no need for checkMembershipInCurrentExtension here.
+ */
+ ereport(NOTICE,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg("user mapping for \"%s\" already exists for server \"%s\", skipping",
+ MappingUserName(useId),
+ stmt->servername)));
+
+ table_close(rel, RowExclusiveLock);
+ return InvalidObjectAddress;
+ }
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg("user mapping for \"%s\" already exists for server \"%s\"",
+ MappingUserName(useId),
+ stmt->servername)));
+ }
+
+ /* Needed for its validator when transforming the options below. */
+ fdw = GetForeignDataWrapper(srv->fdwid);
+
+ /*
+ * Insert tuple into pg_user_mapping.
+ */
+ memset(values, 0, sizeof(values));
+ memset(nulls, false, sizeof(nulls));
+
+ umId = GetNewOidWithIndex(rel, UserMappingOidIndexId,
+ Anum_pg_user_mapping_oid);
+ values[Anum_pg_user_mapping_oid - 1] = ObjectIdGetDatum(umId);
+ values[Anum_pg_user_mapping_umuser - 1] = ObjectIdGetDatum(useId);
+ values[Anum_pg_user_mapping_umserver - 1] = ObjectIdGetDatum(srv->serverid);
+
+ /* Add user options */
+ useoptions = transformGenericOptions(UserMappingRelationId,
+ PointerGetDatum(NULL),
+ stmt->options,
+ fdw->fdwvalidator);
+
+ if (PointerIsValid(DatumGetPointer(useoptions)))
+ values[Anum_pg_user_mapping_umoptions - 1] = useoptions;
+ else
+ nulls[Anum_pg_user_mapping_umoptions - 1] = true;
+
+ tuple = heap_form_tuple(rel->rd_att, values, nulls);
+
+ CatalogTupleInsert(rel, tuple);
+
+ heap_freetuple(tuple);
+
+ /* Add dependency on the server */
+ myself.classId = UserMappingRelationId;
+ myself.objectId = umId;
+ myself.objectSubId = 0;
+
+ referenced.classId = ForeignServerRelationId;
+ referenced.objectId = srv->serverid;
+ referenced.objectSubId = 0;
+ recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
+
+ if (OidIsValid(useId))
+ {
+ /* Record the mapped user dependency */
+ recordDependencyOnOwner(UserMappingRelationId, umId, useId);
+ }
+
+ /*
+ * Perhaps someday there should be a recordDependencyOnCurrentExtension
+ * call here; but since roles aren't members of extensions, it seems like
+ * user mappings shouldn't be either. Note that the grammar and pg_dump
+ * would need to be extended too if we change this.
+ */
+
+ /* Post creation hook for new user mapping */
+ InvokeObjectPostCreateHook(UserMappingRelationId, umId, 0);
+
+ table_close(rel, RowExclusiveLock);
+
+ return myself;
+}
+
+
+/*
+ * Alter user mapping
+ *
+ * Updates the options of an existing mapping for a role (or PUBLIC) on a
+ * server, validating with the server's FDW validator.  Returns the
+ * mapping's ObjectAddress.
+ */
+ObjectAddress
+AlterUserMapping(AlterUserMappingStmt *stmt)
+{
+ Relation rel;
+ HeapTuple tp;
+ Datum repl_val[Natts_pg_user_mapping];
+ bool repl_null[Natts_pg_user_mapping];
+ bool repl_repl[Natts_pg_user_mapping];
+ Oid useId;
+ Oid umId;
+ ForeignServer *srv;
+ ObjectAddress address;
+ RoleSpec *role = (RoleSpec *) stmt->user;
+
+ rel = table_open(UserMappingRelationId, RowExclusiveLock);
+
+ /* PUBLIC mappings use the sentinel ACL_ID_PUBLIC instead of a role OID. */
+ if (role->roletype == ROLESPEC_PUBLIC)
+ useId = ACL_ID_PUBLIC;
+ else
+ useId = get_rolespec_oid(stmt->user, false);
+
+ srv = GetForeignServerByName(stmt->servername, false);
+
+ /* Locate the mapping by (user, server). */
+ umId = GetSysCacheOid2(USERMAPPINGUSERSERVER, Anum_pg_user_mapping_oid,
+ ObjectIdGetDatum(useId),
+ ObjectIdGetDatum(srv->serverid));
+ if (!OidIsValid(umId))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("user mapping for \"%s\" does not exist for server \"%s\"",
+ MappingUserName(useId), stmt->servername)));
+
+ user_mapping_ddl_aclcheck(useId, srv->serverid, stmt->servername);
+
+ tp = SearchSysCacheCopy1(USERMAPPINGOID, ObjectIdGetDatum(umId));
+
+ if (!HeapTupleIsValid(tp))
+ elog(ERROR, "cache lookup failed for user mapping %u", umId);
+
+ memset(repl_val, 0, sizeof(repl_val));
+ memset(repl_null, false, sizeof(repl_null));
+ memset(repl_repl, false, sizeof(repl_repl));
+
+ if (stmt->options)
+ {
+ ForeignDataWrapper *fdw;
+ Datum datum;
+ bool isnull;
+
+ /*
+ * Process the options.
+ */
+
+ fdw = GetForeignDataWrapper(srv->fdwid);
+
+ datum = SysCacheGetAttr(USERMAPPINGUSERSERVER,
+ tp,
+ Anum_pg_user_mapping_umoptions,
+ &isnull);
+ if (isnull)
+ datum = PointerGetDatum(NULL);
+
+ /* Prepare the options array */
+ datum = transformGenericOptions(UserMappingRelationId,
+ datum,
+ stmt->options,
+ fdw->fdwvalidator);
+
+ /* A NULL result means the option list became empty. */
+ if (PointerIsValid(DatumGetPointer(datum)))
+ repl_val[Anum_pg_user_mapping_umoptions - 1] = datum;
+ else
+ repl_null[Anum_pg_user_mapping_umoptions - 1] = true;
+
+ repl_repl[Anum_pg_user_mapping_umoptions - 1] = true;
+ }
+
+ /* Everything looks good - update the tuple */
+ tp = heap_modify_tuple(tp, RelationGetDescr(rel),
+ repl_val, repl_null, repl_repl);
+
+ CatalogTupleUpdate(rel, &tp->t_self, tp);
+
+ InvokeObjectPostAlterHook(UserMappingRelationId,
+ umId, 0);
+
+ ObjectAddressSet(address, UserMappingRelationId, umId);
+
+ heap_freetuple(tp);
+
+ table_close(rel, RowExclusiveLock);
+
+ return address;
+}
+
+
+/*
+ * Drop user mapping
+ *
+ * Returns the OID of the dropped mapping, or InvalidOid when IF EXISTS
+ * was given and the role, server, or mapping was not found (a NOTICE is
+ * issued in each of those cases).
+ */
+Oid
+RemoveUserMapping(DropUserMappingStmt *stmt)
+{
+ ObjectAddress object;
+ Oid useId;
+ Oid umId;
+ ForeignServer *srv;
+ RoleSpec *role = (RoleSpec *) stmt->user;
+
+ /* PUBLIC mappings use the sentinel ACL_ID_PUBLIC instead of a role OID. */
+ if (role->roletype == ROLESPEC_PUBLIC)
+ useId = ACL_ID_PUBLIC;
+ else
+ {
+ useId = get_rolespec_oid(stmt->user, stmt->missing_ok);
+ if (!OidIsValid(useId))
+ {
+ /*
+ * IF EXISTS specified, role not found and not public. Notice this
+ * and leave.
+ */
+ elog(NOTICE, "role \"%s\" does not exist, skipping",
+ role->rolename);
+ return InvalidOid;
+ }
+ }
+
+ srv = GetForeignServerByName(stmt->servername, true);
+
+ if (!srv)
+ {
+ if (!stmt->missing_ok)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("server \"%s\" does not exist",
+ stmt->servername)));
+ /* IF EXISTS, just note it */
+ ereport(NOTICE,
+ (errmsg("server \"%s\" does not exist, skipping",
+ stmt->servername)));
+ return InvalidOid;
+ }
+
+ /* Locate the mapping by (user, server). */
+ umId = GetSysCacheOid2(USERMAPPINGUSERSERVER, Anum_pg_user_mapping_oid,
+ ObjectIdGetDatum(useId),
+ ObjectIdGetDatum(srv->serverid));
+
+ if (!OidIsValid(umId))
+ {
+ if (!stmt->missing_ok)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("user mapping for \"%s\" does not exist for server \"%s\"",
+ MappingUserName(useId), stmt->servername)));
+
+ /* IF EXISTS specified, just note it */
+ ereport(NOTICE,
+ (errmsg("user mapping for \"%s\" does not exist for server \"%s\", skipping",
+ MappingUserName(useId), stmt->servername)));
+ return InvalidOid;
+ }
+
+ user_mapping_ddl_aclcheck(useId, srv->serverid, srv->servername);
+
+ /*
+ * Do the deletion
+ */
+ object.classId = UserMappingRelationId;
+ object.objectId = umId;
+ object.objectSubId = 0;
+
+ performDeletion(&object, DROP_CASCADE, 0);
+
+ return umId;
+}
+
+
+/*
+ * Create a foreign table
+ * call after DefineRelation().
+ *
+ * Inserts the pg_foreign_table row tying relation "relid" to the server
+ * named in the statement, and records a pg_class dependency on the server.
+ * The caller must have USAGE on that server.
+ */
+void
+CreateForeignTable(CreateForeignTableStmt *stmt, Oid relid)
+{
+ Relation ftrel;
+ Datum ftoptions;
+ Datum values[Natts_pg_foreign_table];
+ bool nulls[Natts_pg_foreign_table];
+ HeapTuple tuple;
+ AclResult aclresult;
+ ObjectAddress myself;
+ ObjectAddress referenced;
+ Oid ownerId;
+ ForeignDataWrapper *fdw;
+ ForeignServer *server;
+
+ /*
+ * Advance command counter to ensure the pg_attribute tuple is visible;
+ * the tuple might be updated to add constraints in previous step.
+ */
+ CommandCounterIncrement();
+
+ ftrel = table_open(ForeignTableRelationId, RowExclusiveLock);
+
+ /*
+ * For now the owner cannot be specified on create. Use effective user ID.
+ */
+ ownerId = GetUserId();
+
+ /*
+ * Check that the foreign server exists and that we have USAGE on it. Also
+ * get the actual FDW for option validation etc.
+ */
+ server = GetForeignServerByName(stmt->servername, false);
+ aclresult = pg_foreign_server_aclcheck(server->serverid, ownerId, ACL_USAGE);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error(aclresult, OBJECT_FOREIGN_SERVER, server->servername);
+
+ fdw = GetForeignDataWrapper(server->fdwid);
+
+ /*
+ * Insert tuple into pg_foreign_table.
+ */
+ memset(values, 0, sizeof(values));
+ memset(nulls, false, sizeof(nulls));
+
+ values[Anum_pg_foreign_table_ftrelid - 1] = ObjectIdGetDatum(relid);
+ values[Anum_pg_foreign_table_ftserver - 1] = ObjectIdGetDatum(server->serverid);
+ /* Add table generic options */
+ ftoptions = transformGenericOptions(ForeignTableRelationId,
+ PointerGetDatum(NULL),
+ stmt->options,
+ fdw->fdwvalidator);
+
+ if (PointerIsValid(DatumGetPointer(ftoptions)))
+ values[Anum_pg_foreign_table_ftoptions - 1] = ftoptions;
+ else
+ nulls[Anum_pg_foreign_table_ftoptions - 1] = true;
+
+ tuple = heap_form_tuple(ftrel->rd_att, values, nulls);
+
+ CatalogTupleInsert(ftrel, tuple);
+
+ heap_freetuple(tuple);
+
+ /* Add pg_class dependency on the server */
+ myself.classId = RelationRelationId;
+ myself.objectId = relid;
+ myself.objectSubId = 0;
+
+ referenced.classId = ForeignServerRelationId;
+ referenced.objectId = server->serverid;
+ referenced.objectSubId = 0;
+ recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
+
+ table_close(ftrel, RowExclusiveLock);
+}
+
+/*
+ * Import a foreign schema
+ *
+ * Asks the server's FDW for a list of CREATE FOREIGN TABLE commands and
+ * executes each one, redirected into the local schema named in the
+ * statement.  Requires USAGE on the server and CREATE on the schema, and
+ * an FDW that provides an ImportForeignSchema callback.
+ */
+void
+ImportForeignSchema(ImportForeignSchemaStmt *stmt)
+{
+ ForeignServer *server;
+ ForeignDataWrapper *fdw;
+ FdwRoutine *fdw_routine;
+ AclResult aclresult;
+ List *cmd_list;
+ ListCell *lc;
+
+ /* Check that the foreign server exists and that we have USAGE on it */
+ server = GetForeignServerByName(stmt->server_name, false);
+ aclresult = pg_foreign_server_aclcheck(server->serverid, GetUserId(), ACL_USAGE);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error(aclresult, OBJECT_FOREIGN_SERVER, server->servername);
+
+ /* Check that the schema exists and we have CREATE permissions on it */
+ (void) LookupCreationNamespace(stmt->local_schema);
+
+ /* Get the FDW and check it supports IMPORT */
+ fdw = GetForeignDataWrapper(server->fdwid);
+ if (!OidIsValid(fdw->fdwhandler))
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("foreign-data wrapper \"%s\" has no handler",
+ fdw->fdwname)));
+ fdw_routine = GetFdwRoutine(fdw->fdwhandler);
+ if (fdw_routine->ImportForeignSchema == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_FDW_NO_SCHEMAS),
+ errmsg("foreign-data wrapper \"%s\" does not support IMPORT FOREIGN SCHEMA",
+ fdw->fdwname)));
+
+ /* Call FDW to get a list of commands */
+ cmd_list = fdw_routine->ImportForeignSchema(stmt, server->serverid);
+
+ /* Parse and execute each command */
+ foreach(lc, cmd_list)
+ {
+ char *cmd = (char *) lfirst(lc);
+ import_error_callback_arg callback_arg;
+ ErrorContextCallback sqlerrcontext;
+ List *raw_parsetree_list;
+ ListCell *lc2;
+
+ /*
+ * Setup error traceback support for ereport(). This is so that any
+ * error in the generated SQL will be displayed nicely.
+ */
+ callback_arg.tablename = NULL; /* not known yet */
+ callback_arg.cmd = cmd;
+ sqlerrcontext.callback = import_error_callback;
+ sqlerrcontext.arg = (void *) &callback_arg;
+ sqlerrcontext.previous = error_context_stack;
+ error_context_stack = &sqlerrcontext;
+
+ /*
+ * Parse the SQL string into a list of raw parse trees.
+ */
+ raw_parsetree_list = pg_parse_query(cmd);
+
+ /*
+ * Process each parse tree (we allow the FDW to put more than one
+ * command per string, though this isn't really advised).
+ */
+ foreach(lc2, raw_parsetree_list)
+ {
+ RawStmt *rs = lfirst_node(RawStmt, lc2);
+ CreateForeignTableStmt *cstmt = (CreateForeignTableStmt *) rs->stmt;
+ PlannedStmt *pstmt;
+
+ /*
+ * Because we only allow CreateForeignTableStmt, we can skip parse
+ * analysis, rewrite, and planning steps here.
+ */
+ if (!IsA(cstmt, CreateForeignTableStmt))
+ elog(ERROR,
+ "foreign-data wrapper \"%s\" returned incorrect statement type %d",
+ fdw->fdwname, (int) nodeTag(cstmt));
+
+ /* Ignore commands for tables excluded by filter options */
+ if (!IsImportableForeignTable(cstmt->base.relation->relname, stmt))
+ continue;
+
+ /* Enable reporting of current table's name on error */
+ callback_arg.tablename = cstmt->base.relation->relname;
+
+ /* Ensure creation schema is the one given in IMPORT statement */
+ cstmt->base.relation->schemaname = pstrdup(stmt->local_schema);
+
+ /* No planning needed, just make a wrapper PlannedStmt */
+ pstmt = makeNode(PlannedStmt);
+ pstmt->commandType = CMD_UTILITY;
+ pstmt->canSetTag = false;
+ pstmt->utilityStmt = (Node *) cstmt;
+ pstmt->stmt_location = rs->stmt_location;
+ pstmt->stmt_len = rs->stmt_len;
+
+ /* Execute statement */
+ ProcessUtility(pstmt, cmd, false,
+ PROCESS_UTILITY_SUBCOMMAND, NULL, NULL,
+ None_Receiver, NULL);
+
+ /* Be sure to advance the command counter between subcommands */
+ CommandCounterIncrement();
+
+ callback_arg.tablename = NULL;
+ }
+
+ /* Pop our error-context frame before moving to the next command. */
+ error_context_stack = sqlerrcontext.previous;
+ }
+}
+
+/*
+ * error context callback to let us supply the failing SQL statement's text
+ *
+ * "arg" is an import_error_callback_arg set up by ImportForeignSchema.
+ */
+static void
+import_error_callback(void *arg)
+{
+ import_error_callback_arg *callback_arg = (import_error_callback_arg *) arg;
+ int syntaxerrposition;
+
+ /* If it's a syntax error, convert to internal syntax error report */
+ syntaxerrposition = geterrposition();
+ if (syntaxerrposition > 0)
+ {
+ errposition(0);
+ internalerrposition(syntaxerrposition);
+ internalerrquery(callback_arg->cmd);
+ }
+
+ /* Table name is only known once a command has been identified. */
+ if (callback_arg->tablename)
+ errcontext("importing foreign table \"%s\"",
+ callback_arg->tablename);
+}
diff --git a/src/backend/commands/functioncmds.c b/src/backend/commands/functioncmds.c
new file mode 100644
index 0000000..00a6d28
--- /dev/null
+++ b/src/backend/commands/functioncmds.c
@@ -0,0 +1,2374 @@
+/*-------------------------------------------------------------------------
+ *
+ * functioncmds.c
+ *
+ * Routines for CREATE and DROP FUNCTION commands and CREATE and DROP
+ * CAST commands.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/commands/functioncmds.c
+ *
+ * DESCRIPTION
+ * These routines take the parse tree and pick out the
+ * appropriate arguments/flags, and pass the results to the
+ * corresponding "FooDefine" routines (in src/catalog) that do
+ * the actual catalog-munging. These routines also verify permission
+ * of the user to execute the command.
+ *
+ * NOTES
+ * These things must be defined and committed in the following order:
+ * "create function":
+ * input/output, recv/send procedures
+ * "create type":
+ * type
+ * "create operator":
+ * operators
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/htup_details.h"
+#include "access/sysattr.h"
+#include "access/table.h"
+#include "catalog/catalog.h"
+#include "catalog/dependency.h"
+#include "catalog/indexing.h"
+#include "catalog/objectaccess.h"
+#include "catalog/pg_aggregate.h"
+#include "catalog/pg_cast.h"
+#include "catalog/pg_language.h"
+#include "catalog/pg_namespace.h"
+#include "catalog/pg_proc.h"
+#include "catalog/pg_transform.h"
+#include "catalog/pg_type.h"
+#include "commands/alter.h"
+#include "commands/defrem.h"
+#include "commands/extension.h"
+#include "commands/proclang.h"
+#include "executor/execdesc.h"
+#include "executor/executor.h"
+#include "executor/functions.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "optimizer/optimizer.h"
+#include "parser/analyze.h"
+#include "parser/parse_coerce.h"
+#include "parser/parse_collate.h"
+#include "parser/parse_expr.h"
+#include "parser/parse_func.h"
+#include "parser/parse_type.h"
+#include "pgstat.h"
+#include "tcop/pquery.h"
+#include "tcop/utility.h"
+#include "utils/acl.h"
+#include "utils/builtins.h"
+#include "utils/fmgroids.h"
+#include "utils/guc.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+#include "utils/snapmgr.h"
+#include "utils/syscache.h"
+#include "utils/typcache.h"
+
+/*
+ * Examine the RETURNS clause of a CREATE FUNCTION statement and report the
+ * findings via *prorettype_p (result type OID) and *returnsSet_p (SETOF?).
+ *
+ * This is more involved than an ordinary type-name lookup because a shell
+ * type may be referenced, or even created on the fly if the named return
+ * type doesn't exist yet.  (Without that, there'd be no way to define the
+ * I/O functions for a new type.)  SQL-language functions can't cope with
+ * shell types, so that combination is rejected here rather than in the
+ * SQL-function validator, avoiding a NOTICE followed by an ERROR for the
+ * same problem.
+ */
+static void
+compute_return_type(TypeName *returnType, Oid languageOid,
+					Oid *prorettype_p, bool *returnsSet_p)
+{
+	Oid			rettype;
+	Type		typtup;
+	AclResult	aclresult;
+
+	typtup = LookupTypeName(NULL, returnType, NULL, false);
+
+	if (typtup == NULL)
+	{
+		/* Type doesn't exist yet: consider creating a shell type for it */
+		char	   *typnam = TypeNameToString(returnType);
+		Oid			namespaceId;
+		AclResult	nspaclresult;
+		char	   *typname;
+		ObjectAddress address;
+
+		/*
+		 * Only C-coded functions can be I/O functions.  We enforce this
+		 * restriction here mainly to prevent littering the catalogs with
+		 * shell types due to simple typos in user-defined function
+		 * definitions.
+		 */
+		if (languageOid != INTERNALlanguageId &&
+			languageOid != ClanguageId)
+			ereport(ERROR,
+					(errcode(ERRCODE_UNDEFINED_OBJECT),
+					 errmsg("type \"%s\" does not exist", typnam)));
+
+		/* Reject if there's typmod decoration, too */
+		if (returnType->typmods != NIL)
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("type modifier cannot be specified for shell type \"%s\"",
+							typnam)));
+
+		/* Otherwise, go ahead and make a shell type */
+		ereport(NOTICE,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("type \"%s\" is not yet defined", typnam),
+				 errdetail("Creating a shell type definition.")));
+		namespaceId = QualifiedNameGetCreationNamespace(returnType->names,
+														&typname);
+		nspaclresult = pg_namespace_aclcheck(namespaceId, GetUserId(),
+											 ACL_CREATE);
+		if (nspaclresult != ACLCHECK_OK)
+			aclcheck_error(nspaclresult, OBJECT_SCHEMA,
+						   get_namespace_name(namespaceId));
+		address = TypeShellMake(typname, namespaceId, GetUserId());
+		rettype = address.objectId;
+		Assert(OidIsValid(rettype));
+	}
+	else
+	{
+		/* Type exists; complain (or just notify) if it's only a shell */
+		if (!((Form_pg_type) GETSTRUCT(typtup))->typisdefined)
+		{
+			if (languageOid == SQLlanguageId)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+						 errmsg("SQL function cannot return shell type %s",
+								TypeNameToString(returnType))));
+			else
+				ereport(NOTICE,
+						(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+						 errmsg("return type %s is only a shell",
+								TypeNameToString(returnType))));
+		}
+		rettype = typeTypeId(typtup);
+		ReleaseSysCache(typtup);
+	}
+
+	/* Caller must have USAGE privilege on the result type */
+	aclresult = pg_type_aclcheck(rettype, GetUserId(), ACL_USAGE);
+	if (aclresult != ACLCHECK_OK)
+		aclcheck_error_type(aclresult, rettype);
+
+	*prorettype_p = rettype;
+	*returnsSet_p = returnType->setof;
+}
+
+/*
+ * Interpret the function parameter list of a CREATE FUNCTION,
+ * CREATE PROCEDURE, or CREATE AGGREGATE statement.
+ *
+ * Input parameters:
+ * parameters: list of FunctionParameter structs
+ * languageOid: OID of function language (InvalidOid if it's CREATE AGGREGATE)
+ * objtype: identifies type of object being created
+ *
+ * Results are stored into output parameters.  parameterTypes must always
+ * be created, but the other arrays/lists can be NULL pointers if not needed.
+ * variadicArgType is set to the variadic array type if there's a VARIADIC
+ * parameter (there can be only one); or to InvalidOid if not.
+ * requiredResultType is set to InvalidOid if there are no OUT parameters,
+ * else it is set to the OID of the implied result type.
+ */
+void
+interpret_function_parameter_list(ParseState *pstate,
+								  List *parameters,
+								  Oid languageOid,
+								  ObjectType objtype,
+								  oidvector **parameterTypes,
+								  List **parameterTypes_list,
+								  ArrayType **allParameterTypes,
+								  ArrayType **parameterModes,
+								  ArrayType **parameterNames,
+								  List **inParameterNames_list,
+								  List **parameterDefaults,
+								  Oid *variadicArgType,
+								  Oid *requiredResultType)
+{
+	int			parameterCount = list_length(parameters);
+	Oid		   *inTypes;
+	int			inCount = 0;
+	Datum	   *allTypes;
+	Datum	   *paramModes;
+	Datum	   *paramNames;
+	int			outCount = 0;
+	int			varCount = 0;
+	bool		have_names = false;
+	bool		have_defaults = false;
+	ListCell   *x;
+	int			i;
+
+	*variadicArgType = InvalidOid;	/* default result */
+	*requiredResultType = InvalidOid;	/* default result */
+
+	/*
+	 * Allocate work arrays sized for the worst case (every parameter being an
+	 * input).  paramNames is zero-filled so that unnamed parameters can be
+	 * recognized as null Datums when building the output array below.
+	 */
+	inTypes = (Oid *) palloc(parameterCount * sizeof(Oid));
+	allTypes = (Datum *) palloc(parameterCount * sizeof(Datum));
+	paramModes = (Datum *) palloc(parameterCount * sizeof(Datum));
+	paramNames = (Datum *) palloc0(parameterCount * sizeof(Datum));
+	*parameterDefaults = NIL;
+
+	/* Scan the list and extract data into work arrays */
+	i = 0;
+	foreach(x, parameters)
+	{
+		FunctionParameter *fp = (FunctionParameter *) lfirst(x);
+		TypeName   *t = fp->argType;
+		FunctionParameterMode fpmode = fp->mode;
+		bool		isinput = false;
+		Oid			toid;
+		Type		typtup;
+		AclResult	aclresult;
+
+		/* For our purposes here, a defaulted mode spec is identical to IN */
+		if (fpmode == FUNC_PARAM_DEFAULT)
+			fpmode = FUNC_PARAM_IN;
+
+		/* Resolve the parameter's type; shell types are allowed except below */
+		typtup = LookupTypeName(NULL, t, NULL, false);
+		if (typtup)
+		{
+			if (!((Form_pg_type) GETSTRUCT(typtup))->typisdefined)
+			{
+				/* As above, hard error if language is SQL */
+				if (languageOid == SQLlanguageId)
+					ereport(ERROR,
+							(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+							 errmsg("SQL function cannot accept shell type %s",
+									TypeNameToString(t))));
+				/* We don't allow creating aggregates on shell types either */
+				else if (objtype == OBJECT_AGGREGATE)
+					ereport(ERROR,
+							(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+							 errmsg("aggregate cannot accept shell type %s",
+									TypeNameToString(t))));
+				else
+					ereport(NOTICE,
+							(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+							 errmsg("argument type %s is only a shell",
+									TypeNameToString(t))));
+			}
+			toid = typeTypeId(typtup);
+			ReleaseSysCache(typtup);
+		}
+		else
+		{
+			ereport(ERROR,
+					(errcode(ERRCODE_UNDEFINED_OBJECT),
+					 errmsg("type %s does not exist",
+							TypeNameToString(t))));
+			toid = InvalidOid;	/* keep compiler quiet */
+		}
+
+		/* Caller must have USAGE privilege on each argument type */
+		aclresult = pg_type_aclcheck(toid, GetUserId(), ACL_USAGE);
+		if (aclresult != ACLCHECK_OK)
+			aclcheck_error_type(aclresult, toid);
+
+		/* SETOF is never legal as an argument type */
+		if (t->setof)
+		{
+			if (objtype == OBJECT_AGGREGATE)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+						 errmsg("aggregates cannot accept set arguments")));
+			else if (objtype == OBJECT_PROCEDURE)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+						 errmsg("procedures cannot accept set arguments")));
+			else
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+						 errmsg("functions cannot accept set arguments")));
+		}
+
+		/* handle input parameters */
+		if (fpmode != FUNC_PARAM_OUT && fpmode != FUNC_PARAM_TABLE)
+		{
+			/* other input parameters can't follow a VARIADIC parameter */
+			if (varCount > 0)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+						 errmsg("VARIADIC parameter must be the last input parameter")));
+			inTypes[inCount++] = toid;
+			isinput = true;
+			if (parameterTypes_list)
+				*parameterTypes_list = lappend_oid(*parameterTypes_list, toid);
+		}
+
+		/* handle output parameters */
+		if (fpmode != FUNC_PARAM_IN && fpmode != FUNC_PARAM_VARIADIC)
+		{
+			if (objtype == OBJECT_PROCEDURE)
+			{
+				/*
+				 * We disallow OUT-after-VARIADIC only for procedures. While
+				 * such a case causes no confusion in ordinary function calls,
+				 * it would cause confusion in a CALL statement.
+				 */
+				if (varCount > 0)
+					ereport(ERROR,
+							(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+							 errmsg("VARIADIC parameter must be the last parameter")));
+				/* Procedures with output parameters always return RECORD */
+				*requiredResultType = RECORDOID;
+			}
+			else if (outCount == 0) /* save first output param's type */
+				*requiredResultType = toid;
+			outCount++;
+		}
+
+		if (fpmode == FUNC_PARAM_VARIADIC)
+		{
+			*variadicArgType = toid;
+			varCount++;
+			/* validate variadic parameter type */
+			switch (toid)
+			{
+				case ANYARRAYOID:
+				case ANYCOMPATIBLEARRAYOID:
+				case ANYOID:
+					/* okay */
+					break;
+				default:
+					if (!OidIsValid(get_element_type(toid)))
+						ereport(ERROR,
+								(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+								 errmsg("VARIADIC parameter must be an array")));
+					break;
+			}
+		}
+
+		allTypes[i] = ObjectIdGetDatum(toid);
+
+		paramModes[i] = CharGetDatum(fpmode);
+
+		if (fp->name && fp->name[0])
+		{
+			ListCell   *px;
+
+			/*
+			 * As of Postgres 9.0 we disallow using the same name for two
+			 * input or two output function parameters.  Depending on the
+			 * function's language, conflicting input and output names might
+			 * be bad too, but we leave it to the PL to complain if so.
+			 *
+			 * This rescan of the earlier list entries is O(N^2), but
+			 * parameter lists are short enough that it doesn't matter.
+			 */
+			foreach(px, parameters)
+			{
+				FunctionParameter *prevfp = (FunctionParameter *) lfirst(px);
+				FunctionParameterMode prevfpmode;
+
+				if (prevfp == fp)
+					break;
+				/* as above, default mode is IN */
+				prevfpmode = prevfp->mode;
+				if (prevfpmode == FUNC_PARAM_DEFAULT)
+					prevfpmode = FUNC_PARAM_IN;
+				/* pure in doesn't conflict with pure out */
+				if ((fpmode == FUNC_PARAM_IN ||
+					 fpmode == FUNC_PARAM_VARIADIC) &&
+					(prevfpmode == FUNC_PARAM_OUT ||
+					 prevfpmode == FUNC_PARAM_TABLE))
+					continue;
+				if ((prevfpmode == FUNC_PARAM_IN ||
+					 prevfpmode == FUNC_PARAM_VARIADIC) &&
+					(fpmode == FUNC_PARAM_OUT ||
+					 fpmode == FUNC_PARAM_TABLE))
+					continue;
+				if (prevfp->name && prevfp->name[0] &&
+					strcmp(prevfp->name, fp->name) == 0)
+					ereport(ERROR,
+							(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+							 errmsg("parameter name \"%s\" used more than once",
+									fp->name)));
+			}
+
+			paramNames[i] = CStringGetTextDatum(fp->name);
+			have_names = true;
+		}
+
+		/* Unnamed parameters are represented by empty strings in this list */
+		if (inParameterNames_list)
+			*inParameterNames_list = lappend(*inParameterNames_list, makeString(fp->name ? fp->name : pstrdup("")));
+
+		if (fp->defexpr)
+		{
+			Node	   *def;
+
+			if (!isinput)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+						 errmsg("only input parameters can have default values")));
+
+			def = transformExpr(pstate, fp->defexpr,
+								EXPR_KIND_FUNCTION_DEFAULT);
+			def = coerce_to_specific_type(pstate, def, toid, "DEFAULT");
+			assign_expr_collations(pstate, def);
+
+			/*
+			 * Make sure no variables are referred to (this is probably dead
+			 * code now that add_missing_from is history).
+			 */
+			if (list_length(pstate->p_rtable) != 0 ||
+				contain_var_clause(def))
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+						 errmsg("cannot use table references in parameter default value")));
+
+			/*
+			 * transformExpr() should have already rejected subqueries,
+			 * aggregates, and window functions, based on the EXPR_KIND_ for a
+			 * default expression.
+			 *
+			 * It can't return a set either --- but coerce_to_specific_type
+			 * already checked that for us.
+			 *
+			 * Note: the point of these restrictions is to ensure that an
+			 * expression that, on its face, hasn't got subplans, aggregates,
+			 * etc cannot suddenly have them after function default arguments
+			 * are inserted.
+			 */
+
+			*parameterDefaults = lappend(*parameterDefaults, def);
+			have_defaults = true;
+		}
+		else
+		{
+			if (isinput && have_defaults)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+						 errmsg("input parameters after one with a default value must also have defaults")));
+
+			/*
+			 * For procedures, we also can't allow OUT parameters after one
+			 * with a default, because the same sort of confusion arises in a
+			 * CALL statement.
+			 */
+			if (objtype == OBJECT_PROCEDURE && have_defaults)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+						 errmsg("procedure OUT parameters cannot appear after one with a default value")));
+		}
+
+		i++;
+	}
+
+	/* Now construct the proper outputs as needed */
+	*parameterTypes = buildoidvector(inTypes, inCount);
+
+	/* allParameterTypes/parameterModes are stored only when non-default */
+	if (outCount > 0 || varCount > 0)
+	{
+		*allParameterTypes = construct_array(allTypes, parameterCount, OIDOID,
+											 sizeof(Oid), true, TYPALIGN_INT);
+		*parameterModes = construct_array(paramModes, parameterCount, CHAROID,
+										  1, true, TYPALIGN_CHAR);
+		if (outCount > 1)
+			*requiredResultType = RECORDOID;
+		/* otherwise we set requiredResultType correctly above */
+	}
+	else
+	{
+		*allParameterTypes = NULL;
+		*parameterModes = NULL;
+	}
+
+	if (have_names)
+	{
+		/* Unnamed entries must become empty strings, not nulls, in the array */
+		for (i = 0; i < parameterCount; i++)
+		{
+			if (paramNames[i] == PointerGetDatum(NULL))
+				paramNames[i] = CStringGetTextDatum("");
+		}
+		*parameterNames = construct_array(paramNames, parameterCount, TEXTOID,
+										  -1, false, TYPALIGN_INT);
+	}
+	else
+		*parameterNames = NULL;
+}
+
+
+/*
+ * Recognize one of the options that can be passed to both CREATE
+ * FUNCTION and ALTER FUNCTION and return it via one of the out
+ * parameters.  Returns true if the passed option was recognized
+ * (false means the caller must handle the option itself).  If
+ * the out parameter we were going to assign to points to non-NULL,
+ * raise a duplicate-clause error.  (We don't try to detect duplicate
+ * SET parameters though --- if you're redundant, the last one wins.)
+ *
+ * Note that only SECURITY and SET are legal for procedures; every other
+ * option jumps to procedure_error when is_procedure is true.
+ */
+static bool
+compute_common_attribute(ParseState *pstate,
+						 bool is_procedure,
+						 DefElem *defel,
+						 DefElem **volatility_item,
+						 DefElem **strict_item,
+						 DefElem **security_item,
+						 DefElem **leakproof_item,
+						 List **set_items,
+						 DefElem **cost_item,
+						 DefElem **rows_item,
+						 DefElem **support_item,
+						 DefElem **parallel_item)
+{
+	if (strcmp(defel->defname, "volatility") == 0)
+	{
+		if (is_procedure)
+			goto procedure_error;
+		if (*volatility_item)
+			errorConflictingDefElem(defel, pstate);
+
+		*volatility_item = defel;
+	}
+	else if (strcmp(defel->defname, "strict") == 0)
+	{
+		if (is_procedure)
+			goto procedure_error;
+		if (*strict_item)
+			errorConflictingDefElem(defel, pstate);
+
+		*strict_item = defel;
+	}
+	else if (strcmp(defel->defname, "security") == 0)
+	{
+		/* SECURITY DEFINER/INVOKER is allowed for procedures too */
+		if (*security_item)
+			errorConflictingDefElem(defel, pstate);
+
+		*security_item = defel;
+	}
+	else if (strcmp(defel->defname, "leakproof") == 0)
+	{
+		if (is_procedure)
+			goto procedure_error;
+		if (*leakproof_item)
+			errorConflictingDefElem(defel, pstate);
+
+		*leakproof_item = defel;
+	}
+	else if (strcmp(defel->defname, "set") == 0)
+	{
+		/* SET clauses accumulate; duplicates are resolved by "last one wins" */
+		*set_items = lappend(*set_items, defel->arg);
+	}
+	else if (strcmp(defel->defname, "cost") == 0)
+	{
+		if (is_procedure)
+			goto procedure_error;
+		if (*cost_item)
+			errorConflictingDefElem(defel, pstate);
+
+		*cost_item = defel;
+	}
+	else if (strcmp(defel->defname, "rows") == 0)
+	{
+		if (is_procedure)
+			goto procedure_error;
+		if (*rows_item)
+			errorConflictingDefElem(defel, pstate);
+
+		*rows_item = defel;
+	}
+	else if (strcmp(defel->defname, "support") == 0)
+	{
+		if (is_procedure)
+			goto procedure_error;
+		if (*support_item)
+			errorConflictingDefElem(defel, pstate);
+
+		*support_item = defel;
+	}
+	else if (strcmp(defel->defname, "parallel") == 0)
+	{
+		if (is_procedure)
+			goto procedure_error;
+		if (*parallel_item)
+			errorConflictingDefElem(defel, pstate);
+
+		*parallel_item = defel;
+	}
+	else
+		return false;
+
+	/* Recognized an option */
+	return true;
+
+procedure_error:
+	/* Reached for any function-only option appearing on a procedure */
+	ereport(ERROR,
+			(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+			 errmsg("invalid attribute in procedure definition"),
+			 parser_errposition(pstate, defel->location)));
+	return false;
+}
+
+/*
+ * Translate a "volatility" DefElem into the corresponding PROVOLATILE_*
+ * character code for pg_proc.provolatile.
+ */
+static char
+interpret_func_volatility(DefElem *defel)
+{
+	char	   *value = strVal(defel->arg);
+	char		result;
+
+	if (strcmp(value, "volatile") == 0)
+		result = PROVOLATILE_VOLATILE;
+	else if (strcmp(value, "stable") == 0)
+		result = PROVOLATILE_STABLE;
+	else if (strcmp(value, "immutable") == 0)
+		result = PROVOLATILE_IMMUTABLE;
+	else
+	{
+		/* can't happen for grammar-produced input */
+		elog(ERROR, "invalid volatility \"%s\"", value);
+		result = 0;				/* keep compiler quiet */
+	}
+	return result;
+}
+
+/*
+ * Translate a "parallel" DefElem into the corresponding PROPARALLEL_*
+ * character code for pg_proc.proparallel.
+ */
+static char
+interpret_func_parallel(DefElem *defel)
+{
+	char	   *value = strVal(defel->arg);
+	char		result;
+
+	if (strcmp(value, "unsafe") == 0)
+		result = PROPARALLEL_UNSAFE;
+	else if (strcmp(value, "restricted") == 0)
+		result = PROPARALLEL_RESTRICTED;
+	else if (strcmp(value, "safe") == 0)
+		result = PROPARALLEL_SAFE;
+	else
+	{
+		/* user-reachable, since PARALLEL takes an arbitrary ColId */
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("parameter \"parallel\" must be SAFE, RESTRICTED, or UNSAFE")));
+		result = PROPARALLEL_UNSAFE;	/* keep compiler quiet */
+	}
+	return result;
+}
+
+/*
+ * Apply a list of VariableSetStmt items to a proconfig array, producing the
+ * updated array.  Both the input and the result may be NULL, meaning a null
+ * catalog entry (no SET clauses at all).
+ */
+static ArrayType *
+update_proconfig_value(ArrayType *a, List *set_items)
+{
+	ListCell   *cell;
+
+	foreach(cell, set_items)
+	{
+		VariableSetStmt *sstmt = lfirst_node(VariableSetStmt, cell);
+		char	   *valuestr;
+
+		/* RESET ALL wipes out everything accumulated so far */
+		if (sstmt->kind == VAR_RESET_ALL)
+		{
+			a = NULL;
+			continue;
+		}
+
+		valuestr = ExtractSetVariableArgs(sstmt);
+		if (valuestr)
+			a = GUCArrayAdd(a, sstmt->name, valuestr);
+		else					/* RESET of a single variable */
+			a = GUCArrayDelete(a, sstmt->name);
+	}
+
+	return a;
+}
+
+/*
+ * Resolve a SUPPORT clause to the OID of the named planner support function,
+ * verifying its signature (internal -> internal) and that the caller is a
+ * superuser.
+ */
+static Oid
+interpret_func_support(DefElem *defel)
+{
+	List	   *procName = defGetQualifiedName(defel);
+	Oid			argList[1] = {INTERNALOID};
+	Oid			procOid;
+
+	/*
+	 * Support functions always take one INTERNAL argument and return
+	 * INTERNAL.
+	 */
+	procOid = LookupFuncName(procName, 1, argList, true);
+	if (!OidIsValid(procOid))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_FUNCTION),
+				 errmsg("function %s does not exist",
+						func_signature_string(procName, 1, NIL, argList))));
+
+	if (get_func_rettype(procOid) != INTERNALOID)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+				 errmsg("support function %s must return type %s",
+						NameListToString(procName), "internal")));
+
+	/*
+	 * Someday we might want an ACL check here; but for now, we insist that
+	 * you be superuser to specify a support function, so privilege on the
+	 * support function is moot.
+	 */
+	if (!superuser())
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 errmsg("must be superuser to specify a support function")));
+
+	return procOid;
+}
+
+
+/*
+ * Dissect the list of options assembled in gram.y into function
+ * attributes.
+ *
+ * Each output parameter is assigned only if the corresponding option
+ * appears in the list, so the caller must pre-initialize all of them
+ * to the desired defaults before calling.
+ */
+static void
+compute_function_attributes(ParseState *pstate,
+							bool is_procedure,
+							List *options,
+							List **as,
+							char **language,
+							Node **transform,
+							bool *windowfunc_p,
+							char *volatility_p,
+							bool *strict_p,
+							bool *security_definer,
+							bool *leakproof_p,
+							ArrayType **proconfig,
+							float4 *procost,
+							float4 *prorows,
+							Oid *prosupport,
+							char *parallel_p)
+{
+	ListCell   *option;
+	DefElem    *as_item = NULL;
+	DefElem    *language_item = NULL;
+	DefElem    *transform_item = NULL;
+	DefElem    *windowfunc_item = NULL;
+	DefElem    *volatility_item = NULL;
+	DefElem    *strict_item = NULL;
+	DefElem    *security_item = NULL;
+	DefElem    *leakproof_item = NULL;
+	List	   *set_items = NIL;
+	DefElem    *cost_item = NULL;
+	DefElem    *rows_item = NULL;
+	DefElem    *support_item = NULL;
+	DefElem    *parallel_item = NULL;
+
+	/* First pass: collect each option's DefElem, rejecting duplicates */
+	foreach(option, options)
+	{
+		DefElem    *defel = (DefElem *) lfirst(option);
+
+		if (strcmp(defel->defname, "as") == 0)
+		{
+			if (as_item)
+				errorConflictingDefElem(defel, pstate);
+			as_item = defel;
+		}
+		else if (strcmp(defel->defname, "language") == 0)
+		{
+			if (language_item)
+				errorConflictingDefElem(defel, pstate);
+			language_item = defel;
+		}
+		else if (strcmp(defel->defname, "transform") == 0)
+		{
+			if (transform_item)
+				errorConflictingDefElem(defel, pstate);
+			transform_item = defel;
+		}
+		else if (strcmp(defel->defname, "window") == 0)
+		{
+			if (windowfunc_item)
+				errorConflictingDefElem(defel, pstate);
+			/* WINDOW makes no sense for a procedure */
+			if (is_procedure)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+						 errmsg("invalid attribute in procedure definition"),
+						 parser_errposition(pstate, defel->location)));
+			windowfunc_item = defel;
+		}
+		else if (compute_common_attribute(pstate,
+										  is_procedure,
+										  defel,
+										  &volatility_item,
+										  &strict_item,
+										  &security_item,
+										  &leakproof_item,
+										  &set_items,
+										  &cost_item,
+										  &rows_item,
+										  &support_item,
+										  &parallel_item))
+		{
+			/* recognized common option */
+			continue;
+		}
+		else
+			elog(ERROR, "option \"%s\" not recognized",
+				 defel->defname);
+	}
+
+	/* Second pass: interpret whatever options were found */
+	if (as_item)
+		*as = (List *) as_item->arg;
+	if (language_item)
+		*language = strVal(language_item->arg);
+	if (transform_item)
+		*transform = transform_item->arg;
+	if (windowfunc_item)
+		*windowfunc_p = boolVal(windowfunc_item->arg);
+	if (volatility_item)
+		*volatility_p = interpret_func_volatility(volatility_item);
+	if (strict_item)
+		*strict_p = boolVal(strict_item->arg);
+	if (security_item)
+		*security_definer = boolVal(security_item->arg);
+	if (leakproof_item)
+		*leakproof_p = boolVal(leakproof_item->arg);
+	if (set_items)
+		*proconfig = update_proconfig_value(NULL, set_items);
+	if (cost_item)
+	{
+		*procost = defGetNumeric(cost_item);
+		if (*procost <= 0)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("COST must be positive")));
+	}
+	if (rows_item)
+	{
+		*prorows = defGetNumeric(rows_item);
+		if (*prorows <= 0)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("ROWS must be positive")));
+	}
+	if (support_item)
+		*prosupport = interpret_func_support(support_item);
+	if (parallel_item)
+		*parallel_p = interpret_func_parallel(parallel_item);
+}
+
+
+/*
+ * Interpret the function body specification (AS clause or inline SQL body).
+ *
+ * For a dynamically linked C language object, the form of the clause is
+ *
+ *	   AS <object file name> [, <link symbol name> ]
+ *
+ * In all other cases
+ *
+ *	   AS <object reference, or sql code>
+ *
+ * Output:
+ * *prosrc_str_p and *probin_str_p receive the values to store in
+ * pg_proc.prosrc and pg_proc.probin (the latter NULL except for C
+ * functions); *sql_body_out receives the parse-analyzed body for an
+ * unquoted SQL-standard function body, or NULL otherwise.
+ */
+static void
+interpret_AS_clause(Oid languageOid, const char *languageName,
+					char *funcname, List *as, Node *sql_body_in,
+					List *parameterTypes, List *inParameterNames,
+					char **prosrc_str_p, char **probin_str_p,
+					Node **sql_body_out,
+					const char *queryString)
+{
+	/* Exactly one of AS clause and inline SQL body must be present */
+	if (!sql_body_in && !as)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+				 errmsg("no function body specified")));
+
+	if (sql_body_in && as)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+				 errmsg("duplicate function body specified")));
+
+	if (sql_body_in && languageOid != SQLlanguageId)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+				 errmsg("inline SQL function body only valid for language SQL")));
+
+	*sql_body_out = NULL;
+
+	if (languageOid == ClanguageId)
+	{
+		/*
+		 * For "C" language, store the file name in probin and, when given,
+		 * the link symbol name in prosrc.  If link symbol is omitted,
+		 * substitute procedure name.  We also allow link symbol to be
+		 * specified as "-", since that was the habit in PG versions before
+		 * 8.4, and there might be dump files out there that don't translate
+		 * that back to "omitted".
+		 */
+		*probin_str_p = strVal(linitial(as));
+		if (list_length(as) == 1)
+			*prosrc_str_p = funcname;
+		else
+		{
+			*prosrc_str_p = strVal(lsecond(as));
+			if (strcmp(*prosrc_str_p, "-") == 0)
+				*prosrc_str_p = funcname;
+		}
+	}
+	else if (sql_body_in)
+	{
+		/* SQL-standard inline body: parse-analyze it now */
+		SQLFunctionParseInfoPtr pinfo;
+
+		pinfo = (SQLFunctionParseInfoPtr) palloc0(sizeof(SQLFunctionParseInfo));
+
+		/*
+		 * Build parse hook info so parameter references in the body can be
+		 * resolved by name or position.  Polymorphic argument types can't be
+		 * resolved at creation time, so they're disallowed here.
+		 */
+		pinfo->fname = funcname;
+		pinfo->nargs = list_length(parameterTypes);
+		pinfo->argtypes = (Oid *) palloc(pinfo->nargs * sizeof(Oid));
+		pinfo->argnames = (char **) palloc(pinfo->nargs * sizeof(char *));
+		for (int i = 0; i < list_length(parameterTypes); i++)
+		{
+			char	   *s = strVal(list_nth(inParameterNames, i));
+
+			pinfo->argtypes[i] = list_nth_oid(parameterTypes, i);
+			if (IsPolymorphicType(pinfo->argtypes[i]))
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+						 errmsg("SQL function with unquoted function body cannot have polymorphic arguments")));
+
+			/* empty string in the list means "no name" */
+			if (s[0] != '\0')
+				pinfo->argnames[i] = s;
+			else
+				pinfo->argnames[i] = NULL;
+		}
+
+		if (IsA(sql_body_in, List))
+		{
+			/* BEGIN ATOMIC ... END: a list of statements */
+			List	   *stmts = linitial_node(List, castNode(List, sql_body_in));
+			ListCell   *lc;
+			List	   *transformed_stmts = NIL;
+
+			foreach(lc, stmts)
+			{
+				Node	   *stmt = lfirst(lc);
+				Query	   *q;
+				ParseState *pstate = make_parsestate(NULL);
+
+				pstate->p_sourcetext = queryString;
+				sql_fn_parser_setup(pstate, pinfo);
+				q = transformStmt(pstate, stmt);
+				/* utility statements can't be stored as Queries yet */
+				if (q->commandType == CMD_UTILITY)
+					ereport(ERROR,
+							errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+							errmsg("%s is not yet supported in unquoted SQL function body",
+								   GetCommandTagName(CreateCommandTag(q->utilityStmt))));
+				transformed_stmts = lappend(transformed_stmts, q);
+				free_parsestate(pstate);
+			}
+
+			*sql_body_out = (Node *) list_make1(transformed_stmts);
+		}
+		else
+		{
+			/* RETURN <expr>: a single statement */
+			Query	   *q;
+			ParseState *pstate = make_parsestate(NULL);
+
+			pstate->p_sourcetext = queryString;
+			sql_fn_parser_setup(pstate, pinfo);
+			q = transformStmt(pstate, sql_body_in);
+			if (q->commandType == CMD_UTILITY)
+				ereport(ERROR,
+						errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+						errmsg("%s is not yet supported in unquoted SQL function body",
+							   GetCommandTagName(CreateCommandTag(q->utilityStmt))));
+			free_parsestate(pstate);
+
+			*sql_body_out = (Node *) q;
+		}
+
+		/*
+		 * We must put something in prosrc.  For the moment, just record an
+		 * empty string.  It might be useful to store the original text of the
+		 * CREATE FUNCTION statement --- but to make actual use of that in
+		 * error reports, we'd also have to adjust readfuncs.c to not throw
+		 * away node location fields when reading prosqlbody.
+		 */
+		*prosrc_str_p = pstrdup("");
+
+		/* But we definitely don't need probin. */
+		*probin_str_p = NULL;
+	}
+	else
+	{
+		/* Everything else wants the given string in prosrc. */
+		*prosrc_str_p = strVal(linitial(as));
+		*probin_str_p = NULL;
+
+		if (list_length(as) != 1)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+					 errmsg("only one AS item needed for language \"%s\"",
+							languageName)));
+
+		if (languageOid == INTERNALlanguageId)
+		{
+			/*
+			 * In PostgreSQL versions before 6.5, the SQL name of the created
+			 * function could not be different from the internal name, and
+			 * "prosrc" wasn't used.  So there is code out there that does
+			 * CREATE FUNCTION xyz AS '' LANGUAGE internal.  To preserve some
+			 * modicum of backwards compatibility, accept an empty "prosrc"
+			 * value as meaning the supplied SQL function name.
+			 */
+			if (strlen(*prosrc_str_p) == 0)
+				*prosrc_str_p = funcname;
+		}
+	}
+}
+
+
+/*
+ * CreateFunction
+ * Execute a CREATE FUNCTION (or CREATE PROCEDURE) utility statement.
+ */
+ObjectAddress
+CreateFunction(ParseState *pstate, CreateFunctionStmt *stmt)
+{
+ char *probin_str;
+ char *prosrc_str;
+ Node *prosqlbody;
+ Oid prorettype;
+ bool returnsSet;
+ char *language;
+ Oid languageOid;
+ Oid languageValidator;
+ Node *transformDefElem = NULL;
+ char *funcname;
+ Oid namespaceId;
+ AclResult aclresult;
+ oidvector *parameterTypes;
+ List *parameterTypes_list = NIL;
+ ArrayType *allParameterTypes;
+ ArrayType *parameterModes;
+ ArrayType *parameterNames;
+ List *inParameterNames_list = NIL;
+ List *parameterDefaults;
+ Oid variadicArgType;
+ List *trftypes_list = NIL;
+ ArrayType *trftypes;
+ Oid requiredResultType;
+ bool isWindowFunc,
+ isStrict,
+ security,
+ isLeakProof;
+ char volatility;
+ ArrayType *proconfig;
+ float4 procost;
+ float4 prorows;
+ Oid prosupport;
+ HeapTuple languageTuple;
+ Form_pg_language languageStruct;
+ List *as_clause;
+ char parallel;
+
+ /* Convert list of names to a name and namespace */
+ namespaceId = QualifiedNameGetCreationNamespace(stmt->funcname,
+ &funcname);
+
+ /* Check we have creation rights in target namespace */
+ aclresult = pg_namespace_aclcheck(namespaceId, GetUserId(), ACL_CREATE);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error(aclresult, OBJECT_SCHEMA,
+ get_namespace_name(namespaceId));
+
+ /* Set default attributes */
+ as_clause = NIL;
+ language = NULL;
+ isWindowFunc = false;
+ isStrict = false;
+ security = false;
+ isLeakProof = false;
+ volatility = PROVOLATILE_VOLATILE;
+ proconfig = NULL;
+ procost = -1; /* indicates not set */
+ prorows = -1; /* indicates not set */
+ prosupport = InvalidOid;
+ parallel = PROPARALLEL_UNSAFE;
+
+ /* Extract non-default attributes from stmt->options list */
+ compute_function_attributes(pstate,
+ stmt->is_procedure,
+ stmt->options,
+ &as_clause, &language, &transformDefElem,
+ &isWindowFunc, &volatility,
+ &isStrict, &security, &isLeakProof,
+ &proconfig, &procost, &prorows,
+ &prosupport, &parallel);
+
+ if (!language)
+ {
+ if (stmt->sql_body)
+ language = "sql";
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+ errmsg("no language specified")));
+ }
+
+ /* Look up the language and validate permissions */
+ languageTuple = SearchSysCache1(LANGNAME, PointerGetDatum(language));
+ if (!HeapTupleIsValid(languageTuple))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("language \"%s\" does not exist", language),
+ (extension_file_exists(language) ?
+ errhint("Use CREATE EXTENSION to load the language into the database.") : 0)));
+
+ languageStruct = (Form_pg_language) GETSTRUCT(languageTuple);
+ languageOid = languageStruct->oid;
+
+ if (languageStruct->lanpltrusted)
+ {
+ /* if trusted language, need USAGE privilege */
+ AclResult aclresult;
+
+ aclresult = pg_language_aclcheck(languageOid, GetUserId(), ACL_USAGE);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error(aclresult, OBJECT_LANGUAGE,
+ NameStr(languageStruct->lanname));
+ }
+ else
+ {
+ /* if untrusted language, must be superuser */
+ if (!superuser())
+ aclcheck_error(ACLCHECK_NO_PRIV, OBJECT_LANGUAGE,
+ NameStr(languageStruct->lanname));
+ }
+
+ languageValidator = languageStruct->lanvalidator;
+
+ ReleaseSysCache(languageTuple);
+
+ /*
+ * Only superuser is allowed to create leakproof functions because
+ * leakproof functions can see tuples which have not yet been filtered out
+ * by security barrier views or row-level security policies.
+ */
+ if (isLeakProof && !superuser())
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("only superuser can define a leakproof function")));
+
+ if (transformDefElem)
+ {
+ ListCell *lc;
+
+ foreach(lc, castNode(List, transformDefElem))
+ {
+ Oid typeid = typenameTypeId(NULL,
+ lfirst_node(TypeName, lc));
+ Oid elt = get_base_element_type(typeid);
+
+ typeid = elt ? elt : typeid;
+
+ get_transform_oid(typeid, languageOid, false);
+ trftypes_list = lappend_oid(trftypes_list, typeid);
+ }
+ }
+
+ /*
+ * Convert remaining parameters of CREATE to form wanted by
+ * ProcedureCreate.
+ */
+ interpret_function_parameter_list(pstate,
+ stmt->parameters,
+ languageOid,
+ stmt->is_procedure ? OBJECT_PROCEDURE : OBJECT_FUNCTION,
+ &parameterTypes,
+ &parameterTypes_list,
+ &allParameterTypes,
+ &parameterModes,
+ &parameterNames,
+ &inParameterNames_list,
+ &parameterDefaults,
+ &variadicArgType,
+ &requiredResultType);
+
+ if (stmt->is_procedure)
+ {
+ Assert(!stmt->returnType);
+ prorettype = requiredResultType ? requiredResultType : VOIDOID;
+ returnsSet = false;
+ }
+ else if (stmt->returnType)
+ {
+ /* explicit RETURNS clause */
+ compute_return_type(stmt->returnType, languageOid,
+ &prorettype, &returnsSet);
+ if (OidIsValid(requiredResultType) && prorettype != requiredResultType)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+ errmsg("function result type must be %s because of OUT parameters",
+ format_type_be(requiredResultType))));
+ }
+ else if (OidIsValid(requiredResultType))
+ {
+ /* default RETURNS clause from OUT parameters */
+ prorettype = requiredResultType;
+ returnsSet = false;
+ }
+ else
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+ errmsg("function result type must be specified")));
+ /* Alternative possibility: default to RETURNS VOID */
+ prorettype = VOIDOID;
+ returnsSet = false;
+ }
+
+ if (list_length(trftypes_list) > 0)
+ {
+ ListCell *lc;
+ Datum *arr;
+ int i;
+
+ arr = palloc(list_length(trftypes_list) * sizeof(Datum));
+ i = 0;
+ foreach(lc, trftypes_list)
+ arr[i++] = ObjectIdGetDatum(lfirst_oid(lc));
+ trftypes = construct_array(arr, list_length(trftypes_list),
+ OIDOID, sizeof(Oid), true, TYPALIGN_INT);
+ }
+ else
+ {
+ /* store SQL NULL instead of empty array */
+ trftypes = NULL;
+ }
+
+ interpret_AS_clause(languageOid, language, funcname, as_clause, stmt->sql_body,
+ parameterTypes_list, inParameterNames_list,
+ &prosrc_str, &probin_str, &prosqlbody,
+ pstate->p_sourcetext);
+
+ /*
+ * Set default values for COST and ROWS depending on other parameters;
+ * reject ROWS if it's not returnsSet. NB: pg_dump knows these default
+ * values, keep it in sync if you change them.
+ */
+ if (procost < 0)
+ {
+ /* SQL and PL-language functions are assumed more expensive */
+ if (languageOid == INTERNALlanguageId ||
+ languageOid == ClanguageId)
+ procost = 1;
+ else
+ procost = 100;
+ }
+ if (prorows < 0)
+ {
+ if (returnsSet)
+ prorows = 1000;
+ else
+ prorows = 0; /* dummy value if not returnsSet */
+ }
+ else if (!returnsSet)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("ROWS is not applicable when function does not return a set")));
+
+ /*
+ * And now that we have all the parameters, and know we're permitted to do
+ * so, go ahead and create the function.
+ */
+ return ProcedureCreate(funcname,
+ namespaceId,
+ stmt->replace,
+ returnsSet,
+ prorettype,
+ GetUserId(),
+ languageOid,
+ languageValidator,
+ prosrc_str, /* converted to text later */
+ probin_str, /* converted to text later */
+ prosqlbody,
+ stmt->is_procedure ? PROKIND_PROCEDURE : (isWindowFunc ? PROKIND_WINDOW : PROKIND_FUNCTION),
+ security,
+ isLeakProof,
+ isStrict,
+ volatility,
+ parallel,
+ parameterTypes,
+ PointerGetDatum(allParameterTypes),
+ PointerGetDatum(parameterModes),
+ PointerGetDatum(parameterNames),
+ parameterDefaults,
+ PointerGetDatum(trftypes),
+ PointerGetDatum(proconfig),
+ prosupport,
+ procost,
+ prorows);
+}
+
+/*
+ * Guts of function deletion.
+ *
+ * Note: this is also used for aggregate deletion, since the OIDs of
+ * both functions and aggregates point to pg_proc.
+ */
+void
+RemoveFunctionById(Oid funcOid)
+{
+ Relation relation;
+ HeapTuple tup;
+ char prokind;
+
+ /*
+ * Delete the pg_proc tuple.
+ */
+ relation = table_open(ProcedureRelationId, RowExclusiveLock);
+
+ tup = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcOid));
+ if (!HeapTupleIsValid(tup)) /* should not happen */
+ elog(ERROR, "cache lookup failed for function %u", funcOid);
+
+ prokind = ((Form_pg_proc) GETSTRUCT(tup))->prokind;
+
+ CatalogTupleDelete(relation, &tup->t_self);
+
+ ReleaseSysCache(tup);
+
+ table_close(relation, RowExclusiveLock);
+
+ pgstat_drop_function(funcOid);
+
+ /*
+ * If there's a pg_aggregate tuple, delete that too.
+ */
+ if (prokind == PROKIND_AGGREGATE)
+ {
+ relation = table_open(AggregateRelationId, RowExclusiveLock);
+
+ tup = SearchSysCache1(AGGFNOID, ObjectIdGetDatum(funcOid));
+ if (!HeapTupleIsValid(tup)) /* should not happen */
+ elog(ERROR, "cache lookup failed for pg_aggregate tuple for function %u", funcOid);
+
+ CatalogTupleDelete(relation, &tup->t_self);
+
+ ReleaseSysCache(tup);
+
+ table_close(relation, RowExclusiveLock);
+ }
+}
+
/*
 * Implements the ALTER FUNCTION utility command (except for the
 * RENAME and OWNER clauses, which are handled as part of the generic
 * ALTER framework).
 *
 * Each recognized option is applied to an in-memory copy of the function's
 * pg_proc tuple, which is then written back in a single catalog update.
 * Returns the address of the altered function.
 */
ObjectAddress
AlterFunction(ParseState *pstate, AlterFunctionStmt *stmt)
{
	HeapTuple	tup;
	Oid			funcOid;
	Form_pg_proc procForm;
	bool		is_procedure;
	Relation	rel;
	ListCell   *l;
	DefElem    *volatility_item = NULL;
	DefElem    *strict_item = NULL;
	DefElem    *security_def_item = NULL;
	DefElem    *leakproof_item = NULL;
	List	   *set_items = NIL;
	DefElem    *cost_item = NULL;
	DefElem    *rows_item = NULL;
	DefElem    *support_item = NULL;
	DefElem    *parallel_item = NULL;
	ObjectAddress address;

	rel = table_open(ProcedureRelationId, RowExclusiveLock);

	funcOid = LookupFuncWithArgs(stmt->objtype, stmt->func, false);

	ObjectAddressSet(address, ProcedureRelationId, funcOid);

	/* Fetch a writable copy of the pg_proc tuple */
	tup = SearchSysCacheCopy1(PROCOID, ObjectIdGetDatum(funcOid));
	if (!HeapTupleIsValid(tup)) /* should not happen */
		elog(ERROR, "cache lookup failed for function %u", funcOid);

	procForm = (Form_pg_proc) GETSTRUCT(tup);

	/* Permission check: must own function */
	if (!pg_proc_ownercheck(funcOid, GetUserId()))
		aclcheck_error(ACLCHECK_NOT_OWNER, stmt->objtype,
					   NameListToString(stmt->func->objname));

	/* Aggregates are not alterable through this path */
	if (procForm->prokind == PROKIND_AGGREGATE)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("\"%s\" is an aggregate function",
						NameListToString(stmt->func->objname))));

	is_procedure = (procForm->prokind == PROKIND_PROCEDURE);

	/*
	 * Examine requested actions.  compute_common_attribute() sorts each
	 * option into the matching local variable; anything it doesn't
	 * recognize indicates a grammar/code mismatch.
	 */
	foreach(l, stmt->actions)
	{
		DefElem    *defel = (DefElem *) lfirst(l);

		if (compute_common_attribute(pstate,
									 is_procedure,
									 defel,
									 &volatility_item,
									 &strict_item,
									 &security_def_item,
									 &leakproof_item,
									 &set_items,
									 &cost_item,
									 &rows_item,
									 &support_item,
									 &parallel_item) == false)
			elog(ERROR, "option \"%s\" not recognized", defel->defname);
	}

	/* Apply the simple scalar options directly to the tuple copy. */
	if (volatility_item)
		procForm->provolatile = interpret_func_volatility(volatility_item);
	if (strict_item)
		procForm->proisstrict = boolVal(strict_item->arg);
	if (security_def_item)
		procForm->prosecdef = boolVal(security_def_item->arg);
	if (leakproof_item)
	{
		/* Same restriction as in CREATE FUNCTION: superuser-only. */
		procForm->proleakproof = boolVal(leakproof_item->arg);
		if (procForm->proleakproof && !superuser())
			ereport(ERROR,
					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
					 errmsg("only superuser can define a leakproof function")));
	}
	if (cost_item)
	{
		procForm->procost = defGetNumeric(cost_item);
		if (procForm->procost <= 0)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("COST must be positive")));
	}
	if (rows_item)
	{
		/* ROWS only makes sense for set-returning functions */
		procForm->prorows = defGetNumeric(rows_item);
		if (procForm->prorows <= 0)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("ROWS must be positive")));
		if (!procForm->proretset)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("ROWS is not applicable when function does not return a set")));
	}
	if (support_item)
	{
		/* interpret_func_support handles the privilege check */
		Oid			newsupport = interpret_func_support(support_item);

		/* Add or replace dependency on support function */
		if (OidIsValid(procForm->prosupport))
			changeDependencyFor(ProcedureRelationId, funcOid,
								ProcedureRelationId, procForm->prosupport,
								newsupport);
		else
		{
			/* No previous support function; record a fresh dependency. */
			ObjectAddress referenced;

			referenced.classId = ProcedureRelationId;
			referenced.objectId = newsupport;
			referenced.objectSubId = 0;
			recordDependencyOn(&address, &referenced, DEPENDENCY_NORMAL);
		}

		procForm->prosupport = newsupport;
	}
	if (parallel_item)
		procForm->proparallel = interpret_func_parallel(parallel_item);
	if (set_items)
	{
		Datum		datum;
		bool		isnull;
		ArrayType  *a;
		Datum		repl_val[Natts_pg_proc];
		bool		repl_null[Natts_pg_proc];
		bool		repl_repl[Natts_pg_proc];

		/* extract existing proconfig setting */
		datum = SysCacheGetAttr(PROCOID, tup, Anum_pg_proc_proconfig, &isnull);
		a = isnull ? NULL : DatumGetArrayTypeP(datum);

		/* update according to each SET or RESET item, left to right */
		a = update_proconfig_value(a, set_items);

		/* update the tuple */
		memset(repl_repl, false, sizeof(repl_repl));
		repl_repl[Anum_pg_proc_proconfig - 1] = true;

		if (a == NULL)
		{
			/* RESET ALL (or equivalent) leaves proconfig as SQL NULL */
			repl_val[Anum_pg_proc_proconfig - 1] = (Datum) 0;
			repl_null[Anum_pg_proc_proconfig - 1] = true;
		}
		else
		{
			repl_val[Anum_pg_proc_proconfig - 1] = PointerGetDatum(a);
			repl_null[Anum_pg_proc_proconfig - 1] = false;
		}

		/* note: this builds a NEW tuple, replacing tup */
		tup = heap_modify_tuple(tup, RelationGetDescr(rel),
								repl_val, repl_null, repl_repl);
	}
	/* DO NOT put more touches of procForm below here; it's now dangling. */

	/* Do the update */
	CatalogTupleUpdate(rel, &tup->t_self, tup);

	InvokeObjectPostAlterHook(ProcedureRelationId, funcOid, 0);

	/* NoLock: keep the lock on pg_proc until end of transaction */
	table_close(rel, NoLock);
	heap_freetuple(tup);

	return address;
}
+
+
/*
 * CREATE CAST
 *
 * Validate the proposed cast (type and permission checks, plus
 * method-specific sanity checks), then hand off to CastCreate() to make
 * the pg_cast entry.  Returns the address of the new cast.
 */
ObjectAddress
CreateCast(CreateCastStmt *stmt)
{
	Oid			sourcetypeid;
	Oid			targettypeid;
	char		sourcetyptype;
	char		targettyptype;
	Oid			funcid;
	int			nargs;
	char		castcontext;
	char		castmethod;
	HeapTuple	tuple;
	AclResult	aclresult;
	ObjectAddress myself;

	sourcetypeid = typenameTypeId(NULL, stmt->sourcetype);
	targettypeid = typenameTypeId(NULL, stmt->targettype);
	sourcetyptype = get_typtype(sourcetypeid);
	targettyptype = get_typtype(targettypeid);

	/* No pseudo-types allowed */
	if (sourcetyptype == TYPTYPE_PSEUDO)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("source data type %s is a pseudo-type",
						TypeNameToString(stmt->sourcetype))));

	if (targettyptype == TYPTYPE_PSEUDO)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("target data type %s is a pseudo-type",
						TypeNameToString(stmt->targettype))));

	/* Permission check: must own either the source or the target type */
	if (!pg_type_ownercheck(sourcetypeid, GetUserId())
		&& !pg_type_ownercheck(targettypeid, GetUserId()))
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("must be owner of type %s or type %s",
						format_type_be(sourcetypeid),
						format_type_be(targettypeid))));

	/* USAGE privilege is required on both types as well */
	aclresult = pg_type_aclcheck(sourcetypeid, GetUserId(), ACL_USAGE);
	if (aclresult != ACLCHECK_OK)
		aclcheck_error_type(aclresult, sourcetypeid);

	aclresult = pg_type_aclcheck(targettypeid, GetUserId(), ACL_USAGE);
	if (aclresult != ACLCHECK_OK)
		aclcheck_error_type(aclresult, targettypeid);

	/* Domains are allowed for historical reasons, but we warn */
	if (sourcetyptype == TYPTYPE_DOMAIN)
		ereport(WARNING,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("cast will be ignored because the source data type is a domain")));

	else if (targettyptype == TYPTYPE_DOMAIN)
		ereport(WARNING,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("cast will be ignored because the target data type is a domain")));

	/* Determine the cast method */
	if (stmt->func != NULL)
		castmethod = COERCION_METHOD_FUNCTION;
	else if (stmt->inout)
		castmethod = COERCION_METHOD_INOUT;
	else
		castmethod = COERCION_METHOD_BINARY;

	if (castmethod == COERCION_METHOD_FUNCTION)
	{
		Form_pg_proc procstruct;

		funcid = LookupFuncWithArgs(OBJECT_FUNCTION, stmt->func, false);

		tuple = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid));
		if (!HeapTupleIsValid(tuple))
			elog(ERROR, "cache lookup failed for function %u", funcid);

		/*
		 * A cast function takes the source value, optionally followed by a
		 * typmod (integer) and an is-explicit flag (boolean); enforce that
		 * signature shape here.
		 */
		procstruct = (Form_pg_proc) GETSTRUCT(tuple);
		nargs = procstruct->pronargs;
		if (nargs < 1 || nargs > 3)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
					 errmsg("cast function must take one to three arguments")));
		if (!IsBinaryCoercible(sourcetypeid, procstruct->proargtypes.values[0]))
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
					 errmsg("argument of cast function must match or be binary-coercible from source data type")));
		if (nargs > 1 && procstruct->proargtypes.values[1] != INT4OID)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
					 errmsg("second argument of cast function must be type %s",
							"integer")));
		if (nargs > 2 && procstruct->proargtypes.values[2] != BOOLOID)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
					 errmsg("third argument of cast function must be type %s",
							"boolean")));
		if (!IsBinaryCoercible(procstruct->prorettype, targettypeid))
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
					 errmsg("return data type of cast function must match or be binary-coercible to target data type")));

		/*
		 * Restricting the volatility of a cast function may or may not be a
		 * good idea in the abstract, but it definitely breaks many old
		 * user-defined types.  Disable this check --- tgl 2/1/03
		 */
#ifdef NOT_USED
		if (procstruct->provolatile == PROVOLATILE_VOLATILE)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
					 errmsg("cast function must not be volatile")));
#endif
		if (procstruct->prokind != PROKIND_FUNCTION)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
					 errmsg("cast function must be a normal function")));
		if (procstruct->proretset)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
					 errmsg("cast function must not return a set")));

		ReleaseSysCache(tuple);
	}
	else
	{
		/* No function: WITHOUT FUNCTION or WITH INOUT */
		funcid = InvalidOid;
		nargs = 0;
	}

	if (castmethod == COERCION_METHOD_BINARY)
	{
		int16		typ1len;
		int16		typ2len;
		bool		typ1byval;
		bool		typ2byval;
		char		typ1align;
		char		typ2align;

		/*
		 * Must be superuser to create binary-compatible casts, since
		 * erroneous casts can easily crash the backend.
		 */
		if (!superuser())
			ereport(ERROR,
					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
					 errmsg("must be superuser to create a cast WITHOUT FUNCTION")));

		/*
		 * Also, insist that the types match as to size, alignment, and
		 * pass-by-value attributes; this provides at least a crude check that
		 * they have similar representations.  A pair of types that fail this
		 * test should certainly not be equated.
		 */
		get_typlenbyvalalign(sourcetypeid, &typ1len, &typ1byval, &typ1align);
		get_typlenbyvalalign(targettypeid, &typ2len, &typ2byval, &typ2align);
		if (typ1len != typ2len ||
			typ1byval != typ2byval ||
			typ1align != typ2align)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
					 errmsg("source and target data types are not physically compatible")));

		/*
		 * We know that composite, enum and array types are never binary-
		 * compatible with each other.  They all have OIDs embedded in them.
		 *
		 * Theoretically you could build a user-defined base type that is
		 * binary-compatible with a composite, enum, or array type.  But we
		 * disallow that too, as in practice such a cast is surely a mistake.
		 * You can always work around that by writing a cast function.
		 */
		if (sourcetyptype == TYPTYPE_COMPOSITE ||
			targettyptype == TYPTYPE_COMPOSITE)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
					 errmsg("composite data types are not binary-compatible")));

		if (sourcetyptype == TYPTYPE_ENUM ||
			targettyptype == TYPTYPE_ENUM)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
					 errmsg("enum data types are not binary-compatible")));

		if (OidIsValid(get_element_type(sourcetypeid)) ||
			OidIsValid(get_element_type(targettypeid)))
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
					 errmsg("array data types are not binary-compatible")));

		/*
		 * We also disallow creating binary-compatibility casts involving
		 * domains.  Casting from a domain to its base type is already
		 * allowed, and casting the other way ought to go through domain
		 * coercion to permit constraint checking.  Again, if you're intent on
		 * having your own semantics for that, create a no-op cast function.
		 *
		 * NOTE: if we were to relax this, the above checks for composites
		 * etc. would have to be modified to look through domains to their
		 * base types.
		 */
		if (sourcetyptype == TYPTYPE_DOMAIN ||
			targettyptype == TYPTYPE_DOMAIN)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
					 errmsg("domain data types must not be marked binary-compatible")));
	}

	/*
	 * Allow source and target types to be same only for length coercion
	 * functions.  We assume a multi-arg function does length coercion.
	 */
	if (sourcetypeid == targettypeid && nargs < 2)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("source data type and target data type are the same")));

	/* convert CoercionContext enum to char value for castcontext */
	switch (stmt->context)
	{
		case COERCION_IMPLICIT:
			castcontext = COERCION_CODE_IMPLICIT;
			break;
		case COERCION_ASSIGNMENT:
			castcontext = COERCION_CODE_ASSIGNMENT;
			break;
			/* COERCION_PLPGSQL is intentionally not covered here */
		case COERCION_EXPLICIT:
			castcontext = COERCION_CODE_EXPLICIT;
			break;
		default:
			elog(ERROR, "unrecognized CoercionContext: %d", stmt->context);
			castcontext = 0;	/* keep compiler quiet */
			break;
	}

	/* All checks passed; create the pg_cast entry and its dependencies. */
	myself = CastCreate(sourcetypeid, targettypeid, funcid, castcontext,
						castmethod, DEPENDENCY_NORMAL);
	return myself;
}
+
+
+static void
+check_transform_function(Form_pg_proc procstruct)
+{
+ if (procstruct->provolatile == PROVOLATILE_VOLATILE)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("transform function must not be volatile")));
+ if (procstruct->prokind != PROKIND_FUNCTION)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("transform function must be a normal function")));
+ if (procstruct->proretset)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("transform function must not return a set")));
+ if (procstruct->pronargs != 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("transform function must take one argument")));
+ if (procstruct->proargtypes.values[0] != INTERNALOID)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("first argument of transform function must be type %s",
+ "internal")));
+}
+
+
/*
 * CREATE TRANSFORM
 *
 * Validates the target type, language, and the optional FROM SQL / TO SQL
 * functions, then inserts a new pg_transform row (or, with OR REPLACE,
 * updates the existing one) and rebuilds its dependency entries.
 * Returns the address of the transform.
 */
ObjectAddress
CreateTransform(CreateTransformStmt *stmt)
{
	Oid			typeid;
	char		typtype;
	Oid			langid;
	Oid			fromsqlfuncid;
	Oid			tosqlfuncid;
	AclResult	aclresult;
	Form_pg_proc procstruct;
	Datum		values[Natts_pg_transform];
	bool		nulls[Natts_pg_transform];
	bool		replaces[Natts_pg_transform];
	Oid			transformid;
	HeapTuple	tuple;
	HeapTuple	newtuple;
	Relation	relation;
	ObjectAddress myself,
				referenced;
	ObjectAddresses *addrs;
	bool		is_replace;

	/*
	 * Get the type; pseudo-types and domains are not transformable.
	 */
	typeid = typenameTypeId(NULL, stmt->type_name);
	typtype = get_typtype(typeid);

	if (typtype == TYPTYPE_PSEUDO)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("data type %s is a pseudo-type",
						TypeNameToString(stmt->type_name))));

	if (typtype == TYPTYPE_DOMAIN)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("data type %s is a domain",
						TypeNameToString(stmt->type_name))));

	/* Must own the type and have USAGE privilege on it */
	if (!pg_type_ownercheck(typeid, GetUserId()))
		aclcheck_error_type(ACLCHECK_NOT_OWNER, typeid);

	aclresult = pg_type_aclcheck(typeid, GetUserId(), ACL_USAGE);
	if (aclresult != ACLCHECK_OK)
		aclcheck_error_type(aclresult, typeid);

	/*
	 * Get the language; USAGE privilege is required on it.
	 */
	langid = get_language_oid(stmt->lang, false);

	aclresult = pg_language_aclcheck(langid, GetUserId(), ACL_USAGE);
	if (aclresult != ACLCHECK_OK)
		aclcheck_error(aclresult, OBJECT_LANGUAGE, stmt->lang);

	/*
	 * Get the functions.  Each one, if given, must be owned and executable
	 * by the current user and pass check_transform_function()'s signature
	 * checks; a FROM SQL function must return "internal", a TO SQL function
	 * must return the transform's data type.
	 */
	if (stmt->fromsql)
	{
		fromsqlfuncid = LookupFuncWithArgs(OBJECT_FUNCTION, stmt->fromsql, false);

		if (!pg_proc_ownercheck(fromsqlfuncid, GetUserId()))
			aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FUNCTION, NameListToString(stmt->fromsql->objname));

		aclresult = pg_proc_aclcheck(fromsqlfuncid, GetUserId(), ACL_EXECUTE);
		if (aclresult != ACLCHECK_OK)
			aclcheck_error(aclresult, OBJECT_FUNCTION, NameListToString(stmt->fromsql->objname));

		tuple = SearchSysCache1(PROCOID, ObjectIdGetDatum(fromsqlfuncid));
		if (!HeapTupleIsValid(tuple))
			elog(ERROR, "cache lookup failed for function %u", fromsqlfuncid);
		procstruct = (Form_pg_proc) GETSTRUCT(tuple);
		if (procstruct->prorettype != INTERNALOID)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
					 errmsg("return data type of FROM SQL function must be %s",
							"internal")));
		check_transform_function(procstruct);
		ReleaseSysCache(tuple);
	}
	else
		fromsqlfuncid = InvalidOid;

	if (stmt->tosql)
	{
		tosqlfuncid = LookupFuncWithArgs(OBJECT_FUNCTION, stmt->tosql, false);

		if (!pg_proc_ownercheck(tosqlfuncid, GetUserId()))
			aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FUNCTION, NameListToString(stmt->tosql->objname));

		aclresult = pg_proc_aclcheck(tosqlfuncid, GetUserId(), ACL_EXECUTE);
		if (aclresult != ACLCHECK_OK)
			aclcheck_error(aclresult, OBJECT_FUNCTION, NameListToString(stmt->tosql->objname));

		tuple = SearchSysCache1(PROCOID, ObjectIdGetDatum(tosqlfuncid));
		if (!HeapTupleIsValid(tuple))
			elog(ERROR, "cache lookup failed for function %u", tosqlfuncid);
		procstruct = (Form_pg_proc) GETSTRUCT(tuple);
		if (procstruct->prorettype != typeid)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
					 errmsg("return data type of TO SQL function must be the transform data type")));
		check_transform_function(procstruct);
		ReleaseSysCache(tuple);
	}
	else
		tosqlfuncid = InvalidOid;

	/*
	 * Ready to go: build the pg_transform column values.
	 */
	values[Anum_pg_transform_trftype - 1] = ObjectIdGetDatum(typeid);
	values[Anum_pg_transform_trflang - 1] = ObjectIdGetDatum(langid);
	values[Anum_pg_transform_trffromsql - 1] = ObjectIdGetDatum(fromsqlfuncid);
	values[Anum_pg_transform_trftosql - 1] = ObjectIdGetDatum(tosqlfuncid);

	MemSet(nulls, false, sizeof(nulls));

	relation = table_open(TransformRelationId, RowExclusiveLock);

	/* Is there already a transform for this (type, language) pair? */
	tuple = SearchSysCache2(TRFTYPELANG,
							ObjectIdGetDatum(typeid),
							ObjectIdGetDatum(langid));
	if (HeapTupleIsValid(tuple))
	{
		Form_pg_transform form = (Form_pg_transform) GETSTRUCT(tuple);

		if (!stmt->replace)
			ereport(ERROR,
					(errcode(ERRCODE_DUPLICATE_OBJECT),
					 errmsg("transform for type %s language \"%s\" already exists",
							format_type_be(typeid),
							stmt->lang)));

		/* On REPLACE, only the two function columns can change. */
		MemSet(replaces, false, sizeof(replaces));
		replaces[Anum_pg_transform_trffromsql - 1] = true;
		replaces[Anum_pg_transform_trftosql - 1] = true;

		newtuple = heap_modify_tuple(tuple, RelationGetDescr(relation), values, nulls, replaces);
		CatalogTupleUpdate(relation, &newtuple->t_self, newtuple);

		transformid = form->oid;
		ReleaseSysCache(tuple);
		is_replace = true;
	}
	else
	{
		/* New transform: assign an OID and insert the row. */
		transformid = GetNewOidWithIndex(relation, TransformOidIndexId,
										 Anum_pg_transform_oid);
		values[Anum_pg_transform_oid - 1] = ObjectIdGetDatum(transformid);
		newtuple = heap_form_tuple(RelationGetDescr(relation), values, nulls);
		CatalogTupleInsert(relation, newtuple);
		is_replace = false;
	}

	/* On replace, flush the old dependency records; we rebuild them below. */
	if (is_replace)
		deleteDependencyRecordsFor(TransformRelationId, transformid, true);

	addrs = new_object_addresses();

	/* make dependency entries */
	ObjectAddressSet(myself, TransformRelationId, transformid);

	/* dependency on language */
	ObjectAddressSet(referenced, LanguageRelationId, langid);
	add_exact_object_address(&referenced, addrs);

	/* dependency on type */
	ObjectAddressSet(referenced, TypeRelationId, typeid);
	add_exact_object_address(&referenced, addrs);

	/* dependencies on functions */
	if (OidIsValid(fromsqlfuncid))
	{
		ObjectAddressSet(referenced, ProcedureRelationId, fromsqlfuncid);
		add_exact_object_address(&referenced, addrs);
	}
	if (OidIsValid(tosqlfuncid))
	{
		ObjectAddressSet(referenced, ProcedureRelationId, tosqlfuncid);
		add_exact_object_address(&referenced, addrs);
	}

	record_object_address_dependencies(&myself, addrs, DEPENDENCY_NORMAL);
	free_object_addresses(addrs);

	/* dependency on extension */
	recordDependencyOnCurrentExtension(&myself, is_replace);

	/* Post creation hook for new transform */
	InvokeObjectPostCreateHook(TransformRelationId, transformid, 0);

	heap_freetuple(newtuple);

	table_close(relation, RowExclusiveLock);

	return myself;
}
+
+
+/*
+ * get_transform_oid - given type OID and language OID, look up a transform OID
+ *
+ * If missing_ok is false, throw an error if the transform is not found. If
+ * true, just return InvalidOid.
+ */
+Oid
+get_transform_oid(Oid type_id, Oid lang_id, bool missing_ok)
+{
+ Oid oid;
+
+ oid = GetSysCacheOid2(TRFTYPELANG, Anum_pg_transform_oid,
+ ObjectIdGetDatum(type_id),
+ ObjectIdGetDatum(lang_id));
+ if (!OidIsValid(oid) && !missing_ok)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("transform for type %s language \"%s\" does not exist",
+ format_type_be(type_id),
+ get_language_name(lang_id, false))));
+ return oid;
+}
+
+
+/*
+ * Subroutine for ALTER FUNCTION/AGGREGATE SET SCHEMA/RENAME
+ *
+ * Is there a function with the given name and signature already in the given
+ * namespace? If so, raise an appropriate error message.
+ */
+void
+IsThereFunctionInNamespace(const char *proname, int pronargs,
+ oidvector *proargtypes, Oid nspOid)
+{
+ /* check for duplicate name (more friendly than unique-index failure) */
+ if (SearchSysCacheExists3(PROCNAMEARGSNSP,
+ CStringGetDatum(proname),
+ PointerGetDatum(proargtypes),
+ ObjectIdGetDatum(nspOid)))
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_FUNCTION),
+ errmsg("function %s already exists in schema \"%s\"",
+ funcname_signature_string(proname, pronargs,
+ NIL, proargtypes->values),
+ get_namespace_name(nspOid))));
+}
+
+/*
+ * ExecuteDoStmt
+ * Execute inline procedural-language code
+ *
+ * See at ExecuteCallStmt() about the atomic argument.
+ */
+void
+ExecuteDoStmt(ParseState *pstate, DoStmt *stmt, bool atomic)
+{
+ InlineCodeBlock *codeblock = makeNode(InlineCodeBlock);
+ ListCell *arg;
+ DefElem *as_item = NULL;
+ DefElem *language_item = NULL;
+ char *language;
+ Oid laninline;
+ HeapTuple languageTuple;
+ Form_pg_language languageStruct;
+
+ /* Process options we got from gram.y */
+ foreach(arg, stmt->args)
+ {
+ DefElem *defel = (DefElem *) lfirst(arg);
+
+ if (strcmp(defel->defname, "as") == 0)
+ {
+ if (as_item)
+ errorConflictingDefElem(defel, pstate);
+ as_item = defel;
+ }
+ else if (strcmp(defel->defname, "language") == 0)
+ {
+ if (language_item)
+ errorConflictingDefElem(defel, pstate);
+ language_item = defel;
+ }
+ else
+ elog(ERROR, "option \"%s\" not recognized",
+ defel->defname);
+ }
+
+ if (as_item)
+ codeblock->source_text = strVal(as_item->arg);
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("no inline code specified")));
+
+ /* if LANGUAGE option wasn't specified, use the default */
+ if (language_item)
+ language = strVal(language_item->arg);
+ else
+ language = "plpgsql";
+
+ /* Look up the language and validate permissions */
+ languageTuple = SearchSysCache1(LANGNAME, PointerGetDatum(language));
+ if (!HeapTupleIsValid(languageTuple))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("language \"%s\" does not exist", language),
+ (extension_file_exists(language) ?
+ errhint("Use CREATE EXTENSION to load the language into the database.") : 0)));
+
+ languageStruct = (Form_pg_language) GETSTRUCT(languageTuple);
+ codeblock->langOid = languageStruct->oid;
+ codeblock->langIsTrusted = languageStruct->lanpltrusted;
+ codeblock->atomic = atomic;
+
+ if (languageStruct->lanpltrusted)
+ {
+ /* if trusted language, need USAGE privilege */
+ AclResult aclresult;
+
+ aclresult = pg_language_aclcheck(codeblock->langOid, GetUserId(),
+ ACL_USAGE);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error(aclresult, OBJECT_LANGUAGE,
+ NameStr(languageStruct->lanname));
+ }
+ else
+ {
+ /* if untrusted language, must be superuser */
+ if (!superuser())
+ aclcheck_error(ACLCHECK_NO_PRIV, OBJECT_LANGUAGE,
+ NameStr(languageStruct->lanname));
+ }
+
+ /* get the handler function's OID */
+ laninline = languageStruct->laninline;
+ if (!OidIsValid(laninline))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("language \"%s\" does not support inline code execution",
+ NameStr(languageStruct->lanname))));
+
+ ReleaseSysCache(languageTuple);
+
+ /* execute the inline handler */
+ OidFunctionCall1(laninline, PointerGetDatum(codeblock));
+}
+
+/*
+ * Execute CALL statement
+ *
+ * Inside a top-level CALL statement, transaction-terminating commands such as
+ * COMMIT or a PL-specific equivalent are allowed. The terminology in the SQL
+ * standard is that CALL establishes a non-atomic execution context. Most
+ * other commands establish an atomic execution context, in which transaction
+ * control actions are not allowed. If there are nested executions of CALL,
+ * we want to track the execution context recursively, so that the nested
+ * CALLs can also do transaction control. Note, however, that for example in
+ * CALL -> SELECT -> CALL, the second call cannot do transaction control,
+ * because the SELECT in between establishes an atomic execution context.
+ *
+ * So when ExecuteCallStmt() is called from the top level, we pass in atomic =
+ * false (recall that that means transactions = yes). We then create a
+ * CallContext node with content atomic = false, which is passed in the
+ * fcinfo->context field to the procedure invocation. The language
+ * implementation should then take appropriate measures to allow or prevent
+ * transaction commands based on that information, e.g., call
+ * SPI_connect_ext(SPI_OPT_NONATOMIC). The language should also pass on the
+ * atomic flag to any nested invocations to CALL.
+ *
+ * The expression data structures and execution context that we create
+ * within this function are children of the portalContext of the Portal
+ * that the CALL utility statement runs in. Therefore, any pass-by-ref
+ * values that we're passing to the procedure will survive transaction
+ * commits that might occur inside the procedure.
+ */
+void
+ExecuteCallStmt(CallStmt *stmt, ParamListInfo params, bool atomic, DestReceiver *dest)
+{
+ LOCAL_FCINFO(fcinfo, FUNC_MAX_ARGS);
+ ListCell *lc;
+ FuncExpr *fexpr;
+ int nargs;
+ int i;
+ AclResult aclresult;
+ FmgrInfo flinfo;
+ CallContext *callcontext;
+ EState *estate;
+ ExprContext *econtext;
+ HeapTuple tp;
+ PgStat_FunctionCallUsage fcusage;
+ Datum retval;
+
+ fexpr = stmt->funcexpr;
+ Assert(fexpr);
+ Assert(IsA(fexpr, FuncExpr));
+
+ /* Caller must have EXECUTE privilege on the procedure */
+ aclresult = pg_proc_aclcheck(fexpr->funcid, GetUserId(), ACL_EXECUTE);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error(aclresult, OBJECT_PROCEDURE, get_func_name(fexpr->funcid));
+
+ /* Prep the context object we'll pass to the procedure */
+ callcontext = makeNode(CallContext);
+ callcontext->atomic = atomic;
+
+ tp = SearchSysCache1(PROCOID, ObjectIdGetDatum(fexpr->funcid));
+ if (!HeapTupleIsValid(tp))
+ elog(ERROR, "cache lookup failed for function %u", fexpr->funcid);
+
+ /*
+ * If proconfig is set we can't allow transaction commands because of the
+ * way the GUC stacking works: The transaction boundary would have to pop
+ * the proconfig setting off the stack. That restriction could be lifted
+ * by redesigning the GUC nesting mechanism a bit.
+ */
+ if (!heap_attisnull(tp, Anum_pg_proc_proconfig, NULL))
+ callcontext->atomic = true;
+
+ /*
+ * In security definer procedures, we can't allow transaction commands.
+ * StartTransaction() insists that the security context stack is empty,
+ * and AbortTransaction() resets the security context. This could be
+ * reorganized, but right now it doesn't work.
+ */
+ if (((Form_pg_proc) GETSTRUCT(tp))->prosecdef)
+ callcontext->atomic = true;
+
+ ReleaseSysCache(tp);
+
+ /* safety check; see ExecInitFunc() */
+ nargs = list_length(fexpr->args);
+ if (nargs > FUNC_MAX_ARGS)
+ ereport(ERROR,
+ (errcode(ERRCODE_TOO_MANY_ARGUMENTS),
+ errmsg_plural("cannot pass more than %d argument to a procedure",
+ "cannot pass more than %d arguments to a procedure",
+ FUNC_MAX_ARGS,
+ FUNC_MAX_ARGS)));
+
+ /* Initialize function call structure */
+ InvokeFunctionExecuteHook(fexpr->funcid);
+ fmgr_info(fexpr->funcid, &flinfo);
+ fmgr_info_set_expr((Node *) fexpr, &flinfo);
+ InitFunctionCallInfoData(*fcinfo, &flinfo, nargs, fexpr->inputcollid,
+ (Node *) callcontext, NULL);
+
+ /*
+ * Evaluate procedure arguments inside a suitable execution context. Note
+ * we can't free this context till the procedure returns.
+ */
+ estate = CreateExecutorState();
+ estate->es_param_list_info = params;
+ econtext = CreateExprContext(estate);
+
+ /*
+ * If we're called in non-atomic context, we also have to ensure that the
+ * argument expressions run with an up-to-date snapshot. Our caller will
+ * have provided a current snapshot in atomic contexts, but not in
+ * non-atomic contexts, because the possibility of a COMMIT/ROLLBACK
+ * destroying the snapshot makes higher-level management too complicated.
+ */
+ if (!atomic)
+ PushActiveSnapshot(GetTransactionSnapshot());
+
+ /* Evaluate each argument, filling in value and null flag in fcinfo */
+ i = 0;
+ foreach(lc, fexpr->args)
+ {
+ ExprState *exprstate;
+ Datum val;
+ bool isnull;
+
+ exprstate = ExecPrepareExpr(lfirst(lc), estate);
+
+ val = ExecEvalExprSwitchContext(exprstate, econtext, &isnull);
+
+ fcinfo->args[i].value = val;
+ fcinfo->args[i].isnull = isnull;
+
+ i++;
+ }
+
+ /* Get rid of temporary snapshot for arguments, if we made one */
+ if (!atomic)
+ PopActiveSnapshot();
+
+ /* Here we actually call the procedure */
+ pgstat_init_function_usage(fcinfo, &fcusage);
+ retval = FunctionCallInvoke(fcinfo);
+ pgstat_end_function_usage(&fcusage, true);
+
+ /* Handle the procedure's outputs */
+ if (fexpr->funcresulttype == VOIDOID)
+ {
+ /* do nothing */
+ }
+ else if (fexpr->funcresulttype == RECORDOID)
+ {
+ /* send tuple to client */
+ HeapTupleHeader td;
+ Oid tupType;
+ int32 tupTypmod;
+ TupleDesc retdesc;
+ HeapTupleData rettupdata;
+ TupOutputState *tstate;
+ TupleTableSlot *slot;
+
+ if (fcinfo->isnull)
+ elog(ERROR, "procedure returned null record");
+
+ /*
+ * Ensure there's an active snapshot whilst we execute whatever's
+ * involved here. Note that this is *not* sufficient to make the
+ * world safe for TOAST pointers to be included in the returned data:
+ * the referenced data could have gone away while we didn't hold a
+ * snapshot. Hence, it's incumbent on PLs that can do COMMIT/ROLLBACK
+ * to not return TOAST pointers, unless those pointers were fetched
+ * after the last COMMIT/ROLLBACK in the procedure.
+ *
+ * XXX that is a really nasty, hard-to-test requirement. Is there a
+ * way to remove it?
+ */
+ EnsurePortalSnapshotExists();
+
+ /* Disassemble the returned composite datum into a HeapTupleData */
+ td = DatumGetHeapTupleHeader(retval);
+ tupType = HeapTupleHeaderGetTypeId(td);
+ tupTypmod = HeapTupleHeaderGetTypMod(td);
+ retdesc = lookup_rowtype_tupdesc(tupType, tupTypmod);
+
+ tstate = begin_tup_output_tupdesc(dest, retdesc,
+ &TTSOpsHeapTuple);
+
+ rettupdata.t_len = HeapTupleHeaderGetDatumLength(td);
+ ItemPointerSetInvalid(&(rettupdata.t_self));
+ rettupdata.t_tableOid = InvalidOid;
+ rettupdata.t_data = td;
+
+ slot = ExecStoreHeapTuple(&rettupdata, tstate->slot, false);
+ tstate->dest->receiveSlot(slot, tstate->dest);
+
+ end_tup_output(tstate);
+
+ ReleaseTupleDesc(retdesc);
+ }
+ else
+ elog(ERROR, "unexpected result type for procedure: %u",
+ fexpr->funcresulttype);
+
+ /* Now it's safe to free the argument-evaluation context */
+ FreeExecutorState(estate);
+}
+
+/*
+ * Build the result-row tuple descriptor for a CALL statement.
+ *
+ * The descriptor is derived from the procedure's pg_proc entry; errors out
+ * if the procedure cannot be found in the syscache.
+ */
+TupleDesc
+CallStmtResultDesc(CallStmt *stmt)
+{
+ Oid procid = stmt->funcexpr->funcid;
+ HeapTuple proctup;
+ TupleDesc result;
+
+ proctup = SearchSysCache1(PROCOID, ObjectIdGetDatum(procid));
+ if (!HeapTupleIsValid(proctup))
+ elog(ERROR, "cache lookup failed for procedure %u", procid);
+
+ result = build_function_result_tupdesc_t(proctup);
+
+ ReleaseSysCache(proctup);
+
+ return result;
+}
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
new file mode 100644
index 0000000..d3f7b09
--- /dev/null
+++ b/src/backend/commands/indexcmds.c
@@ -0,0 +1,4355 @@
+/*-------------------------------------------------------------------------
+ *
+ * indexcmds.c
+ * POSTGRES define and remove index code.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/commands/indexcmds.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/amapi.h"
+#include "access/heapam.h"
+#include "access/htup_details.h"
+#include "access/reloptions.h"
+#include "access/sysattr.h"
+#include "access/tableam.h"
+#include "access/xact.h"
+#include "catalog/catalog.h"
+#include "catalog/index.h"
+#include "catalog/indexing.h"
+#include "catalog/pg_am.h"
+#include "catalog/pg_constraint.h"
+#include "catalog/pg_inherits.h"
+#include "catalog/pg_opclass.h"
+#include "catalog/pg_opfamily.h"
+#include "catalog/pg_tablespace.h"
+#include "catalog/pg_type.h"
+#include "commands/comment.h"
+#include "commands/dbcommands.h"
+#include "commands/defrem.h"
+#include "commands/event_trigger.h"
+#include "commands/progress.h"
+#include "commands/tablecmds.h"
+#include "commands/tablespace.h"
+#include "mb/pg_wchar.h"
+#include "miscadmin.h"
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "optimizer/optimizer.h"
+#include "parser/parse_coerce.h"
+#include "parser/parse_func.h"
+#include "parser/parse_oper.h"
+#include "partitioning/partdesc.h"
+#include "pgstat.h"
+#include "rewrite/rewriteManip.h"
+#include "storage/lmgr.h"
+#include "storage/proc.h"
+#include "storage/procarray.h"
+#include "storage/sinvaladt.h"
+#include "utils/acl.h"
+#include "utils/builtins.h"
+#include "utils/fmgroids.h"
+#include "utils/inval.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/partcache.h"
+#include "utils/pg_rusage.h"
+#include "utils/regproc.h"
+#include "utils/snapmgr.h"
+#include "utils/syscache.h"
+
+
+/* non-export function prototypes */
+static bool CompareOpclassOptions(Datum *opts1, Datum *opts2, int natts);
+static void CheckPredicate(Expr *predicate);
+static void ComputeIndexAttrs(IndexInfo *indexInfo,
+ Oid *typeOidP,
+ Oid *collationOidP,
+ Oid *classOidP,
+ int16 *colOptionP,
+ List *attList,
+ List *exclusionOpNames,
+ Oid relId,
+ const char *accessMethodName, Oid accessMethodId,
+ bool amcanorder,
+ bool isconstraint,
+ Oid ddl_userid,
+ int ddl_sec_context,
+ int *ddl_save_nestlevel);
+static char *ChooseIndexName(const char *tabname, Oid namespaceId,
+ List *colnames, List *exclusionOpNames,
+ bool primary, bool isconstraint);
+static char *ChooseIndexNameAddition(List *colnames);
+static List *ChooseIndexColumnNames(List *indexElems);
+static void ReindexIndex(RangeVar *indexRelation, ReindexParams *params,
+ bool isTopLevel);
+static void RangeVarCallbackForReindexIndex(const RangeVar *relation,
+ Oid relId, Oid oldRelId, void *arg);
+static Oid ReindexTable(RangeVar *relation, ReindexParams *params,
+ bool isTopLevel);
+static void ReindexMultipleTables(const char *objectName,
+ ReindexObjectType objectKind, ReindexParams *params);
+static void reindex_error_callback(void *args);
+static void ReindexPartitions(Oid relid, ReindexParams *params,
+ bool isTopLevel);
+static void ReindexMultipleInternal(List *relids,
+ ReindexParams *params);
+static bool ReindexRelationConcurrently(Oid relationOid,
+ ReindexParams *params);
+static void update_relispartition(Oid relationId, bool newval);
+static inline void set_indexsafe_procflags(void);
+
+/*
+ * callback argument type for RangeVarCallbackForReindexIndex()
+ *
+ * NOTE(review): locked_table_oid appears intended to let the callback
+ * detect when a re-lookup resolved to a different table than the one it
+ * previously locked — confirm against RangeVarCallbackForReindexIndex().
+ */
+struct ReindexIndexCallbackState
+{
+ ReindexParams params; /* options from statement */
+ Oid locked_table_oid; /* tracks previously locked table */
+};
+
+/*
+ * callback arguments for reindex_error_callback()
+ *
+ * Identifies the relation being processed so the error context message
+ * can name it.
+ */
+typedef struct ReindexErrorInfo
+{
+ char *relname; /* name of relation being reindexed */
+ char *relnamespace; /* schema name of that relation */
+ char relkind; /* relkind of that relation */
+} ReindexErrorInfo;
+
+/*
+ * CheckIndexCompatible
+ * Determine whether an existing index definition is compatible with a
+ * prospective index definition, such that the existing index storage
+ * could become the storage of the new index, avoiding a rebuild.
+ *
+ * 'oldId': the OID of the existing index
+ * 'accessMethodName': name of the AM to use.
+ * 'attributeList': a list of IndexElem specifying columns and expressions
+ * to index on.
+ * 'exclusionOpNames': list of names of exclusion-constraint operators,
+ * or NIL if not an exclusion constraint.
+ *
+ * This is tailored to the needs of ALTER TABLE ALTER TYPE, which recreates
+ * any indexes that depended on a changing column from their pg_get_indexdef
+ * or pg_get_constraintdef definitions. We omit some of the sanity checks of
+ * DefineIndex. We assume that the old and new indexes have the same number
+ * of columns and that if one has an expression column or predicate, both do.
+ * Errors arising from the attribute list still apply.
+ *
+ * Most column type changes that can skip a table rewrite do not invalidate
+ * indexes. We acknowledge this when all operator classes, collations and
+ * exclusion operators match. Though we could further permit intra-opfamily
+ * changes for btree and hash indexes, that adds subtle complexity with no
+ * concrete benefit for core types. Note, that INCLUDE columns aren't
+ * checked by this function, for them it's enough that table rewrite is
+ * skipped.
+ *
+ * When a comparison or exclusion operator has a polymorphic input type, the
+ * actual input types must also match. This defends against the possibility
+ * that operators could vary behavior in response to get_fn_expr_argtype().
+ * At present, this hazard is theoretical: check_exclusion_constraint() and
+ * all core index access methods decline to set fn_expr for such calls.
+ *
+ * We do not yet implement a test to verify compatibility of expression
+ * columns or predicates, so assume any such index is incompatible.
+ */
+bool
+CheckIndexCompatible(Oid oldId,
+ const char *accessMethodName,
+ List *attributeList,
+ List *exclusionOpNames)
+{
+ bool isconstraint;
+ Oid *typeObjectId;
+ Oid *collationObjectId;
+ Oid *classObjectId;
+ Oid accessMethodId;
+ Oid relationId;
+ HeapTuple tuple;
+ Form_pg_index indexForm;
+ Form_pg_am accessMethodForm;
+ IndexAmRoutine *amRoutine;
+ bool amcanorder;
+ int16 *coloptions;
+ IndexInfo *indexInfo;
+ int numberOfAttributes;
+ int old_natts;
+ bool isnull;
+ bool ret = true;
+ oidvector *old_indclass;
+ oidvector *old_indcollation;
+ Relation irel;
+ int i;
+ Datum d;
+
+ /* Caller should already have the relation locked in some way. */
+ relationId = IndexGetRelation(oldId, false);
+
+ /*
+ * We can pretend isconstraint = false unconditionally. It only serves to
+ * decide the text of an error message that should never happen for us.
+ */
+ isconstraint = false;
+
+ numberOfAttributes = list_length(attributeList);
+ Assert(numberOfAttributes > 0);
+ Assert(numberOfAttributes <= INDEX_MAX_KEYS);
+
+ /* look up the access method */
+ tuple = SearchSysCache1(AMNAME, PointerGetDatum(accessMethodName));
+ if (!HeapTupleIsValid(tuple))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("access method \"%s\" does not exist",
+ accessMethodName)));
+ accessMethodForm = (Form_pg_am) GETSTRUCT(tuple);
+ accessMethodId = accessMethodForm->oid;
+ amRoutine = GetIndexAmRoutine(accessMethodForm->amhandler);
+ ReleaseSysCache(tuple);
+
+ /* needed below by ComputeIndexAttrs */
+ amcanorder = amRoutine->amcanorder;
+
+ /*
+ * Compute the operator classes, collations, and exclusion operators for
+ * the new index, so we can test whether it's compatible with the existing
+ * one. Note that ComputeIndexAttrs might fail here, but that's OK:
+ * DefineIndex would have failed later. Our attributeList contains only
+ * key attributes, thus we're filling ii_NumIndexAttrs and
+ * ii_NumIndexKeyAttrs with same value.
+ */
+ indexInfo = makeIndexInfo(numberOfAttributes, numberOfAttributes,
+ accessMethodId, NIL, NIL, false, false, false, false);
+ typeObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid));
+ collationObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid));
+ classObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid));
+ coloptions = (int16 *) palloc(numberOfAttributes * sizeof(int16));
+ ComputeIndexAttrs(indexInfo,
+ typeObjectId, collationObjectId, classObjectId,
+ coloptions, attributeList,
+ exclusionOpNames, relationId,
+ accessMethodName, accessMethodId,
+ amcanorder, isconstraint, InvalidOid, 0, NULL);
+
+
+ /* Get the soon-obsolete pg_index tuple. */
+ tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(oldId));
+ if (!HeapTupleIsValid(tuple))
+ elog(ERROR, "cache lookup failed for index %u", oldId);
+ indexForm = (Form_pg_index) GETSTRUCT(tuple);
+
+ /*
+ * We don't assess expressions or predicates; assume incompatibility.
+ * Also, if the index is invalid for any reason, treat it as incompatible.
+ */
+ if (!(heap_attisnull(tuple, Anum_pg_index_indpred, NULL) &&
+ heap_attisnull(tuple, Anum_pg_index_indexprs, NULL) &&
+ indexForm->indisvalid))
+ {
+ ReleaseSysCache(tuple);
+ return false;
+ }
+
+ /* Any change in operator class or collation breaks compatibility. */
+ old_natts = indexForm->indnkeyatts;
+ Assert(old_natts == numberOfAttributes);
+
+ d = SysCacheGetAttr(INDEXRELID, tuple, Anum_pg_index_indcollation, &isnull);
+ Assert(!isnull);
+ old_indcollation = (oidvector *) DatumGetPointer(d);
+
+ d = SysCacheGetAttr(INDEXRELID, tuple, Anum_pg_index_indclass, &isnull);
+ Assert(!isnull);
+ old_indclass = (oidvector *) DatumGetPointer(d);
+
+ /* Compare old and new opclass and collation OID arrays wholesale */
+ ret = (memcmp(old_indclass->values, classObjectId,
+ old_natts * sizeof(Oid)) == 0 &&
+ memcmp(old_indcollation->values, collationObjectId,
+ old_natts * sizeof(Oid)) == 0);
+
+ ReleaseSysCache(tuple);
+
+ if (!ret)
+ return false;
+
+ /* For polymorphic opcintype, column type changes break compatibility. */
+ irel = index_open(oldId, AccessShareLock); /* caller probably has a lock */
+ for (i = 0; i < old_natts; i++)
+ {
+ if (IsPolymorphicType(get_opclass_input_type(classObjectId[i])) &&
+ TupleDescAttr(irel->rd_att, i)->atttypid != typeObjectId[i])
+ {
+ ret = false;
+ break;
+ }
+ }
+
+ /* Any change in opclass options break compatibility. */
+ if (ret)
+ {
+ Datum *opclassOptions = RelationGetIndexRawAttOptions(irel);
+
+ ret = CompareOpclassOptions(opclassOptions,
+ indexInfo->ii_OpclassOptions, old_natts);
+
+ if (opclassOptions)
+ pfree(opclassOptions);
+ }
+
+ /* Any change in exclusion operator selections breaks compatibility. */
+ if (ret && indexInfo->ii_ExclusionOps != NULL)
+ {
+ Oid *old_operators,
+ *old_procs;
+ uint16 *old_strats;
+
+ RelationGetExclusionInfo(irel, &old_operators, &old_procs, &old_strats);
+ ret = memcmp(old_operators, indexInfo->ii_ExclusionOps,
+ old_natts * sizeof(Oid)) == 0;
+
+ /* Require an exact input type match for polymorphic operators. */
+ if (ret)
+ {
+ for (i = 0; i < old_natts && ret; i++)
+ {
+ Oid left,
+ right;
+
+ op_input_types(indexInfo->ii_ExclusionOps[i], &left, &right);
+ if ((IsPolymorphicType(left) || IsPolymorphicType(right)) &&
+ TupleDescAttr(irel->rd_att, i)->atttypid != typeObjectId[i])
+ {
+ ret = false;
+ break;
+ }
+ }
+ }
+ }
+
+ /* NoLock: keep our AccessShareLock until end of transaction */
+ index_close(irel, NoLock);
+ return ret;
+}
+
+/*
+ * CompareOpclassOptions
+ *
+ * Determine whether two per-column opclass-options arrays are equivalent.
+ * Each element is a text[] datum; a whole array, or any individual element,
+ * may be NULL (represented by a NULL pointer / zero Datum).
+ */
+static bool
+CompareOpclassOptions(Datum *opts1, Datum *opts2, int natts)
+{
+ /* Two entirely-absent arrays are trivially equal */
+ if (opts1 == NULL && opts2 == NULL)
+ return true;
+
+ for (int attno = 0; attno < natts; attno++)
+ {
+ Datum opt1 = opts1 ? opts1[attno] : (Datum) 0;
+ Datum opt2 = opts2 ? opts2[attno] : (Datum) 0;
+
+ /* A NULL element matches only another NULL element */
+ if (opt1 == (Datum) 0 || opt2 == (Datum) 0)
+ {
+ if (opt1 != opt2)
+ return false;
+ continue;
+ }
+
+ /* Both non-NULL: compare the text[] values for equality */
+ if (!DatumGetBool(DirectFunctionCall2(array_eq, opt1, opt2)))
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * WaitForOlderSnapshots
+ *
+ * Wait for transactions that might have an older snapshot than the given xmin
+ * limit, because it might not contain tuples deleted just before it has
+ * been taken. Obtain a list of VXIDs of such transactions, and wait for them
+ * individually. This is used when building an index concurrently.
+ *
+ * We can exclude any running transactions that have xmin > the xmin given;
+ * their oldest snapshot must be newer than our xmin limit.
+ * We can also exclude any transactions that have xmin = zero, since they
+ * evidently have no live snapshot at all (and any one they might be in
+ * process of taking is certainly newer than ours). Transactions in other
+ * DBs can be ignored too, since they'll never even be able to see the
+ * index being worked on.
+ *
+ * We can also exclude autovacuum processes and processes running manual
+ * lazy VACUUMs, because they won't be fazed by missing index entries
+ * either. (Manual ANALYZEs, however, can't be excluded because they
+ * might be within transactions that are going to do arbitrary operations
+ * later.) Processes running CREATE INDEX CONCURRENTLY or REINDEX CONCURRENTLY
+ * on indexes that are neither expressional nor partial are also safe to
+ * ignore, since we know that those processes won't examine any data
+ * outside the table they're indexing.
+ *
+ * Also, GetCurrentVirtualXIDs never reports our own vxid, so we need not
+ * check for that.
+ *
+ * If a process goes idle-in-transaction with xmin zero, we do not need to
+ * wait for it anymore, per the above argument. We do not have the
+ * infrastructure right now to stop waiting if that happens, but we can at
+ * least avoid the folly of waiting when it is idle at the time we would
+ * begin to wait. We do this by repeatedly rechecking the output of
+ * GetCurrentVirtualXIDs. If, during any iteration, a particular vxid
+ * doesn't show up in the output, we know we can forget about it.
+ */
+void
+WaitForOlderSnapshots(TransactionId limitXmin, bool progress)
+{
+ int n_old_snapshots;
+ int i;
+ VirtualTransactionId *old_snapshots;
+
+ /* Collect VXIDs of xacts whose xmin <= limitXmin, minus excluded procs */
+ old_snapshots = GetCurrentVirtualXIDs(limitXmin, true, false,
+ PROC_IS_AUTOVACUUM | PROC_IN_VACUUM
+ | PROC_IN_SAFE_IC,
+ &n_old_snapshots);
+ if (progress)
+ pgstat_progress_update_param(PROGRESS_WAITFOR_TOTAL, n_old_snapshots);
+
+ for (i = 0; i < n_old_snapshots; i++)
+ {
+ if (!VirtualTransactionIdIsValid(old_snapshots[i]))
+ continue; /* found uninteresting in previous cycle */
+
+ if (i > 0)
+ {
+ /* see if anything's changed ... */
+ VirtualTransactionId *newer_snapshots;
+ int n_newer_snapshots;
+ int j;
+ int k;
+
+ newer_snapshots = GetCurrentVirtualXIDs(limitXmin,
+ true, false,
+ PROC_IS_AUTOVACUUM | PROC_IN_VACUUM
+ | PROC_IN_SAFE_IC,
+ &n_newer_snapshots);
+ /* Invalidate any remaining vxid that no longer shows up */
+ for (j = i; j < n_old_snapshots; j++)
+ {
+ if (!VirtualTransactionIdIsValid(old_snapshots[j]))
+ continue; /* found uninteresting in previous cycle */
+ for (k = 0; k < n_newer_snapshots; k++)
+ {
+ if (VirtualTransactionIdEquals(old_snapshots[j],
+ newer_snapshots[k]))
+ break;
+ }
+ if (k >= n_newer_snapshots) /* not there anymore */
+ SetInvalidVirtualTransactionId(old_snapshots[j]);
+ }
+ pfree(newer_snapshots);
+ }
+
+ if (VirtualTransactionIdIsValid(old_snapshots[i]))
+ {
+ /* If requested, publish who we're going to wait for. */
+ if (progress)
+ {
+ PGPROC *holder = BackendIdGetProc(old_snapshots[i].backendId);
+
+ if (holder)
+ pgstat_progress_update_param(PROGRESS_WAITFOR_CURRENT_PID,
+ holder->pid);
+ }
+ /* Block until that virtual transaction terminates */
+ VirtualXactLock(old_snapshots[i], true);
+ }
+
+ if (progress)
+ pgstat_progress_update_param(PROGRESS_WAITFOR_DONE, i + 1);
+ }
+}
+
+
+/*
+ * DefineIndex
+ * Creates a new index.
+ *
+ * This function manages the current userid according to the needs of pg_dump.
+ * Recreating old-database catalog entries in new-database is fine, regardless
+ * of which users would have permission to recreate those entries now. That's
+ * just preservation of state. Running opaque expressions, like calling a
+ * function named in a catalog entry or evaluating a pg_node_tree in a catalog
+ * entry, as anyone other than the object owner, is not fine. To adhere to
+ * those principles and to remain fail-safe, use the table owner userid for
+ * most ACL checks. Use the original userid for ACL checks reached without
+ * traversing opaque expressions. (pg_dump can predict such ACL checks from
+ * catalogs.) Overall, this is a mess. Future DDL development should
+ * consider offering one DDL command for catalog setup and a separate DDL
+ * command for steps that run opaque expressions.
+ *
+ * 'relationId': the OID of the heap relation on which the index is to be
+ * created
+ * 'stmt': IndexStmt describing the properties of the new index.
+ * 'indexRelationId': normally InvalidOid, but during bootstrap can be
+ * nonzero to specify a preselected OID for the index.
+ * 'parentIndexId': the OID of the parent index; InvalidOid if not the child
+ * of a partitioned index.
+ * 'parentConstraintId': the OID of the parent constraint; InvalidOid if not
+ * the child of a constraint (only used when recursing)
+ * 'is_alter_table': this is due to an ALTER rather than a CREATE operation.
+ * 'check_rights': check for CREATE rights in namespace and tablespace. (This
+ * should be true except when ALTER is deleting/recreating an index.)
+ * 'check_not_in_use': check for table not already in use in current session.
+ * This should be true unless caller is holding the table open, in which
+ * case the caller had better have checked it earlier.
+ * 'skip_build': make the catalog entries but don't create the index files
+ * 'quiet': suppress the NOTICE chatter ordinarily provided for constraints.
+ *
+ * Returns the object address of the created index.
+ */
+ObjectAddress
+DefineIndex(Oid relationId,
+ IndexStmt *stmt,
+ Oid indexRelationId,
+ Oid parentIndexId,
+ Oid parentConstraintId,
+ bool is_alter_table,
+ bool check_rights,
+ bool check_not_in_use,
+ bool skip_build,
+ bool quiet)
+{
+ bool concurrent;
+ char *indexRelationName;
+ char *accessMethodName;
+ Oid *typeObjectId;
+ Oid *collationObjectId;
+ Oid *classObjectId;
+ Oid accessMethodId;
+ Oid namespaceId;
+ Oid tablespaceId;
+ Oid createdConstraintId = InvalidOid;
+ List *indexColNames;
+ List *allIndexParams;
+ Relation rel;
+ HeapTuple tuple;
+ Form_pg_am accessMethodForm;
+ IndexAmRoutine *amRoutine;
+ bool amcanorder;
+ amoptions_function amoptions;
+ bool partitioned;
+ bool safe_index;
+ Datum reloptions;
+ int16 *coloptions;
+ IndexInfo *indexInfo;
+ bits16 flags;
+ bits16 constr_flags;
+ int numberOfAttributes;
+ int numberOfKeyAttributes;
+ TransactionId limitXmin;
+ ObjectAddress address;
+ LockRelId heaprelid;
+ LOCKTAG heaplocktag;
+ LOCKMODE lockmode;
+ Snapshot snapshot;
+ Oid root_save_userid;
+ int root_save_sec_context;
+ int root_save_nestlevel;
+ int i;
+
+ root_save_nestlevel = NewGUCNestLevel();
+
+ /*
+ * Some callers need us to run with an empty default_tablespace; this is a
+ * necessary hack to be able to reproduce catalog state accurately when
+ * recreating indexes after table-rewriting ALTER TABLE.
+ */
+ if (stmt->reset_default_tblspc)
+ (void) set_config_option("default_tablespace", "",
+ PGC_USERSET, PGC_S_SESSION,
+ GUC_ACTION_SAVE, true, 0, false);
+
+ /*
+ * Force non-concurrent build on temporary relations, even if CONCURRENTLY
+ * was requested. Other backends can't access a temporary relation, so
+ * there's no harm in grabbing a stronger lock, and a non-concurrent DROP
+ * is more efficient. Do this before any use of the concurrent option is
+ * done.
+ */
+ if (stmt->concurrent && get_rel_persistence(relationId) != RELPERSISTENCE_TEMP)
+ concurrent = true;
+ else
+ concurrent = false;
+
+ /*
+ * Start progress report. If we're building a partition, this was already
+ * done.
+ */
+ if (!OidIsValid(parentIndexId))
+ {
+ pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
+ relationId);
+ pgstat_progress_update_param(PROGRESS_CREATEIDX_COMMAND,
+ concurrent ?
+ PROGRESS_CREATEIDX_COMMAND_CREATE_CONCURRENTLY :
+ PROGRESS_CREATEIDX_COMMAND_CREATE);
+ }
+
+ /*
+ * No index OID to report yet
+ */
+ pgstat_progress_update_param(PROGRESS_CREATEIDX_INDEX_OID,
+ InvalidOid);
+
+ /*
+ * count key attributes in index
+ */
+ numberOfKeyAttributes = list_length(stmt->indexParams);
+
+ /*
+ * Calculate the new list of index columns including both key columns and
+ * INCLUDE columns. Later we can determine which of these are key
+ * columns, and which are just part of the INCLUDE list by checking the
+ * list position. A list item in a position less than ii_NumIndexKeyAttrs
+ * is part of the key columns, and anything equal to and over is part of
+ * the INCLUDE columns.
+ */
+ allIndexParams = list_concat_copy(stmt->indexParams,
+ stmt->indexIncludingParams);
+ numberOfAttributes = list_length(allIndexParams);
+
+ if (numberOfKeyAttributes <= 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("must specify at least one column")));
+ if (numberOfAttributes > INDEX_MAX_KEYS)
+ ereport(ERROR,
+ (errcode(ERRCODE_TOO_MANY_COLUMNS),
+ errmsg("cannot use more than %d columns in an index",
+ INDEX_MAX_KEYS)));
+
+ /*
+ * Only SELECT ... FOR UPDATE/SHARE are allowed while doing a standard
+ * index build; but for concurrent builds we allow INSERT/UPDATE/DELETE
+ * (but not VACUUM).
+ *
+ * NB: Caller is responsible for making sure that relationId refers to the
+ * relation on which the index should be built; except in bootstrap mode,
+ * this will typically require the caller to have already locked the
+ * relation. To avoid lock upgrade hazards, that lock should be at least
+ * as strong as the one we take here.
+ *
+ * NB: If the lock strength here ever changes, code that is run by
+ * parallel workers under the control of certain particular ambuild
+ * functions will need to be updated, too.
+ */
+ lockmode = concurrent ? ShareUpdateExclusiveLock : ShareLock;
+ rel = table_open(relationId, lockmode);
+
+ /*
+ * Switch to the table owner's userid, so that any index functions are run
+ * as that user. Also lock down security-restricted operations. We
+ * already arranged to make GUC variable changes local to this command.
+ */
+ GetUserIdAndSecContext(&root_save_userid, &root_save_sec_context);
+ SetUserIdAndSecContext(rel->rd_rel->relowner,
+ root_save_sec_context | SECURITY_RESTRICTED_OPERATION);
+
+ namespaceId = RelationGetNamespace(rel);
+
+ /* Ensure that it makes sense to index this kind of relation */
+ switch (rel->rd_rel->relkind)
+ {
+ case RELKIND_RELATION:
+ case RELKIND_MATVIEW:
+ case RELKIND_PARTITIONED_TABLE:
+ /* OK */
+ break;
+ default:
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot create index on relation \"%s\"",
+ RelationGetRelationName(rel)),
+ errdetail_relkind_not_supported(rel->rd_rel->relkind)));
+ break;
+ }
+
+ /*
+ * Establish behavior for partitioned tables, and verify sanity of
+ * parameters.
+ *
+ * We do not build an actual index in this case; we only create a few
+ * catalog entries. The actual indexes are built by recursing for each
+ * partition.
+ */
+ partitioned = rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE;
+ if (partitioned)
+ {
+ /*
+ * Note: we check 'stmt->concurrent' rather than 'concurrent', so that
+ * the error is thrown also for temporary tables. Seems better to be
+ * consistent, even though we could do it on temporary table because
+ * we're not actually doing it concurrently.
+ */
+ if (stmt->concurrent)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot create index on partitioned table \"%s\" concurrently",
+ RelationGetRelationName(rel))));
+ if (stmt->excludeOpNames)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot create exclusion constraints on partitioned table \"%s\"",
+ RelationGetRelationName(rel))));
+ }
+
+ /*
+ * Don't try to CREATE INDEX on temp tables of other backends.
+ */
+ if (RELATION_IS_OTHER_TEMP(rel))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot create indexes on temporary tables of other sessions")));
+
+ /*
+ * Unless our caller vouches for having checked this already, insist that
+ * the table not be in use by our own session, either. Otherwise we might
+ * fail to make entries in the new index (for instance, if an INSERT or
+ * UPDATE is in progress and has already made its list of target indexes).
+ */
+ if (check_not_in_use)
+ CheckTableNotInUse(rel, "CREATE INDEX");
+
+ /*
+ * Verify we (still) have CREATE rights in the rel's namespace.
+ * (Presumably we did when the rel was created, but maybe not anymore.)
+ * Skip check if caller doesn't want it. Also skip check if
+ * bootstrapping, since permissions machinery may not be working yet.
+ */
+ if (check_rights && !IsBootstrapProcessingMode())
+ {
+ AclResult aclresult;
+
+ aclresult = pg_namespace_aclcheck(namespaceId, root_save_userid,
+ ACL_CREATE);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error(aclresult, OBJECT_SCHEMA,
+ get_namespace_name(namespaceId));
+ }
+
+ /*
+ * Select tablespace to use. If not specified, use default tablespace
+ * (which may in turn default to database's default).
+ */
+ if (stmt->tableSpace)
+ {
+ tablespaceId = get_tablespace_oid(stmt->tableSpace, false);
+ if (partitioned && tablespaceId == MyDatabaseTableSpace)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot specify default tablespace for partitioned relations")));
+ }
+ else
+ {
+ tablespaceId = GetDefaultTablespace(rel->rd_rel->relpersistence,
+ partitioned);
+ /* note InvalidOid is OK in this case */
+ }
+
+ /* Check tablespace permissions */
+ if (check_rights &&
+ OidIsValid(tablespaceId) && tablespaceId != MyDatabaseTableSpace)
+ {
+ AclResult aclresult;
+
+ aclresult = pg_tablespace_aclcheck(tablespaceId, root_save_userid,
+ ACL_CREATE);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error(aclresult, OBJECT_TABLESPACE,
+ get_tablespace_name(tablespaceId));
+ }
+
+ /*
+ * Force shared indexes into the pg_global tablespace. This is a bit of a
+ * hack but seems simpler than marking them in the BKI commands. On the
+ * other hand, if it's not shared, don't allow it to be placed there.
+ */
+ if (rel->rd_rel->relisshared)
+ tablespaceId = GLOBALTABLESPACE_OID;
+ else if (tablespaceId == GLOBALTABLESPACE_OID)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("only shared relations can be placed in pg_global tablespace")));
+
+ /*
+ * Choose the index column names.
+ */
+ indexColNames = ChooseIndexColumnNames(allIndexParams);
+
+ /*
+ * Select name for index if caller didn't specify
+ */
+ indexRelationName = stmt->idxname;
+ if (indexRelationName == NULL)
+ indexRelationName = ChooseIndexName(RelationGetRelationName(rel),
+ namespaceId,
+ indexColNames,
+ stmt->excludeOpNames,
+ stmt->primary,
+ stmt->isconstraint);
+
+ /*
+ * look up the access method, verify it can handle the requested features
+ */
+ accessMethodName = stmt->accessMethod;
+ tuple = SearchSysCache1(AMNAME, PointerGetDatum(accessMethodName));
+ if (!HeapTupleIsValid(tuple))
+ {
+ /*
+ * Hack to provide more-or-less-transparent updating of old RTREE
+ * indexes to GiST: if RTREE is requested and not found, use GIST.
+ */
+ if (strcmp(accessMethodName, "rtree") == 0)
+ {
+ ereport(NOTICE,
+ (errmsg("substituting access method \"gist\" for obsolete method \"rtree\"")));
+ accessMethodName = "gist";
+ tuple = SearchSysCache1(AMNAME, PointerGetDatum(accessMethodName));
+ }
+
+ if (!HeapTupleIsValid(tuple))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("access method \"%s\" does not exist",
+ accessMethodName)));
+ }
+ accessMethodForm = (Form_pg_am) GETSTRUCT(tuple);
+ accessMethodId = accessMethodForm->oid;
+ amRoutine = GetIndexAmRoutine(accessMethodForm->amhandler);
+
+ pgstat_progress_update_param(PROGRESS_CREATEIDX_ACCESS_METHOD_OID,
+ accessMethodId);
+
+ if (stmt->unique && !amRoutine->amcanunique)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("access method \"%s\" does not support unique indexes",
+ accessMethodName)));
+ if (stmt->indexIncludingParams != NIL && !amRoutine->amcaninclude)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("access method \"%s\" does not support included columns",
+ accessMethodName)));
+ if (numberOfKeyAttributes > 1 && !amRoutine->amcanmulticol)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("access method \"%s\" does not support multicolumn indexes",
+ accessMethodName)));
+ if (stmt->excludeOpNames && amRoutine->amgettuple == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("access method \"%s\" does not support exclusion constraints",
+ accessMethodName)));
+
+ amcanorder = amRoutine->amcanorder;
+ amoptions = amRoutine->amoptions;
+
+ pfree(amRoutine);
+ ReleaseSysCache(tuple);
+
+ /*
+ * Validate predicate, if given
+ */
+ if (stmt->whereClause)
+ CheckPredicate((Expr *) stmt->whereClause);
+
+ /*
+ * Parse AM-specific options, convert to text array form, validate.
+ */
+ reloptions = transformRelOptions((Datum) 0, stmt->options,
+ NULL, NULL, false, false);
+
+ (void) index_reloptions(amoptions, reloptions, true);
+
+ /*
+ * Prepare arguments for index_create, primarily an IndexInfo structure.
+ * Note that predicates must be in implicit-AND format. In a concurrent
+ * build, mark it not-ready-for-inserts.
+ */
+ indexInfo = makeIndexInfo(numberOfAttributes,
+ numberOfKeyAttributes,
+ accessMethodId,
+ NIL, /* expressions, NIL for now */
+ make_ands_implicit((Expr *) stmt->whereClause),
+ stmt->unique,
+ stmt->nulls_not_distinct,
+ !concurrent,
+ concurrent);
+
+ typeObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid));
+ collationObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid));
+ classObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid));
+ coloptions = (int16 *) palloc(numberOfAttributes * sizeof(int16));
+ ComputeIndexAttrs(indexInfo,
+ typeObjectId, collationObjectId, classObjectId,
+ coloptions, allIndexParams,
+ stmt->excludeOpNames, relationId,
+ accessMethodName, accessMethodId,
+ amcanorder, stmt->isconstraint, root_save_userid,
+ root_save_sec_context, &root_save_nestlevel);
+
+ /*
+ * Extra checks when creating a PRIMARY KEY index.
+ */
+ if (stmt->primary)
+ index_check_primary_key(rel, indexInfo, is_alter_table, stmt);
+
+ /*
+ * If this table is partitioned and we're creating a unique index or a
+ * primary key, make sure that the partition key is a subset of the
+ * index's columns. Otherwise it would be possible to violate uniqueness
+ * by putting values that ought to be unique in different partitions.
+ *
+ * We could lift this limitation if we had global indexes, but those have
+ * their own problems, so this is a useful feature combination.
+ */
+ if (partitioned && (stmt->unique || stmt->primary))
+ {
+ PartitionKey key = RelationGetPartitionKey(rel);
+ const char *constraint_type;
+ int i;
+
+ if (stmt->primary)
+ constraint_type = "PRIMARY KEY";
+ else if (stmt->unique)
+ constraint_type = "UNIQUE";
+ else if (stmt->excludeOpNames != NIL)
+ constraint_type = "EXCLUDE";
+ else
+ {
+ elog(ERROR, "unknown constraint type");
+ constraint_type = NULL; /* keep compiler quiet */
+ }
+
+ /*
+ * Verify that all the columns in the partition key appear in the
+ * unique key definition, with the same notion of equality.
+ */
+ for (i = 0; i < key->partnatts; i++)
+ {
+ bool found = false;
+ int eq_strategy;
+ Oid ptkey_eqop;
+ int j;
+
+ /*
+ * Identify the equality operator associated with this partkey
+ * column. For list and range partitioning, partkeys use btree
+ * operator classes; hash partitioning uses hash operator classes.
+ * (Keep this in sync with ComputePartitionAttrs!)
+ */
+ if (key->strategy == PARTITION_STRATEGY_HASH)
+ eq_strategy = HTEqualStrategyNumber;
+ else
+ eq_strategy = BTEqualStrategyNumber;
+
+ ptkey_eqop = get_opfamily_member(key->partopfamily[i],
+ key->partopcintype[i],
+ key->partopcintype[i],
+ eq_strategy);
+ if (!OidIsValid(ptkey_eqop))
+ elog(ERROR, "missing operator %d(%u,%u) in partition opfamily %u",
+ eq_strategy, key->partopcintype[i], key->partopcintype[i],
+ key->partopfamily[i]);
+
+ /*
+ * We'll need to be able to identify the equality operators
+ * associated with index columns, too. We know what to do with
+ * btree opclasses; if there are ever any other index types that
+ * support unique indexes, this logic will need extension.
+ */
+ if (accessMethodId == BTREE_AM_OID)
+ eq_strategy = BTEqualStrategyNumber;
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot match partition key to an index using access method \"%s\"",
+ accessMethodName)));
+
+ /*
+ * It may be possible to support UNIQUE constraints when partition
+ * keys are expressions, but is it worth it? Give up for now.
+ */
+ if (key->partattrs[i] == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("unsupported %s constraint with partition key definition",
+ constraint_type),
+ errdetail("%s constraints cannot be used when partition keys include expressions.",
+ constraint_type)));
+
+ /* Search the index column(s) for a match */
+ for (j = 0; j < indexInfo->ii_NumIndexKeyAttrs; j++)
+ {
+ if (key->partattrs[i] == indexInfo->ii_IndexAttrNumbers[j])
+ {
+ /* Matched the column, now what about the equality op? */
+ Oid idx_opfamily;
+ Oid idx_opcintype;
+
+ if (get_opclass_opfamily_and_input_type(classObjectId[j],
+ &idx_opfamily,
+ &idx_opcintype))
+ {
+ Oid idx_eqop;
+
+ idx_eqop = get_opfamily_member(idx_opfamily,
+ idx_opcintype,
+ idx_opcintype,
+ eq_strategy);
+ if (ptkey_eqop == idx_eqop)
+ {
+ found = true;
+ break;
+ }
+ }
+ }
+ }
+
+ if (!found)
+ {
+ Form_pg_attribute att;
+
+ att = TupleDescAttr(RelationGetDescr(rel),
+ key->partattrs[i] - 1);
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("unique constraint on partitioned table must include all partitioning columns"),
+ errdetail("%s constraint on table \"%s\" lacks column \"%s\" which is part of the partition key.",
+ constraint_type, RelationGetRelationName(rel),
+ NameStr(att->attname))));
+ }
+ }
+ }
+
+
+ /*
+ * We disallow indexes on system columns. They would not necessarily get
+ * updated correctly, and they don't seem useful anyway.
+ */
+ for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
+ {
+ AttrNumber attno = indexInfo->ii_IndexAttrNumbers[i];
+
+ if (attno < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("index creation on system columns is not supported")));
+ }
+
+ /*
+ * Also check for system columns used in expressions or predicates.
+ */
+ if (indexInfo->ii_Expressions || indexInfo->ii_Predicate)
+ {
+ Bitmapset *indexattrs = NULL;
+
+ pull_varattnos((Node *) indexInfo->ii_Expressions, 1, &indexattrs);
+ pull_varattnos((Node *) indexInfo->ii_Predicate, 1, &indexattrs);
+
+ for (i = FirstLowInvalidHeapAttributeNumber + 1; i < 0; i++)
+ {
+ if (bms_is_member(i - FirstLowInvalidHeapAttributeNumber,
+ indexattrs))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("index creation on system columns is not supported")));
+ }
+ }
+
+ /* Is index safe for others to ignore? See set_indexsafe_procflags() */
+ safe_index = indexInfo->ii_Expressions == NIL &&
+ indexInfo->ii_Predicate == NIL;
+
+ /*
+ * Report index creation if appropriate (delay this till after most of the
+ * error checks)
+ */
+ if (stmt->isconstraint && !quiet)
+ {
+ const char *constraint_type;
+
+ if (stmt->primary)
+ constraint_type = "PRIMARY KEY";
+ else if (stmt->unique)
+ constraint_type = "UNIQUE";
+ else if (stmt->excludeOpNames != NIL)
+ constraint_type = "EXCLUDE";
+ else
+ {
+ elog(ERROR, "unknown constraint type");
+ constraint_type = NULL; /* keep compiler quiet */
+ }
+
+ ereport(DEBUG1,
+ (errmsg_internal("%s %s will create implicit index \"%s\" for table \"%s\"",
+ is_alter_table ? "ALTER TABLE / ADD" : "CREATE TABLE /",
+ constraint_type,
+ indexRelationName, RelationGetRelationName(rel))));
+ }
+
+ /*
+ * A valid stmt->oldNode implies that we already have a built form of the
+ * index. The caller should also decline any index build.
+ */
+ Assert(!OidIsValid(stmt->oldNode) || (skip_build && !concurrent));
+
+ /*
+ * Make the catalog entries for the index, including constraints. This
+ * step also actually builds the index, except if caller requested not to
+ * or in concurrent mode, in which case it'll be done later, or doing a
+ * partitioned index (because those don't have storage).
+ */
+ flags = constr_flags = 0;
+ if (stmt->isconstraint)
+ flags |= INDEX_CREATE_ADD_CONSTRAINT;
+ if (skip_build || concurrent || partitioned)
+ flags |= INDEX_CREATE_SKIP_BUILD;
+ if (stmt->if_not_exists)
+ flags |= INDEX_CREATE_IF_NOT_EXISTS;
+ if (concurrent)
+ flags |= INDEX_CREATE_CONCURRENT;
+ if (partitioned)
+ flags |= INDEX_CREATE_PARTITIONED;
+ if (stmt->primary)
+ flags |= INDEX_CREATE_IS_PRIMARY;
+
+ /*
+ * If the table is partitioned, and recursion was declined but partitions
+ * exist, mark the index as invalid.
+ */
+ if (partitioned && stmt->relation && !stmt->relation->inh)
+ {
+ PartitionDesc pd = RelationGetPartitionDesc(rel, true);
+
+ if (pd->nparts != 0)
+ flags |= INDEX_CREATE_INVALID;
+ }
+
+ if (stmt->deferrable)
+ constr_flags |= INDEX_CONSTR_CREATE_DEFERRABLE;
+ if (stmt->initdeferred)
+ constr_flags |= INDEX_CONSTR_CREATE_INIT_DEFERRED;
+
+ indexRelationId =
+ index_create(rel, indexRelationName, indexRelationId, parentIndexId,
+ parentConstraintId,
+ stmt->oldNode, indexInfo, indexColNames,
+ accessMethodId, tablespaceId,
+ collationObjectId, classObjectId,
+ coloptions, reloptions,
+ flags, constr_flags,
+ allowSystemTableMods, !check_rights,
+ &createdConstraintId);
+
+ ObjectAddressSet(address, RelationRelationId, indexRelationId);
+
+ if (!OidIsValid(indexRelationId))
+ {
+ /*
+ * Roll back any GUC changes executed by index functions. Also revert
+ * to original default_tablespace if we changed it above.
+ */
+ AtEOXact_GUC(false, root_save_nestlevel);
+
+ /* Restore userid and security context */
+ SetUserIdAndSecContext(root_save_userid, root_save_sec_context);
+
+ table_close(rel, NoLock);
+
+ /* If this is the top-level index, we're done */
+ if (!OidIsValid(parentIndexId))
+ pgstat_progress_end_command();
+
+ return address;
+ }
+
+ /*
+ * Roll back any GUC changes executed by index functions, and keep
+ * subsequent changes local to this command. This is essential if some
+ * index function changed a behavior-affecting GUC, e.g. search_path.
+ */
+ AtEOXact_GUC(false, root_save_nestlevel);
+ root_save_nestlevel = NewGUCNestLevel();
+
+ /* Add any requested comment */
+ if (stmt->idxcomment != NULL)
+ CreateComments(indexRelationId, RelationRelationId, 0,
+ stmt->idxcomment);
+
+ if (partitioned)
+ {
+ PartitionDesc partdesc;
+
+ /*
+ * Unless caller specified to skip this step (via ONLY), process each
+ * partition to make sure they all contain a corresponding index.
+ *
+ * If we're called internally (no stmt->relation), recurse always.
+ */
+ partdesc = RelationGetPartitionDesc(rel, true);
+ if ((!stmt->relation || stmt->relation->inh) && partdesc->nparts > 0)
+ {
+ int nparts = partdesc->nparts;
+ Oid *part_oids = palloc(sizeof(Oid) * nparts);
+ bool invalidate_parent = false;
+ Relation parentIndex;
+ TupleDesc parentDesc;
+
+ pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_TOTAL,
+ nparts);
+
+ /* Make a local copy of partdesc->oids[], just for safety */
+ memcpy(part_oids, partdesc->oids, sizeof(Oid) * nparts);
+
+ /*
+ * We'll need an IndexInfo describing the parent index. The one
+ * built above is almost good enough, but not quite, because (for
+ * example) its predicate expression if any hasn't been through
+ * expression preprocessing. The most reliable way to get an
+ * IndexInfo that will match those for child indexes is to build
+ * it the same way, using BuildIndexInfo().
+ */
+ parentIndex = index_open(indexRelationId, lockmode);
+ indexInfo = BuildIndexInfo(parentIndex);
+
+ parentDesc = RelationGetDescr(rel);
+
+ /*
+ * For each partition, scan all existing indexes; if one matches
+ * our index definition and is not already attached to some other
+ * parent index, attach it to the one we just created.
+ *
+ * If none matches, build a new index by calling ourselves
+ * recursively with the same options (except for the index name).
+ */
+ for (i = 0; i < nparts; i++)
+ {
+ Oid childRelid = part_oids[i];
+ Relation childrel;
+ Oid child_save_userid;
+ int child_save_sec_context;
+ int child_save_nestlevel;
+ List *childidxs;
+ ListCell *cell;
+ AttrMap *attmap;
+ bool found = false;
+
+ childrel = table_open(childRelid, lockmode);
+
+ GetUserIdAndSecContext(&child_save_userid,
+ &child_save_sec_context);
+ SetUserIdAndSecContext(childrel->rd_rel->relowner,
+ child_save_sec_context | SECURITY_RESTRICTED_OPERATION);
+ child_save_nestlevel = NewGUCNestLevel();
+
+ /*
+ * Don't try to create indexes on foreign tables, though. Skip
+ * those if a regular index, or fail if trying to create a
+ * constraint index.
+ */
+ if (childrel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+ {
+ if (stmt->unique || stmt->primary)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot create unique index on partitioned table \"%s\"",
+ RelationGetRelationName(rel)),
+ errdetail("Table \"%s\" contains partitions that are foreign tables.",
+ RelationGetRelationName(rel))));
+
+ AtEOXact_GUC(false, child_save_nestlevel);
+ SetUserIdAndSecContext(child_save_userid,
+ child_save_sec_context);
+ table_close(childrel, lockmode);
+ continue;
+ }
+
+ childidxs = RelationGetIndexList(childrel);
+ attmap =
+ build_attrmap_by_name(RelationGetDescr(childrel),
+ parentDesc);
+
+ foreach(cell, childidxs)
+ {
+ Oid cldidxid = lfirst_oid(cell);
+ Relation cldidx;
+ IndexInfo *cldIdxInfo;
+
+ /* this index is already partition of another one */
+ if (has_superclass(cldidxid))
+ continue;
+
+ cldidx = index_open(cldidxid, lockmode);
+ cldIdxInfo = BuildIndexInfo(cldidx);
+ if (CompareIndexInfo(cldIdxInfo, indexInfo,
+ cldidx->rd_indcollation,
+ parentIndex->rd_indcollation,
+ cldidx->rd_opfamily,
+ parentIndex->rd_opfamily,
+ attmap))
+ {
+ Oid cldConstrOid = InvalidOid;
+
+ /*
+ * Found a match.
+ *
+ * If this index is being created in the parent
+ * because of a constraint, then the child needs to
+ * have a constraint also, so look for one. If there
+ * is no such constraint, this index is no good, so
+ * keep looking.
+ */
+ if (createdConstraintId != InvalidOid)
+ {
+ cldConstrOid =
+ get_relation_idx_constraint_oid(childRelid,
+ cldidxid);
+ if (cldConstrOid == InvalidOid)
+ {
+ index_close(cldidx, lockmode);
+ continue;
+ }
+ }
+
+ /* Attach index to parent and we're done. */
+ IndexSetParentIndex(cldidx, indexRelationId);
+ if (createdConstraintId != InvalidOid)
+ ConstraintSetParentConstraint(cldConstrOid,
+ createdConstraintId,
+ childRelid);
+
+ if (!cldidx->rd_index->indisvalid)
+ invalidate_parent = true;
+
+ found = true;
+ /* keep lock till commit */
+ index_close(cldidx, NoLock);
+ break;
+ }
+
+ index_close(cldidx, lockmode);
+ }
+
+ list_free(childidxs);
+ AtEOXact_GUC(false, child_save_nestlevel);
+ SetUserIdAndSecContext(child_save_userid,
+ child_save_sec_context);
+ table_close(childrel, NoLock);
+
+ /*
+ * If no matching index was found, create our own.
+ */
+ if (!found)
+ {
+ IndexStmt *childStmt = copyObject(stmt);
+ bool found_whole_row;
+ ListCell *lc;
+ ObjectAddress childAddr;
+
+ /*
+ * We can't use the same index name for the child index,
+ * so clear idxname to let the recursive invocation choose
+ * a new name. Likewise, the existing target relation
+ * field is wrong, and if indexOid or oldNode are set,
+ * they mustn't be applied to the child either.
+ */
+ childStmt->idxname = NULL;
+ childStmt->relation = NULL;
+ childStmt->indexOid = InvalidOid;
+ childStmt->oldNode = InvalidOid;
+ childStmt->oldCreateSubid = InvalidSubTransactionId;
+ childStmt->oldFirstRelfilenodeSubid = InvalidSubTransactionId;
+
+ /*
+ * Adjust any Vars (both in expressions and in the index's
+ * WHERE clause) to match the partition's column numbering
+ * in case it's different from the parent's.
+ */
+ foreach(lc, childStmt->indexParams)
+ {
+ IndexElem *ielem = lfirst(lc);
+
+ /*
+ * If the index parameter is an expression, we must
+ * translate it to contain child Vars.
+ */
+ if (ielem->expr)
+ {
+ ielem->expr =
+ map_variable_attnos((Node *) ielem->expr,
+ 1, 0, attmap,
+ InvalidOid,
+ &found_whole_row);
+ if (found_whole_row)
+ elog(ERROR, "cannot convert whole-row table reference");
+ }
+ }
+ childStmt->whereClause =
+ map_variable_attnos(stmt->whereClause, 1, 0,
+ attmap,
+ InvalidOid, &found_whole_row);
+ if (found_whole_row)
+ elog(ERROR, "cannot convert whole-row table reference");
+
+ /*
+ * Recurse as the starting user ID. Callee will use that
+ * for permission checks, then switch again.
+ */
+ Assert(GetUserId() == child_save_userid);
+ SetUserIdAndSecContext(root_save_userid,
+ root_save_sec_context);
+ childAddr =
+ DefineIndex(childRelid, childStmt,
+ InvalidOid, /* no predefined OID */
+ indexRelationId, /* this is our child */
+ createdConstraintId,
+ is_alter_table, check_rights,
+ check_not_in_use,
+ skip_build, quiet);
+ SetUserIdAndSecContext(child_save_userid,
+ child_save_sec_context);
+
+ /*
+ * Check if the index just created is valid or not, as it
+ * could be possible that it has been switched as invalid
+ * when recursing across multiple partition levels.
+ */
+ if (!get_index_isvalid(childAddr.objectId))
+ invalidate_parent = true;
+ }
+
+ pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
+ i + 1);
+ free_attrmap(attmap);
+ }
+
+ index_close(parentIndex, lockmode);
+
+ /*
+ * The pg_index row we inserted for this index was marked
+ * indisvalid=true. But if we attached an existing index that is
+ * invalid, this is incorrect, so update our row to invalid too.
+ */
+ if (invalidate_parent)
+ {
+ Relation pg_index = table_open(IndexRelationId, RowExclusiveLock);
+ HeapTuple tup,
+ newtup;
+
+ tup = SearchSysCache1(INDEXRELID,
+ ObjectIdGetDatum(indexRelationId));
+ if (!HeapTupleIsValid(tup))
+ elog(ERROR, "cache lookup failed for index %u",
+ indexRelationId);
+ newtup = heap_copytuple(tup);
+ ((Form_pg_index) GETSTRUCT(newtup))->indisvalid = false;
+ CatalogTupleUpdate(pg_index, &tup->t_self, newtup);
+ ReleaseSysCache(tup);
+ table_close(pg_index, RowExclusiveLock);
+ heap_freetuple(newtup);
+
+ /*
+ * CCI here to make this update visible, in case this recurses
+ * across multiple partition levels.
+ */
+ CommandCounterIncrement();
+ }
+ }
+
+ /*
+ * Indexes on partitioned tables are not themselves built, so we're
+ * done here.
+ */
+ AtEOXact_GUC(false, root_save_nestlevel);
+ SetUserIdAndSecContext(root_save_userid, root_save_sec_context);
+ table_close(rel, NoLock);
+ if (!OidIsValid(parentIndexId))
+ pgstat_progress_end_command();
+ return address;
+ }
+
+ AtEOXact_GUC(false, root_save_nestlevel);
+ SetUserIdAndSecContext(root_save_userid, root_save_sec_context);
+
+ if (!concurrent)
+ {
+ /* Close the heap and we're done, in the non-concurrent case */
+ table_close(rel, NoLock);
+
+ /* If this is the top-level index, we're done. */
+ if (!OidIsValid(parentIndexId))
+ pgstat_progress_end_command();
+
+ return address;
+ }
+
+ /* save lockrelid and locktag for below, then close rel */
+ heaprelid = rel->rd_lockInfo.lockRelId;
+ SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
+ table_close(rel, NoLock);
+
+ /*
+ * For a concurrent build, it's important to make the catalog entries
+ * visible to other transactions before we start to build the index. That
+ * will prevent them from making incompatible HOT updates. The new index
+ * will be marked not indisready and not indisvalid, so that no one else
+ * tries to either insert into it or use it for queries.
+ *
+ * We must commit our current transaction so that the index becomes
+ * visible; then start another. Note that all the data structures we just
+ * built are lost in the commit. The only data we keep past here are the
+ * relation IDs.
+ *
+ * Before committing, get a session-level lock on the table, to ensure
+ * that neither it nor the index can be dropped before we finish. This
+ * cannot block, even if someone else is waiting for access, because we
+ * already have the same lock within our transaction.
+ *
+ * Note: we don't currently bother with a session lock on the index,
+ * because there are no operations that could change its state while we
+ * hold lock on the parent table. This might need to change later.
+ */
+ LockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
+
+ PopActiveSnapshot();
+ CommitTransactionCommand();
+ StartTransactionCommand();
+
+ /* Tell concurrent index builds to ignore us, if index qualifies */
+ if (safe_index)
+ set_indexsafe_procflags();
+
+ /*
+ * The index is now visible, so we can report the OID. While on it,
+ * include the report for the beginning of phase 2.
+ */
+ {
+ const int progress_cols[] = {
+ PROGRESS_CREATEIDX_INDEX_OID,
+ PROGRESS_CREATEIDX_PHASE
+ };
+ const int64 progress_vals[] = {
+ indexRelationId,
+ PROGRESS_CREATEIDX_PHASE_WAIT_1
+ };
+
+ pgstat_progress_update_multi_param(2, progress_cols, progress_vals);
+ }
+
+ /*
+ * Phase 2 of concurrent index build (see comments for validate_index()
+ * for an overview of how this works)
+ *
+ * Now we must wait until no running transaction could have the table open
+ * with the old list of indexes. Use ShareLock to consider running
+ * transactions that hold locks that permit writing to the table. Note we
+ * do not need to worry about xacts that open the table for writing after
+ * this point; they will see the new index when they open it.
+ *
+ * Note: the reason we use actual lock acquisition here, rather than just
+ * checking the ProcArray and sleeping, is that deadlock is possible if
+ * one of the transactions in question is blocked trying to acquire an
+ * exclusive lock on our table. The lock code will detect deadlock and
+ * error out properly.
+ */
+ WaitForLockers(heaplocktag, ShareLock, true);
+
+ /*
+ * At this moment we are sure that there are no transactions with the
+ * table open for write that don't have this new index in their list of
+ * indexes. We have waited out all the existing transactions and any new
+ * transaction will have the new index in its list, but the index is still
+ * marked as "not-ready-for-inserts". The index is consulted while
+ * deciding HOT-safety though. This arrangement ensures that no new HOT
+ * chains can be created where the new tuple and the old tuple in the
+ * chain have different index keys.
+ *
+ * We now take a new snapshot, and build the index using all tuples that
+ * are visible in this snapshot. We can be sure that any HOT updates to
+ * these tuples will be compatible with the index, since any updates made
+ * by transactions that didn't know about the index are now committed or
+ * rolled back. Thus, each visible tuple is either the end of its
+ * HOT-chain or the extension of the chain is HOT-safe for this index.
+ */
+
+ /* Set ActiveSnapshot since functions in the indexes may need it */
+ PushActiveSnapshot(GetTransactionSnapshot());
+
+ /* Perform concurrent build of index */
+ index_concurrently_build(relationId, indexRelationId);
+
+ /* we can do away with our snapshot */
+ PopActiveSnapshot();
+
+ /*
+ * Commit this transaction to make the indisready update visible.
+ */
+ CommitTransactionCommand();
+ StartTransactionCommand();
+
+ /* Tell concurrent index builds to ignore us, if index qualifies */
+ if (safe_index)
+ set_indexsafe_procflags();
+
+ /*
+ * Phase 3 of concurrent index build
+ *
+ * We once again wait until no transaction can have the table open with
+ * the index marked as read-only for updates.
+ */
+ pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE,
+ PROGRESS_CREATEIDX_PHASE_WAIT_2);
+ WaitForLockers(heaplocktag, ShareLock, true);
+
+ /*
+ * Now take the "reference snapshot" that will be used by validate_index()
+ * to filter candidate tuples. Beware! There might still be snapshots in
+ * use that treat some transaction as in-progress that our reference
+ * snapshot treats as committed. If such a recently-committed transaction
+ * deleted tuples in the table, we will not include them in the index; yet
+ * those transactions which see the deleting one as still-in-progress will
+ * expect such tuples to be there once we mark the index as valid.
+ *
+ * We solve this by waiting for all endangered transactions to exit before
+ * we mark the index as valid.
+ *
+ * We also set ActiveSnapshot to this snap, since functions in indexes may
+ * need a snapshot.
+ */
+ snapshot = RegisterSnapshot(GetTransactionSnapshot());
+ PushActiveSnapshot(snapshot);
+
+ /*
+ * Scan the index and the heap, insert any missing index entries.
+ */
+ validate_index(relationId, indexRelationId, snapshot);
+
+ /*
+ * Drop the reference snapshot. We must do this before waiting out other
+ * snapshot holders, else we will deadlock against other processes also
+ * doing CREATE INDEX CONCURRENTLY, which would see our snapshot as one
+ * they must wait for. But first, save the snapshot's xmin to use as
+ * limitXmin for GetCurrentVirtualXIDs().
+ */
+ limitXmin = snapshot->xmin;
+
+ PopActiveSnapshot();
+ UnregisterSnapshot(snapshot);
+
+ /*
+ * The snapshot subsystem could still contain registered snapshots that
+ * are holding back our process's advertised xmin; in particular, if
+ * default_transaction_isolation = serializable, there is a transaction
+ * snapshot that is still active. The CatalogSnapshot is likewise a
+ * hazard. To ensure no deadlocks, we must commit and start yet another
+ * transaction, and do our wait before any snapshot has been taken in it.
+ */
+ CommitTransactionCommand();
+ StartTransactionCommand();
+
+ /* Tell concurrent index builds to ignore us, if index qualifies */
+ if (safe_index)
+ set_indexsafe_procflags();
+
+ /* We should now definitely not be advertising any xmin. */
+ Assert(MyProc->xmin == InvalidTransactionId);
+
+ /*
+ * The index is now valid in the sense that it contains all currently
+ * interesting tuples. But since it might not contain tuples deleted just
+ * before the reference snap was taken, we have to wait out any
+ * transactions that might have older snapshots.
+ */
+ pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE,
+ PROGRESS_CREATEIDX_PHASE_WAIT_3);
+ WaitForOlderSnapshots(limitXmin, true);
+
+ /*
+ * Index can now be marked valid -- update its pg_index entry
+ */
+ index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID);
+
+ /*
+ * The pg_index update will cause backends (including this one) to update
+ * relcache entries for the index itself, but we should also send a
+ * relcache inval on the parent table to force replanning of cached plans.
+ * Otherwise existing sessions might fail to use the new index where it
+ * would be useful. (Note that our earlier commits did not create reasons
+ * to replan; so relcache flush on the index itself was sufficient.)
+ */
+ CacheInvalidateRelcacheByRelid(heaprelid.relId);
+
+ /*
+ * Last thing to do is release the session-level lock on the parent table.
+ */
+ UnlockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
+
+ pgstat_progress_end_command();
+
+ return address;
+}
+
+
+/*
+ * CheckMutability
+ *		Test whether given expression is mutable
+ *
+ * Returns true if the planned form of the expression contains any
+ * function that is not marked IMMUTABLE.  Note the sense of the result:
+ * "true" means the expression IS mutable, so callers treat a true
+ * return as grounds for rejecting the expression (see CheckPredicate
+ * and the index-expression checks).
+ */
+static bool
+CheckMutability(Expr *expr)
+{
+	/*
+	 * First run the expression through the planner. This has a couple of
+	 * important consequences. First, function default arguments will get
+	 * inserted, which may affect volatility (consider "default now()").
+	 * Second, inline-able functions will get inlined, which may allow us to
+	 * conclude that the function is really less volatile than it's marked. As
+	 * an example, polymorphic functions must be marked with the most volatile
+	 * behavior that they have for any input type, but once we inline the
+	 * function we may be able to conclude that it's not so volatile for the
+	 * particular input type we're dealing with.
+	 *
+	 * We assume here that expression_planner() won't scribble on its input.
+	 */
+	expr = expression_planner(expr);
+
+	/* Now we can search for non-immutable functions */
+	return contain_mutable_functions((Node *) expr);
+}
+
+
+/*
+ * CheckPredicate
+ *		Checks that the given partial-index predicate is valid.
+ *
+ * Reports ERROR (does not return a status) if the predicate uses any
+ * non-IMMUTABLE function; otherwise returns with no effect.
+ *
+ * This used to also constrain the form of the predicate to forms that
+ * indxpath.c could do something with. However, that seems overly
+ * restrictive. One useful application of partial indexes is to apply
+ * a UNIQUE constraint across a subset of a table, and in that scenario
+ * any evaluable predicate will work. So accept any predicate here
+ * (except ones requiring a plan), and let indxpath.c fend for itself.
+ */
+static void
+CheckPredicate(Expr *predicate)
+{
+	/*
+	 * transformExpr() should have already rejected subqueries, aggregates,
+	 * and window functions, based on the EXPR_KIND_ for a predicate.
+	 */
+
+	/*
+	 * A predicate using mutable functions is probably wrong, for the same
+	 * reasons that we don't allow an index expression to use one.
+	 * (CheckMutability returns true when mutable functions are present.)
+	 */
+	if (CheckMutability(predicate))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+				 errmsg("functions in index predicate must be marked IMMUTABLE")));
+}
+
+/*
+ * Compute per-index-column information, including indexed column numbers
+ * or index expressions, opclasses and their options. Note, all output vectors
+ * should be allocated for all columns, including "including" ones.
+ *
+ * If the caller switched to the table owner, ddl_userid is the role for ACL
+ * checks reached without traversing opaque expressions. Otherwise, it's
+ * InvalidOid, and other ddl_* arguments are undefined.
+ */
+static void
+ComputeIndexAttrs(IndexInfo *indexInfo,
+ Oid *typeOidP,
+ Oid *collationOidP,
+ Oid *classOidP,
+ int16 *colOptionP,
+ List *attList, /* list of IndexElem's */
+ List *exclusionOpNames,
+ Oid relId,
+ const char *accessMethodName,
+ Oid accessMethodId,
+ bool amcanorder,
+ bool isconstraint,
+ Oid ddl_userid,
+ int ddl_sec_context,
+ int *ddl_save_nestlevel)
+{
+ ListCell *nextExclOp;
+ ListCell *lc;
+ int attn;
+ int nkeycols = indexInfo->ii_NumIndexKeyAttrs;
+ Oid save_userid;
+ int save_sec_context;
+
+ /* Allocate space for exclusion operator info, if needed */
+ if (exclusionOpNames)
+ {
+ Assert(list_length(exclusionOpNames) == nkeycols);
+ indexInfo->ii_ExclusionOps = (Oid *) palloc(sizeof(Oid) * nkeycols);
+ indexInfo->ii_ExclusionProcs = (Oid *) palloc(sizeof(Oid) * nkeycols);
+ indexInfo->ii_ExclusionStrats = (uint16 *) palloc(sizeof(uint16) * nkeycols);
+ nextExclOp = list_head(exclusionOpNames);
+ }
+ else
+ nextExclOp = NULL;
+
+ if (OidIsValid(ddl_userid))
+ GetUserIdAndSecContext(&save_userid, &save_sec_context);
+
+ /*
+ * process attributeList
+ */
+ attn = 0;
+ foreach(lc, attList)
+ {
+ IndexElem *attribute = (IndexElem *) lfirst(lc);
+ Oid atttype;
+ Oid attcollation;
+
+ /*
+ * Process the column-or-expression to be indexed.
+ */
+ if (attribute->name != NULL)
+ {
+ /* Simple index attribute */
+ HeapTuple atttuple;
+ Form_pg_attribute attform;
+
+ Assert(attribute->expr == NULL);
+ atttuple = SearchSysCacheAttName(relId, attribute->name);
+ if (!HeapTupleIsValid(atttuple))
+ {
+ /* difference in error message spellings is historical */
+ if (isconstraint)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_COLUMN),
+ errmsg("column \"%s\" named in key does not exist",
+ attribute->name)));
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_COLUMN),
+ errmsg("column \"%s\" does not exist",
+ attribute->name)));
+ }
+ attform = (Form_pg_attribute) GETSTRUCT(atttuple);
+ indexInfo->ii_IndexAttrNumbers[attn] = attform->attnum;
+ atttype = attform->atttypid;
+ attcollation = attform->attcollation;
+ ReleaseSysCache(atttuple);
+ }
+ else
+ {
+ /* Index expression */
+ Node *expr = attribute->expr;
+
+ Assert(expr != NULL);
+
+ if (attn >= nkeycols)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("expressions are not supported in included columns")));
+ atttype = exprType(expr);
+ attcollation = exprCollation(expr);
+
+ /*
+ * Strip any top-level COLLATE clause. This ensures that we treat
+ * "x COLLATE y" and "(x COLLATE y)" alike.
+ */
+ while (IsA(expr, CollateExpr))
+ expr = (Node *) ((CollateExpr *) expr)->arg;
+
+ if (IsA(expr, Var) &&
+ ((Var *) expr)->varattno != InvalidAttrNumber)
+ {
+ /*
+ * User wrote "(column)" or "(column COLLATE something)".
+ * Treat it like simple attribute anyway.
+ */
+ indexInfo->ii_IndexAttrNumbers[attn] = ((Var *) expr)->varattno;
+ }
+ else
+ {
+ indexInfo->ii_IndexAttrNumbers[attn] = 0; /* marks expression */
+ indexInfo->ii_Expressions = lappend(indexInfo->ii_Expressions,
+ expr);
+
+ /*
+ * transformExpr() should have already rejected subqueries,
+ * aggregates, and window functions, based on the EXPR_KIND_
+ * for an index expression.
+ */
+
+ /*
+ * An expression using mutable functions is probably wrong,
+ * since if you aren't going to get the same result for the
+ * same data every time, it's not clear what the index entries
+ * mean at all.
+ */
+ if (CheckMutability((Expr *) expr))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("functions in index expression must be marked IMMUTABLE")));
+ }
+ }
+
+ typeOidP[attn] = atttype;
+
+ /*
+ * Included columns have no collation, no opclass and no ordering
+ * options.
+ */
+ if (attn >= nkeycols)
+ {
+ if (attribute->collation)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("including column does not support a collation")));
+ if (attribute->opclass)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("including column does not support an operator class")));
+ if (attribute->ordering != SORTBY_DEFAULT)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("including column does not support ASC/DESC options")));
+ if (attribute->nulls_ordering != SORTBY_NULLS_DEFAULT)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("including column does not support NULLS FIRST/LAST options")));
+
+ classOidP[attn] = InvalidOid;
+ colOptionP[attn] = 0;
+ collationOidP[attn] = InvalidOid;
+ attn++;
+
+ continue;
+ }
+
+ /*
+ * Apply collation override if any. Use of ddl_userid is necessary
+ * due to ACL checks therein, and it's safe because collations don't
+ * contain opaque expressions (or non-opaque expressions).
+ */
+ if (attribute->collation)
+ {
+ if (OidIsValid(ddl_userid))
+ {
+ AtEOXact_GUC(false, *ddl_save_nestlevel);
+ SetUserIdAndSecContext(ddl_userid, ddl_sec_context);
+ }
+ attcollation = get_collation_oid(attribute->collation, false);
+ if (OidIsValid(ddl_userid))
+ {
+ SetUserIdAndSecContext(save_userid, save_sec_context);
+ *ddl_save_nestlevel = NewGUCNestLevel();
+ }
+ }
+
+ /*
+ * Check we have a collation iff it's a collatable type. The only
+ * expected failures here are (1) COLLATE applied to a noncollatable
+ * type, or (2) index expression had an unresolved collation. But we
+ * might as well code this to be a complete consistency check.
+ */
+ if (type_is_collatable(atttype))
+ {
+ if (!OidIsValid(attcollation))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDETERMINATE_COLLATION),
+ errmsg("could not determine which collation to use for index expression"),
+ errhint("Use the COLLATE clause to set the collation explicitly.")));
+ }
+ else
+ {
+ if (OidIsValid(attcollation))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("collations are not supported by type %s",
+ format_type_be(atttype))));
+ }
+
+ collationOidP[attn] = attcollation;
+
+ /*
+ * Identify the opclass to use. Use of ddl_userid is necessary due to
+ * ACL checks therein. This is safe despite opclasses containing
+ * opaque expressions (specifically, functions), because only
+ * superusers can define opclasses.
+ */
+ if (OidIsValid(ddl_userid))
+ {
+ AtEOXact_GUC(false, *ddl_save_nestlevel);
+ SetUserIdAndSecContext(ddl_userid, ddl_sec_context);
+ }
+ classOidP[attn] = ResolveOpClass(attribute->opclass,
+ atttype,
+ accessMethodName,
+ accessMethodId);
+ if (OidIsValid(ddl_userid))
+ {
+ SetUserIdAndSecContext(save_userid, save_sec_context);
+ *ddl_save_nestlevel = NewGUCNestLevel();
+ }
+
+ /*
+ * Identify the exclusion operator, if any.
+ */
+ if (nextExclOp)
+ {
+ List *opname = (List *) lfirst(nextExclOp);
+ Oid opid;
+ Oid opfamily;
+ int strat;
+
+ /*
+ * Find the operator --- it must accept the column datatype
+ * without runtime coercion (but binary compatibility is OK).
+ * Operators contain opaque expressions (specifically, functions).
+ * compatible_oper_opid() boils down to oper() and
+ * IsBinaryCoercible(). PostgreSQL would have security problems
+ * elsewhere if oper() started calling opaque expressions.
+ */
+ if (OidIsValid(ddl_userid))
+ {
+ AtEOXact_GUC(false, *ddl_save_nestlevel);
+ SetUserIdAndSecContext(ddl_userid, ddl_sec_context);
+ }
+ opid = compatible_oper_opid(opname, atttype, atttype, false);
+ if (OidIsValid(ddl_userid))
+ {
+ SetUserIdAndSecContext(save_userid, save_sec_context);
+ *ddl_save_nestlevel = NewGUCNestLevel();
+ }
+
+ /*
+ * Only allow commutative operators to be used in exclusion
+ * constraints. If X conflicts with Y, but Y does not conflict
+ * with X, bad things will happen.
+ */
+ if (get_commutator(opid) != opid)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("operator %s is not commutative",
+ format_operator(opid)),
+ errdetail("Only commutative operators can be used in exclusion constraints.")));
+
+ /*
+ * Operator must be a member of the right opfamily, too
+ */
+ opfamily = get_opclass_family(classOidP[attn]);
+ strat = get_op_opfamily_strategy(opid, opfamily);
+ if (strat == 0)
+ {
+ HeapTuple opftuple;
+ Form_pg_opfamily opfform;
+
+ /*
+ * attribute->opclass might not explicitly name the opfamily,
+ * so fetch the name of the selected opfamily for use in the
+ * error message.
+ */
+ opftuple = SearchSysCache1(OPFAMILYOID,
+ ObjectIdGetDatum(opfamily));
+ if (!HeapTupleIsValid(opftuple))
+ elog(ERROR, "cache lookup failed for opfamily %u",
+ opfamily);
+ opfform = (Form_pg_opfamily) GETSTRUCT(opftuple);
+
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("operator %s is not a member of operator family \"%s\"",
+ format_operator(opid),
+ NameStr(opfform->opfname)),
+ errdetail("The exclusion operator must be related to the index operator class for the constraint.")));
+ }
+
+ indexInfo->ii_ExclusionOps[attn] = opid;
+ indexInfo->ii_ExclusionProcs[attn] = get_opcode(opid);
+ indexInfo->ii_ExclusionStrats[attn] = strat;
+ nextExclOp = lnext(exclusionOpNames, nextExclOp);
+ }
+
+ /*
+ * Set up the per-column options (indoption field). For now, this is
+ * zero for any un-ordered index, while ordered indexes have DESC and
+ * NULLS FIRST/LAST options.
+ */
+ colOptionP[attn] = 0;
+ if (amcanorder)
+ {
+ /* default ordering is ASC */
+ if (attribute->ordering == SORTBY_DESC)
+ colOptionP[attn] |= INDOPTION_DESC;
+ /* default null ordering is LAST for ASC, FIRST for DESC */
+ if (attribute->nulls_ordering == SORTBY_NULLS_DEFAULT)
+ {
+ if (attribute->ordering == SORTBY_DESC)
+ colOptionP[attn] |= INDOPTION_NULLS_FIRST;
+ }
+ else if (attribute->nulls_ordering == SORTBY_NULLS_FIRST)
+ colOptionP[attn] |= INDOPTION_NULLS_FIRST;
+ }
+ else
+ {
+ /* index AM does not support ordering */
+ if (attribute->ordering != SORTBY_DEFAULT)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("access method \"%s\" does not support ASC/DESC options",
+ accessMethodName)));
+ if (attribute->nulls_ordering != SORTBY_NULLS_DEFAULT)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("access method \"%s\" does not support NULLS FIRST/LAST options",
+ accessMethodName)));
+ }
+
+ /* Set up the per-column opclass options (attoptions field). */
+ if (attribute->opclassopts)
+ {
+ Assert(attn < nkeycols);
+
+ if (!indexInfo->ii_OpclassOptions)
+ indexInfo->ii_OpclassOptions =
+ palloc0(sizeof(Datum) * indexInfo->ii_NumIndexAttrs);
+
+ indexInfo->ii_OpclassOptions[attn] =
+ transformRelOptions((Datum) 0, attribute->opclassopts,
+ NULL, NULL, false, false);
+ }
+
+ attn++;
+ }
+}
+
+/*
+ * Resolve possibly-defaulted operator class specification
+ *
+ * Note: This is used to resolve operator class specifications in index and
+ * partition key definitions.
+ */
+Oid
+ResolveOpClass(List *opclass, Oid attrType,
+ const char *accessMethodName, Oid accessMethodId)
+{
+ char *schemaname;
+ char *opcname;
+ HeapTuple tuple;
+ Form_pg_opclass opform;
+ Oid opClassId,
+ opInputType;
+
+ if (opclass == NIL)
+ {
+ /* no operator class specified, so find the default */
+ opClassId = GetDefaultOpClass(attrType, accessMethodId);
+ if (!OidIsValid(opClassId))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("data type %s has no default operator class for access method \"%s\"",
+ format_type_be(attrType), accessMethodName),
+ errhint("You must specify an operator class for the index or define a default operator class for the data type.")));
+ return opClassId;
+ }
+
+ /*
+ * Specific opclass name given, so look up the opclass.
+ */
+
+ /* deconstruct the name list */
+ DeconstructQualifiedName(opclass, &schemaname, &opcname);
+
+ if (schemaname)
+ {
+ /* Look in specific schema only */
+ Oid namespaceId;
+
+ namespaceId = LookupExplicitNamespace(schemaname, false);
+ tuple = SearchSysCache3(CLAAMNAMENSP,
+ ObjectIdGetDatum(accessMethodId),
+ PointerGetDatum(opcname),
+ ObjectIdGetDatum(namespaceId));
+ }
+ else
+ {
+ /* Unqualified opclass name, so search the search path */
+ opClassId = OpclassnameGetOpcid(accessMethodId, opcname);
+ if (!OidIsValid(opClassId))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("operator class \"%s\" does not exist for access method \"%s\"",
+ opcname, accessMethodName)));
+ tuple = SearchSysCache1(CLAOID, ObjectIdGetDatum(opClassId));
+ }
+
+ if (!HeapTupleIsValid(tuple))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("operator class \"%s\" does not exist for access method \"%s\"",
+ NameListToString(opclass), accessMethodName)));
+
+ /*
+ * Verify that the index operator class accepts this datatype. Note we
+ * will accept binary compatibility.
+ */
+ opform = (Form_pg_opclass) GETSTRUCT(tuple);
+ opClassId = opform->oid;
+ opInputType = opform->opcintype;
+
+ if (!IsBinaryCoercible(attrType, opInputType))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("operator class \"%s\" does not accept data type %s",
+ NameListToString(opclass), format_type_be(attrType))));
+
+ ReleaseSysCache(tuple);
+
+ return opClassId;
+}
+
+/*
+ * GetDefaultOpClass
+ *
+ * Given the OIDs of a datatype and an access method, find the default
+ * operator class, if any. Returns InvalidOid if there is none.
+ */
+Oid
+GetDefaultOpClass(Oid type_id, Oid am_id)
+{
+ Oid result = InvalidOid;
+ int nexact = 0;
+ int ncompatible = 0;
+ int ncompatiblepreferred = 0;
+ Relation rel;
+ ScanKeyData skey[1];
+ SysScanDesc scan;
+ HeapTuple tup;
+ TYPCATEGORY tcategory;
+
+ /* If it's a domain, look at the base type instead */
+ type_id = getBaseType(type_id);
+
+ tcategory = TypeCategory(type_id);
+
+ /*
+ * We scan through all the opclasses available for the access method,
+ * looking for one that is marked default and matches the target type
+ * (either exactly or binary-compatibly, but prefer an exact match).
+ *
+ * We could find more than one binary-compatible match. If just one is
+ * for a preferred type, use that one; otherwise we fail, forcing the user
+ * to specify which one he wants. (The preferred-type special case is a
+ * kluge for varchar: it's binary-compatible to both text and bpchar, so
+ * we need a tiebreaker.) If we find more than one exact match, then
+ * someone put bogus entries in pg_opclass.
+ */
+ rel = table_open(OperatorClassRelationId, AccessShareLock);
+
+ ScanKeyInit(&skey[0],
+ Anum_pg_opclass_opcmethod,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(am_id));
+
+ scan = systable_beginscan(rel, OpclassAmNameNspIndexId, true,
+ NULL, 1, skey);
+
+ while (HeapTupleIsValid(tup = systable_getnext(scan)))
+ {
+ Form_pg_opclass opclass = (Form_pg_opclass) GETSTRUCT(tup);
+
+ /* ignore altogether if not a default opclass */
+ if (!opclass->opcdefault)
+ continue;
+ if (opclass->opcintype == type_id)
+ {
+ nexact++;
+ result = opclass->oid;
+ }
+ else if (nexact == 0 &&
+ IsBinaryCoercible(type_id, opclass->opcintype))
+ {
+ if (IsPreferredType(tcategory, opclass->opcintype))
+ {
+ ncompatiblepreferred++;
+ result = opclass->oid;
+ }
+ else if (ncompatiblepreferred == 0)
+ {
+ ncompatible++;
+ result = opclass->oid;
+ }
+ }
+ }
+
+ systable_endscan(scan);
+
+ table_close(rel, AccessShareLock);
+
+ /* raise error if pg_opclass contains inconsistent data */
+ if (nexact > 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg("there are multiple default operator classes for data type %s",
+ format_type_be(type_id))));
+
+ if (nexact == 1 ||
+ ncompatiblepreferred == 1 ||
+ (ncompatiblepreferred == 0 && ncompatible == 1))
+ return result;
+
+ return InvalidOid;
+}
+
+/*
+ * makeObjectName()
+ *
+ * Create a name for an implicitly created index, sequence, constraint,
+ * extended statistics, etc.
+ *
+ * The parameters are typically: the original table name, the original field
+ * name, and a "type" string (such as "seq" or "pkey"). The field name
+ * and/or type can be NULL if not relevant.
+ *
+ * The result is a palloc'd string.
+ *
+ * The basic result we want is "name1_name2_label", omitting "_name2" or
+ * "_label" when those parameters are NULL. However, we must generate
+ * a name with less than NAMEDATALEN characters! So, we truncate one or
+ * both names if necessary to make a short-enough string. The label part
+ * is never truncated (so it had better be reasonably short).
+ *
+ * The caller is responsible for checking uniqueness of the generated
+ * name and retrying as needed; retrying will be done by altering the
+ * "label" string (which is why we never truncate that part).
+ */
+char *
+makeObjectName(const char *name1, const char *name2, const char *label)
+{
+ char *name;
+ int overhead = 0; /* chars needed for label and underscores */
+ int availchars; /* chars available for name(s) */
+ int name1chars; /* chars allocated to name1 */
+ int name2chars; /* chars allocated to name2 */
+ int ndx;
+
+ name1chars = strlen(name1);
+ if (name2)
+ {
+ name2chars = strlen(name2);
+ overhead++; /* allow for separating underscore */
+ }
+ else
+ name2chars = 0;
+ if (label)
+ overhead += strlen(label) + 1;
+
+ availchars = NAMEDATALEN - 1 - overhead;
+ Assert(availchars > 0); /* else caller chose a bad label */
+
+ /*
+ * If we must truncate, preferentially truncate the longer name. This
+ * logic could be expressed without a loop, but it's simple and obvious as
+ * a loop.
+ */
+ while (name1chars + name2chars > availchars)
+ {
+ if (name1chars > name2chars)
+ name1chars--;
+ else
+ name2chars--;
+ }
+
+ name1chars = pg_mbcliplen(name1, name1chars, name1chars);
+ if (name2)
+ name2chars = pg_mbcliplen(name2, name2chars, name2chars);
+
+ /* Now construct the string using the chosen lengths */
+ name = palloc(name1chars + name2chars + overhead + 1);
+ memcpy(name, name1, name1chars);
+ ndx = name1chars;
+ if (name2)
+ {
+ name[ndx++] = '_';
+ memcpy(name + ndx, name2, name2chars);
+ ndx += name2chars;
+ }
+ if (label)
+ {
+ name[ndx++] = '_';
+ strcpy(name + ndx, label);
+ }
+ else
+ name[ndx] = '\0';
+
+ return name;
+}
+
+/*
+ * Select a nonconflicting name for a new relation. This is ordinarily
+ * used to choose index names (which is why it's here) but it can also
+ * be used for sequences, or any autogenerated relation kind.
+ *
+ * name1, name2, and label are used the same way as for makeObjectName(),
+ * except that the label can't be NULL; digits will be appended to the label
+ * if needed to create a name that is unique within the specified namespace.
+ *
+ * If isconstraint is true, we also avoid choosing a name matching any
+ * existing constraint in the same namespace. (This is stricter than what
+ * Postgres itself requires, but the SQL standard says that constraint names
+ * should be unique within schemas, so we follow that for autogenerated
+ * constraint names.)
+ *
+ * Note: it is theoretically possible to get a collision anyway, if someone
+ * else chooses the same name concurrently. This is fairly unlikely to be
+ * a problem in practice, especially if one is holding an exclusive lock on
+ * the relation identified by name1. However, if choosing multiple names
+ * within a single command, you'd better create the new object and do
+ * CommandCounterIncrement before choosing the next one!
+ *
+ * Returns a palloc'd string.
+ */
+char *
+ChooseRelationName(const char *name1, const char *name2,
+ const char *label, Oid namespaceid,
+ bool isconstraint)
+{
+ int pass = 0;
+ char *relname = NULL;
+ char modlabel[NAMEDATALEN];
+
+ /* try the unmodified label first */
+ strlcpy(modlabel, label, sizeof(modlabel));
+
+ for (;;)
+ {
+ relname = makeObjectName(name1, name2, modlabel);
+
+ if (!OidIsValid(get_relname_relid(relname, namespaceid)))
+ {
+ if (!isconstraint ||
+ !ConstraintNameExists(relname, namespaceid))
+ break;
+ }
+
+ /* found a conflict, so try a new name component */
+ pfree(relname);
+ snprintf(modlabel, sizeof(modlabel), "%s%d", label, ++pass);
+ }
+
+ return relname;
+}
+
+/*
+ * Select the name to be used for an index.
+ *
+ * The argument list is pretty ad-hoc :-(
+ */
+static char *
+ChooseIndexName(const char *tabname, Oid namespaceId,
+ List *colnames, List *exclusionOpNames,
+ bool primary, bool isconstraint)
+{
+ char *indexname;
+
+ if (primary)
+ {
+ /* the primary key's name does not depend on the specific column(s) */
+ indexname = ChooseRelationName(tabname,
+ NULL,
+ "pkey",
+ namespaceId,
+ true);
+ }
+ else if (exclusionOpNames != NIL)
+ {
+ indexname = ChooseRelationName(tabname,
+ ChooseIndexNameAddition(colnames),
+ "excl",
+ namespaceId,
+ true);
+ }
+ else if (isconstraint)
+ {
+ indexname = ChooseRelationName(tabname,
+ ChooseIndexNameAddition(colnames),
+ "key",
+ namespaceId,
+ true);
+ }
+ else
+ {
+ indexname = ChooseRelationName(tabname,
+ ChooseIndexNameAddition(colnames),
+ "idx",
+ namespaceId,
+ false);
+ }
+
+ return indexname;
+}
+
+/*
+ * Generate "name2" for a new index given the list of column names for it
+ * (as produced by ChooseIndexColumnNames). This will be passed to
+ * ChooseRelationName along with the parent table name and a suitable label.
+ *
+ * We know that less than NAMEDATALEN characters will actually be used,
+ * so we can truncate the result once we've generated that many.
+ *
+ * XXX See also ChooseForeignKeyConstraintNameAddition and
+ * ChooseExtendedStatisticNameAddition.
+ */
+static char *
+ChooseIndexNameAddition(List *colnames)
+{
+ char buf[NAMEDATALEN * 2];
+ int buflen = 0;
+ ListCell *lc;
+
+ buf[0] = '\0';
+ foreach(lc, colnames)
+ {
+ const char *name = (const char *) lfirst(lc);
+
+ if (buflen > 0)
+ buf[buflen++] = '_'; /* insert _ between names */
+
+ /*
+ * At this point we have buflen <= NAMEDATALEN. name should be less
+ * than NAMEDATALEN already, but use strlcpy for paranoia.
+ */
+ strlcpy(buf + buflen, name, NAMEDATALEN);
+ buflen += strlen(buf + buflen);
+ if (buflen >= NAMEDATALEN)
+ break;
+ }
+ return pstrdup(buf);
+}
+
+/*
+ * Select the actual names to be used for the columns of an index, given the
+ * list of IndexElems for the columns. This is mostly about ensuring the
+ * names are unique so we don't get a conflicting-attribute-names error.
+ *
+ * Returns a List of plain strings (char *, not String nodes).
+ */
+static List *
+ChooseIndexColumnNames(List *indexElems)
+{
+ List *result = NIL;
+ ListCell *lc;
+
+ foreach(lc, indexElems)
+ {
+ IndexElem *ielem = (IndexElem *) lfirst(lc);
+ const char *origname;
+ const char *curname;
+ int i;
+ char buf[NAMEDATALEN];
+
+ /* Get the preliminary name from the IndexElem */
+ if (ielem->indexcolname)
+ origname = ielem->indexcolname; /* caller-specified name */
+ else if (ielem->name)
+ origname = ielem->name; /* simple column reference */
+ else
+ origname = "expr"; /* default name for expression */
+
+ /* If it conflicts with any previous column, tweak it */
+ curname = origname;
+ for (i = 1;; i++)
+ {
+ ListCell *lc2;
+ char nbuf[32];
+ int nlen;
+
+ foreach(lc2, result)
+ {
+ if (strcmp(curname, (char *) lfirst(lc2)) == 0)
+ break;
+ }
+ if (lc2 == NULL)
+ break; /* found nonconflicting name */
+
+ sprintf(nbuf, "%d", i);
+
+ /* Ensure generated names are shorter than NAMEDATALEN */
+ nlen = pg_mbcliplen(origname, strlen(origname),
+ NAMEDATALEN - 1 - strlen(nbuf));
+ memcpy(buf, origname, nlen);
+ strcpy(buf + nlen, nbuf);
+ curname = buf;
+ }
+
+ /* And attach to the result list */
+ result = lappend(result, pstrdup(curname));
+ }
+ return result;
+}
+
+/*
+ * ExecReindex
+ *
+ * Primary entry point for manual REINDEX commands. This is mainly a
+ * preparation wrapper for the real operations that will happen in
+ * each subroutine of REINDEX.
+ */
+void
+ExecReindex(ParseState *pstate, ReindexStmt *stmt, bool isTopLevel)
+{
+ ReindexParams params = {0};
+ ListCell *lc;
+ bool concurrently = false;
+ bool verbose = false;
+ char *tablespacename = NULL;
+
+ /* Parse option list */
+ foreach(lc, stmt->params)
+ {
+ DefElem *opt = (DefElem *) lfirst(lc);
+
+ if (strcmp(opt->defname, "verbose") == 0)
+ verbose = defGetBoolean(opt);
+ else if (strcmp(opt->defname, "concurrently") == 0)
+ concurrently = defGetBoolean(opt);
+ else if (strcmp(opt->defname, "tablespace") == 0)
+ tablespacename = defGetString(opt);
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("unrecognized REINDEX option \"%s\"",
+ opt->defname),
+ parser_errposition(pstate, opt->location)));
+ }
+
+ if (concurrently)
+ PreventInTransactionBlock(isTopLevel,
+ "REINDEX CONCURRENTLY");
+
+ params.options =
+ (verbose ? REINDEXOPT_VERBOSE : 0) |
+ (concurrently ? REINDEXOPT_CONCURRENTLY : 0);
+
+ /*
+ * Assign the tablespace OID to move indexes to, with InvalidOid to do
+ * nothing.
+ */
+ if (tablespacename != NULL)
+ {
+ params.tablespaceOid = get_tablespace_oid(tablespacename, false);
+
+ /* Check permissions except when moving to database's default */
+ if (OidIsValid(params.tablespaceOid) &&
+ params.tablespaceOid != MyDatabaseTableSpace)
+ {
+ AclResult aclresult;
+
+ aclresult = pg_tablespace_aclcheck(params.tablespaceOid,
+ GetUserId(), ACL_CREATE);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error(aclresult, OBJECT_TABLESPACE,
+ get_tablespace_name(params.tablespaceOid));
+ }
+ }
+ else
+ params.tablespaceOid = InvalidOid;
+
+ switch (stmt->kind)
+ {
+ case REINDEX_OBJECT_INDEX:
+ ReindexIndex(stmt->relation, &params, isTopLevel);
+ break;
+ case REINDEX_OBJECT_TABLE:
+ ReindexTable(stmt->relation, &params, isTopLevel);
+ break;
+ case REINDEX_OBJECT_SCHEMA:
+ case REINDEX_OBJECT_SYSTEM:
+ case REINDEX_OBJECT_DATABASE:
+
+ /*
+ * This cannot run inside a user transaction block; if we were
+ * inside a transaction, then its commit- and
+ * start-transaction-command calls would not have the intended
+ * effect!
+ */
+ PreventInTransactionBlock(isTopLevel,
+ (stmt->kind == REINDEX_OBJECT_SCHEMA) ? "REINDEX SCHEMA" :
+ (stmt->kind == REINDEX_OBJECT_SYSTEM) ? "REINDEX SYSTEM" :
+ "REINDEX DATABASE");
+ ReindexMultipleTables(stmt->name, stmt->kind, &params);
+ break;
+ default:
+ elog(ERROR, "unrecognized object type: %d",
+ (int) stmt->kind);
+ break;
+ }
+}
+
/*
 * ReindexIndex
 *		Recreate a specific index.
 *
 * Dispatches to partitioned, concurrent, or plain reindex processing
 * depending on the index's relkind, persistence, and the requested options.
 */
static void
ReindexIndex(RangeVar *indexRelation, ReindexParams *params, bool isTopLevel)
{
	struct ReindexIndexCallbackState state;
	Oid			indOid;
	char		persistence;
	char		relkind;

	/*
	 * Find and lock index, and check permissions on table; use callback to
	 * obtain lock on table first, to avoid deadlock hazard.  The lock level
	 * used here must match the index lock obtained in reindex_index().
	 *
	 * If it's a temporary index, we will perform a non-concurrent reindex,
	 * even if CONCURRENTLY was requested.  In that case, reindex_index()
	 * will upgrade the lock, but that's OK, because other sessions can't
	 * hold locks on our temporary table.
	 */
	state.params = *params;
	state.locked_table_oid = InvalidOid;
	indOid = RangeVarGetRelidExtended(indexRelation,
									  (params->options & REINDEXOPT_CONCURRENTLY) != 0 ?
									  ShareUpdateExclusiveLock : AccessExclusiveLock,
									  0,
									  RangeVarCallbackForReindexIndex,
									  &state);

	/*
	 * Obtain the current persistence and kind of the existing index.  We
	 * already hold a lock on the index.
	 */
	persistence = get_rel_persistence(indOid);
	relkind = get_rel_relkind(indOid);

	/* Choose the processing strategy based on what we found */
	if (relkind == RELKIND_PARTITIONED_INDEX)
		ReindexPartitions(indOid, params, isTopLevel);
	else if ((params->options & REINDEXOPT_CONCURRENTLY) != 0 &&
			 persistence != RELPERSISTENCE_TEMP)
		ReindexRelationConcurrently(indOid, params);
	else
	{
		ReindexParams newparams = *params;

		/* plain in-place rebuild; report progress like the other paths */
		newparams.options |= REINDEXOPT_REPORT_PROGRESS;
		reindex_index(indOid, false, persistence, &newparams);
	}
}
+
/*
 * Check permissions on table before acquiring relation lock; also lock
 * the heap before the RangeVarGetRelidExtended takes the index lock, to avoid
 * deadlocks.
 *
 * This is the RangeVarGetRelidExtended callback used by ReindexIndex();
 * "arg" is a struct ReindexIndexCallbackState carrying the REINDEX options
 * and the OID of any heap we have already locked (so it can be released if
 * the name lookup moves to a different relation).
 */
static void
RangeVarCallbackForReindexIndex(const RangeVar *relation,
								Oid relId, Oid oldRelId, void *arg)
{
	char		relkind;
	struct ReindexIndexCallbackState *state = arg;
	LOCKMODE	table_lockmode;

	/*
	 * Lock level here should match table lock in reindex_index() for
	 * non-concurrent case and table locks used by index_concurrently_*() for
	 * concurrent case.
	 */
	table_lockmode = (state->params.options & REINDEXOPT_CONCURRENTLY) != 0 ?
		ShareUpdateExclusiveLock : ShareLock;

	/*
	 * If we previously locked some other index's heap, and the name we're
	 * looking up no longer refers to that relation, release the now-useless
	 * lock.
	 */
	if (relId != oldRelId && OidIsValid(oldRelId))
	{
		UnlockRelationOid(state->locked_table_oid, table_lockmode);
		state->locked_table_oid = InvalidOid;
	}

	/* If the relation does not exist, there's nothing more to do. */
	if (!OidIsValid(relId))
		return;

	/*
	 * If the relation does exist, check whether it's an index.  But note
	 * that the relation might have been dropped between the time we did the
	 * name lookup and now.  In that case, there's nothing to do.
	 */
	relkind = get_rel_relkind(relId);
	if (!relkind)
		return;
	if (relkind != RELKIND_INDEX &&
		relkind != RELKIND_PARTITIONED_INDEX)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("\"%s\" is not an index", relation->relname)));

	/* Check permissions */
	if (!pg_class_ownercheck(relId, GetUserId()))
		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_INDEX, relation->relname);

	/* Lock heap before index to avoid deadlock. */
	if (relId != oldRelId)
	{
		Oid			table_oid = IndexGetRelation(relId, true);

		/*
		 * If the OID isn't valid, it means the index was concurrently
		 * dropped, which is not a problem for us; just return normally.
		 */
		if (OidIsValid(table_oid))
		{
			LockRelationOid(table_oid, table_lockmode);
			state->locked_table_oid = table_oid;
		}
	}
}
+
/*
 * ReindexTable
 *		Recreate all indexes of a table (and of its toast table, if any)
 *
 * Returns the OID of the locked heap relation.  Emits a NOTICE (rather than
 * an error) when the table has no indexes suitable for the requested mode.
 */
static Oid
ReindexTable(RangeVar *relation, ReindexParams *params, bool isTopLevel)
{
	Oid			heapOid;
	bool		result;

	/*
	 * The lock level used here should match reindex_relation().
	 *
	 * If it's a temporary table, we will perform a non-concurrent reindex,
	 * even if CONCURRENTLY was requested.  In that case, reindex_relation()
	 * will upgrade the lock, but that's OK, because other sessions can't
	 * hold locks on our temporary table.
	 */
	heapOid = RangeVarGetRelidExtended(relation,
									   (params->options & REINDEXOPT_CONCURRENTLY) != 0 ?
									   ShareUpdateExclusiveLock : ShareLock,
									   0,
									   RangeVarCallbackOwnsTable, NULL);

	/* Choose the processing strategy based on the table's kind */
	if (get_rel_relkind(heapOid) == RELKIND_PARTITIONED_TABLE)
		ReindexPartitions(heapOid, params, isTopLevel);
	else if ((params->options & REINDEXOPT_CONCURRENTLY) != 0 &&
			 get_rel_persistence(heapOid) != RELPERSISTENCE_TEMP)
	{
		result = ReindexRelationConcurrently(heapOid, params);

		if (!result)
			ereport(NOTICE,
					(errmsg("table \"%s\" has no indexes that can be reindexed concurrently",
							relation->relname)));
	}
	else
	{
		ReindexParams newparams = *params;

		/* plain rebuild; also handle toast and report progress */
		newparams.options |= REINDEXOPT_REPORT_PROGRESS;
		result = reindex_relation(heapOid,
								  REINDEX_REL_PROCESS_TOAST |
								  REINDEX_REL_CHECK_CONSTRAINTS,
								  &newparams);
		if (!result)
			ereport(NOTICE,
					(errmsg("table \"%s\" has no indexes to reindex",
							relation->relname)));
	}

	return heapOid;
}
+
+/*
+ * ReindexMultipleTables
+ * Recreate indexes of tables selected by objectName/objectKind.
+ *
+ * To reduce the probability of deadlocks, each table is reindexed in a
+ * separate transaction, so we can release the lock on it right away.
+ * That means this must not be called within a user transaction block!
+ */
static void
ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind,
					  ReindexParams *params)
{
	Oid			objectOid;
	Relation	relationRelation;
	TableScanDesc scan;
	ScanKeyData scan_keys[1];
	HeapTuple	tuple;
	MemoryContext private_context;
	MemoryContext old;
	List	   *relids = NIL;	/* OIDs of the relations selected to reindex */
	int			num_keys;
	bool		concurrent_warning = false; /* emit catalog warning only once */
	bool		tablespace_warning = false; /* emit move warning only once */

	AssertArg(objectName);
	Assert(objectKind == REINDEX_OBJECT_SCHEMA ||
		   objectKind == REINDEX_OBJECT_SYSTEM ||
		   objectKind == REINDEX_OBJECT_DATABASE);

	/* CONCURRENTLY is never possible on system catalogs, so fail early */
	if (objectKind == REINDEX_OBJECT_SYSTEM &&
		(params->options & REINDEXOPT_CONCURRENTLY) != 0)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot reindex system catalogs concurrently")));

	/*
	 * Get OID of object to reindex, being the database currently being used
	 * by session for a database or for system catalogs, or the schema defined
	 * by caller.  At the same time do permission checks that need different
	 * processing depending on the object type.
	 */
	if (objectKind == REINDEX_OBJECT_SCHEMA)
	{
		objectOid = get_namespace_oid(objectName, false);

		if (!pg_namespace_ownercheck(objectOid, GetUserId()))
			aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_SCHEMA,
						   objectName);
	}
	else
	{
		objectOid = MyDatabaseId;

		if (strcmp(objectName, get_database_name(objectOid)) != 0)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("can only reindex the currently open database")));
		if (!pg_database_ownercheck(objectOid, GetUserId()))
			aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE,
						   objectName);
	}

	/*
	 * Create a memory context that will survive forced transaction commits we
	 * do below.  Since it is a child of PortalContext, it will go away
	 * eventually even if we suffer an error; there's no need for special
	 * abort cleanup logic.
	 */
	private_context = AllocSetContextCreate(PortalContext,
											"ReindexMultipleTables",
											ALLOCSET_SMALL_SIZES);

	/*
	 * Define the search keys to find the objects to reindex.  For a schema, we
	 * select target relations using relnamespace, something not necessary for
	 * a database-wide operation.
	 */
	if (objectKind == REINDEX_OBJECT_SCHEMA)
	{
		num_keys = 1;
		ScanKeyInit(&scan_keys[0],
					Anum_pg_class_relnamespace,
					BTEqualStrategyNumber, F_OIDEQ,
					ObjectIdGetDatum(objectOid));
	}
	else
		num_keys = 0;

	/*
	 * Scan pg_class to build a list of the relations we need to reindex.
	 *
	 * We only consider plain relations and materialized views here (toast
	 * rels will be processed indirectly by reindex_relation).
	 */
	relationRelation = table_open(RelationRelationId, AccessShareLock);
	scan = table_beginscan_catalog(relationRelation, num_keys, scan_keys);
	while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
	{
		Form_pg_class classtuple = (Form_pg_class) GETSTRUCT(tuple);
		Oid			relid = classtuple->oid;

		/*
		 * Only regular tables and matviews can have indexes, so ignore any
		 * other kind of relation.
		 *
		 * Partitioned tables/indexes are skipped but matching leaf partitions
		 * are processed.
		 */
		if (classtuple->relkind != RELKIND_RELATION &&
			classtuple->relkind != RELKIND_MATVIEW)
			continue;

		/* Skip temp tables of other backends; we can't reindex them at all */
		if (classtuple->relpersistence == RELPERSISTENCE_TEMP &&
			!isTempNamespace(classtuple->relnamespace))
			continue;

		/* Check user/system classification, and optionally skip */
		if (objectKind == REINDEX_OBJECT_SYSTEM &&
			!IsSystemClass(relid, classtuple))
			continue;

		/*
		 * The table can be reindexed if the user is superuser, the table
		 * owner, or the database/schema owner (but in the latter case, only
		 * if it's not a shared relation).  pg_class_ownercheck includes the
		 * superuser case, and depending on objectKind we already know that
		 * the user has permission to run REINDEX on this database or schema
		 * per the permission checks at the beginning of this routine.
		 */
		if (classtuple->relisshared &&
			!pg_class_ownercheck(relid, GetUserId()))
			continue;

		/*
		 * Skip system tables, since index_create() would reject indexing them
		 * concurrently (and it would likely fail if we tried).
		 */
		if ((params->options & REINDEXOPT_CONCURRENTLY) != 0 &&
			IsCatalogRelationOid(relid))
		{
			if (!concurrent_warning)
				ereport(WARNING,
						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						 errmsg("cannot reindex system catalogs concurrently, skipping all")));
			concurrent_warning = true;
			continue;
		}

		/*
		 * If a new tablespace is set, check if this relation has to be
		 * skipped.
		 */
		if (OidIsValid(params->tablespaceOid))
		{
			bool		skip_rel = false;

			/*
			 * Mapped relations cannot be moved to different tablespaces (in
			 * particular this eliminates all shared catalogs.).
			 */
			if (RELKIND_HAS_STORAGE(classtuple->relkind) &&
				!OidIsValid(classtuple->relfilenode))
				skip_rel = true;

			/*
			 * A system relation is always skipped, even with
			 * allow_system_table_mods enabled.
			 */
			if (IsSystemClass(relid, classtuple))
				skip_rel = true;

			if (skip_rel)
			{
				if (!tablespace_warning)
					ereport(WARNING,
							(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
							 errmsg("cannot move system relations, skipping all")));
				tablespace_warning = true;
				continue;
			}
		}

		/* Save the list of relation OIDs in private context */
		old = MemoryContextSwitchTo(private_context);

		/*
		 * We always want to reindex pg_class first if it's selected to be
		 * reindexed.  This ensures that if there is any corruption in
		 * pg_class' indexes, they will be fixed before we process any other
		 * tables.  This is critical because reindexing itself will try to
		 * update pg_class.
		 */
		if (relid == RelationRelationId)
			relids = lcons_oid(relid, relids);
		else
			relids = lappend_oid(relids, relid);

		MemoryContextSwitchTo(old);
	}
	table_endscan(scan);
	table_close(relationRelation, AccessShareLock);

	/*
	 * Process each relation listed in a separate transaction.  Note that this
	 * commits and then starts a new transaction immediately.
	 */
	ReindexMultipleInternal(relids, params);

	MemoryContextDelete(private_context);
}
+
+/*
+ * Error callback specific to ReindexPartitions().
+ */
+static void
+reindex_error_callback(void *arg)
+{
+ ReindexErrorInfo *errinfo = (ReindexErrorInfo *) arg;
+
+ Assert(RELKIND_HAS_PARTITIONS(errinfo->relkind));
+
+ if (errinfo->relkind == RELKIND_PARTITIONED_TABLE)
+ errcontext("while reindexing partitioned table \"%s.%s\"",
+ errinfo->relnamespace, errinfo->relname);
+ else if (errinfo->relkind == RELKIND_PARTITIONED_INDEX)
+ errcontext("while reindexing partitioned index \"%s.%s\"",
+ errinfo->relnamespace, errinfo->relname);
+}
+
/*
 * ReindexPartitions
 *
 * Reindex a set of partitions, per the partitioned index or table given
 * by the caller.  Each leaf partition is processed in its own transaction,
 * so this must be run outside a user transaction block.
 */
static void
ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
{
	List	   *partitions = NIL;
	char		relkind = get_rel_relkind(relid);
	char	   *relname = get_rel_name(relid);
	char	   *relnamespace = get_namespace_name(get_rel_namespace(relid));
	MemoryContext reindex_context;
	List	   *inhoids;
	ListCell   *lc;
	ErrorContextCallback errcallback;
	ReindexErrorInfo errinfo;

	Assert(RELKIND_HAS_PARTITIONS(relkind));

	/*
	 * Check if this runs in a transaction block, with an error callback to
	 * provide more context under which a problem happens.  The names are
	 * copied with pstrdup() so the callback data remains usable on its own.
	 */
	errinfo.relname = pstrdup(relname);
	errinfo.relnamespace = pstrdup(relnamespace);
	errinfo.relkind = relkind;
	errcallback.callback = reindex_error_callback;
	errcallback.arg = (void *) &errinfo;
	errcallback.previous = error_context_stack;
	error_context_stack = &errcallback;

	PreventInTransactionBlock(isTopLevel,
							  relkind == RELKIND_PARTITIONED_TABLE ?
							  "REINDEX TABLE" : "REINDEX INDEX");

	/* Pop the error context stack */
	error_context_stack = errcallback.previous;

	/*
	 * Create special memory context for cross-transaction storage.
	 *
	 * Since it is a child of PortalContext, it will go away eventually even
	 * if we suffer an error so there is no need for special abort cleanup
	 * logic.
	 */
	reindex_context = AllocSetContextCreate(PortalContext, "Reindex",
											ALLOCSET_DEFAULT_SIZES);

	/* ShareLock is enough to prevent schema modifications */
	inhoids = find_all_inheritors(relid, ShareLock, NULL);

	/*
	 * The list of relations to reindex are the physical partitions of the
	 * tree so discard any partitioned table or index.
	 */
	foreach(lc, inhoids)
	{
		Oid			partoid = lfirst_oid(lc);
		char		partkind = get_rel_relkind(partoid);
		MemoryContext old_context;

		/*
		 * This discards partitioned tables, partitioned indexes and foreign
		 * tables.
		 */
		if (!RELKIND_HAS_STORAGE(partkind))
			continue;

		Assert(partkind == RELKIND_INDEX ||
			   partkind == RELKIND_RELATION);

		/* Save partition OID in the cross-transaction context */
		old_context = MemoryContextSwitchTo(reindex_context);
		partitions = lappend_oid(partitions, partoid);
		MemoryContextSwitchTo(old_context);
	}

	/*
	 * Process each partition listed in a separate transaction.  Note that
	 * this commits and then starts a new transaction immediately.
	 */
	ReindexMultipleInternal(partitions, params);

	/*
	 * Clean up working storage --- note we must do this after
	 * StartTransactionCommand, else we might be trying to delete the active
	 * context!
	 */
	MemoryContextDelete(reindex_context);
}
+
/*
 * ReindexMultipleInternal
 *
 * Reindex a list of relations, each one being processed in its own
 * transaction.  This commits the existing transaction immediately,
 * and starts a new transaction when finished.  The caller must hold an
 * active snapshot on entry, which is popped before the first commit.
 */
static void
ReindexMultipleInternal(List *relids, ReindexParams *params)
{
	ListCell   *l;

	/* End the caller's transaction; each relation gets its own below. */
	PopActiveSnapshot();
	CommitTransactionCommand();

	foreach(l, relids)
	{
		Oid			relid = lfirst_oid(l);
		char		relkind;
		char		relpersistence;

		StartTransactionCommand();

		/* functions in indexes may want a snapshot set */
		PushActiveSnapshot(GetTransactionSnapshot());

		/* check if the relation still exists */
		if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(relid)))
		{
			PopActiveSnapshot();
			CommitTransactionCommand();
			continue;
		}

		/*
		 * Check permissions except when moving to database's default if a new
		 * tablespace is chosen.  Note that this check also happens in
		 * ExecReindex(), but we do an extra check here as this runs across
		 * multiple transactions.
		 */
		if (OidIsValid(params->tablespaceOid) &&
			params->tablespaceOid != MyDatabaseTableSpace)
		{
			AclResult	aclresult;

			aclresult = pg_tablespace_aclcheck(params->tablespaceOid,
											   GetUserId(), ACL_CREATE);
			if (aclresult != ACLCHECK_OK)
				aclcheck_error(aclresult, OBJECT_TABLESPACE,
							   get_tablespace_name(params->tablespaceOid));
		}

		relkind = get_rel_relkind(relid);
		relpersistence = get_rel_persistence(relid);

		/*
		 * Partitioned tables and indexes can never be processed directly, and
		 * a list of their leaves should be built first.
		 */
		Assert(!RELKIND_HAS_PARTITIONS(relkind));

		if ((params->options & REINDEXOPT_CONCURRENTLY) != 0 &&
			relpersistence != RELPERSISTENCE_TEMP)
		{
			ReindexParams newparams = *params;

			newparams.options |= REINDEXOPT_MISSING_OK;
			(void) ReindexRelationConcurrently(relid, &newparams);
			/* ReindexRelationConcurrently() does the verbose output */
		}
		else if (relkind == RELKIND_INDEX)
		{
			ReindexParams newparams = *params;

			newparams.options |=
				REINDEXOPT_REPORT_PROGRESS | REINDEXOPT_MISSING_OK;
			reindex_index(relid, false, relpersistence, &newparams);
			PopActiveSnapshot();
			/* reindex_index() does the verbose output */
		}
		else
		{
			bool		result;
			ReindexParams newparams = *params;

			newparams.options |=
				REINDEXOPT_REPORT_PROGRESS | REINDEXOPT_MISSING_OK;
			result = reindex_relation(relid,
									  REINDEX_REL_PROCESS_TOAST |
									  REINDEX_REL_CHECK_CONSTRAINTS,
									  &newparams);

			if (result && (params->options & REINDEXOPT_VERBOSE) != 0)
				ereport(INFO,
						(errmsg("table \"%s.%s\" was reindexed",
								get_namespace_name(get_rel_namespace(relid)),
								get_rel_name(relid))));

			PopActiveSnapshot();
		}

		CommitTransactionCommand();
	}

	/* Leave a fresh transaction open for the caller, per contract above. */
	StartTransactionCommand();
}
+
+
+/*
+ * ReindexRelationConcurrently - process REINDEX CONCURRENTLY for given
+ * relation OID
+ *
+ * 'relationOid' can either belong to an index, a table or a materialized
+ * view. For tables and materialized views, all its indexes will be rebuilt,
+ * excluding invalid indexes and any indexes used in exclusion constraints,
+ * but including its associated toast table indexes. For indexes, the index
+ * itself will be rebuilt.
+ *
+ * The locks taken on parent tables and involved indexes are kept until the
+ * transaction is committed, at which point a session lock is taken on each
+ * relation. Both of these protect against concurrent schema changes.
+ *
+ * Returns true if any indexes have been rebuilt (including toast table's
+ * indexes, when relevant), otherwise returns false.
+ *
+ * NOTE: This cannot be used on temporary relations. A concurrent build would
+ * cause issues with ON COMMIT actions triggered by the transactions of the
+ * concurrent build. Temporary relations are not subject to concurrent
+ * concerns, so there's no need for the more complicated concurrent build,
+ * anyway, and a non-concurrent reindex is more efficient.
+ */
+static bool
+ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
+{
+ typedef struct ReindexIndexInfo
+ {
+ Oid indexId;
+ Oid tableId;
+ Oid amId;
+ bool safe; /* for set_indexsafe_procflags */
+ } ReindexIndexInfo;
+ List *heapRelationIds = NIL;
+ List *indexIds = NIL;
+ List *newIndexIds = NIL;
+ List *relationLocks = NIL;
+ List *lockTags = NIL;
+ ListCell *lc,
+ *lc2;
+ MemoryContext private_context;
+ MemoryContext oldcontext;
+ char relkind;
+ char *relationName = NULL;
+ char *relationNamespace = NULL;
+ PGRUsage ru0;
+ const int progress_index[] = {
+ PROGRESS_CREATEIDX_COMMAND,
+ PROGRESS_CREATEIDX_PHASE,
+ PROGRESS_CREATEIDX_INDEX_OID,
+ PROGRESS_CREATEIDX_ACCESS_METHOD_OID
+ };
+ int64 progress_vals[4];
+
+ /*
+ * Create a memory context that will survive forced transaction commits we
+ * do below. Since it is a child of PortalContext, it will go away
+ * eventually even if we suffer an error; there's no need for special
+ * abort cleanup logic.
+ */
+ private_context = AllocSetContextCreate(PortalContext,
+ "ReindexConcurrent",
+ ALLOCSET_SMALL_SIZES);
+
+ if ((params->options & REINDEXOPT_VERBOSE) != 0)
+ {
+ /* Save data needed by REINDEX VERBOSE in private context */
+ oldcontext = MemoryContextSwitchTo(private_context);
+
+ relationName = get_rel_name(relationOid);
+ relationNamespace = get_namespace_name(get_rel_namespace(relationOid));
+
+ pg_rusage_init(&ru0);
+
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ relkind = get_rel_relkind(relationOid);
+
+ /*
+ * Extract the list of indexes that are going to be rebuilt based on the
+ * relation Oid given by caller.
+ */
+ switch (relkind)
+ {
+ case RELKIND_RELATION:
+ case RELKIND_MATVIEW:
+ case RELKIND_TOASTVALUE:
+ {
+ /*
+ * In the case of a relation, find all its indexes including
+ * toast indexes.
+ */
+ Relation heapRelation;
+
+ /* Save the list of relation OIDs in private context */
+ oldcontext = MemoryContextSwitchTo(private_context);
+
+ /* Track this relation for session locks */
+ heapRelationIds = lappend_oid(heapRelationIds, relationOid);
+
+ MemoryContextSwitchTo(oldcontext);
+
+ if (IsCatalogRelationOid(relationOid))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot reindex system catalogs concurrently")));
+
+ /* Open relation to get its indexes */
+ if ((params->options & REINDEXOPT_MISSING_OK) != 0)
+ {
+ heapRelation = try_table_open(relationOid,
+ ShareUpdateExclusiveLock);
+ /* leave if relation does not exist */
+ if (!heapRelation)
+ break;
+ }
+ else
+ heapRelation = table_open(relationOid,
+ ShareUpdateExclusiveLock);
+
+ if (OidIsValid(params->tablespaceOid) &&
+ IsSystemRelation(heapRelation))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot move system relation \"%s\"",
+ RelationGetRelationName(heapRelation))));
+
+ /* Add all the valid indexes of relation to list */
+ foreach(lc, RelationGetIndexList(heapRelation))
+ {
+ Oid cellOid = lfirst_oid(lc);
+ Relation indexRelation = index_open(cellOid,
+ ShareUpdateExclusiveLock);
+
+ if (!indexRelation->rd_index->indisvalid)
+ ereport(WARNING,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot reindex invalid index \"%s.%s\" concurrently, skipping",
+ get_namespace_name(get_rel_namespace(cellOid)),
+ get_rel_name(cellOid))));
+ else if (indexRelation->rd_index->indisexclusion)
+ ereport(WARNING,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot reindex exclusion constraint index \"%s.%s\" concurrently, skipping",
+ get_namespace_name(get_rel_namespace(cellOid)),
+ get_rel_name(cellOid))));
+ else
+ {
+ ReindexIndexInfo *idx;
+
+ /* Save the list of relation OIDs in private context */
+ oldcontext = MemoryContextSwitchTo(private_context);
+
+ idx = palloc(sizeof(ReindexIndexInfo));
+ idx->indexId = cellOid;
+ /* other fields set later */
+
+ indexIds = lappend(indexIds, idx);
+
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ index_close(indexRelation, NoLock);
+ }
+
+ /* Also add the toast indexes */
+ if (OidIsValid(heapRelation->rd_rel->reltoastrelid))
+ {
+ Oid toastOid = heapRelation->rd_rel->reltoastrelid;
+ Relation toastRelation = table_open(toastOid,
+ ShareUpdateExclusiveLock);
+
+ /* Save the list of relation OIDs in private context */
+ oldcontext = MemoryContextSwitchTo(private_context);
+
+ /* Track this relation for session locks */
+ heapRelationIds = lappend_oid(heapRelationIds, toastOid);
+
+ MemoryContextSwitchTo(oldcontext);
+
+ foreach(lc2, RelationGetIndexList(toastRelation))
+ {
+ Oid cellOid = lfirst_oid(lc2);
+ Relation indexRelation = index_open(cellOid,
+ ShareUpdateExclusiveLock);
+
+ if (!indexRelation->rd_index->indisvalid)
+ ereport(WARNING,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("cannot reindex invalid index \"%s.%s\" concurrently, skipping",
+ get_namespace_name(get_rel_namespace(cellOid)),
+ get_rel_name(cellOid))));
+ else
+ {
+ ReindexIndexInfo *idx;
+
+ /*
+ * Save the list of relation OIDs in private
+ * context
+ */
+ oldcontext = MemoryContextSwitchTo(private_context);
+
+ idx = palloc(sizeof(ReindexIndexInfo));
+ idx->indexId = cellOid;
+ indexIds = lappend(indexIds, idx);
+ /* other fields set later */
+
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ index_close(indexRelation, NoLock);
+ }
+
+ table_close(toastRelation, NoLock);
+ }
+
+ table_close(heapRelation, NoLock);
+ break;
+ }
+ case RELKIND_INDEX:
+ {
+ Oid heapId = IndexGetRelation(relationOid,
+ (params->options & REINDEXOPT_MISSING_OK) != 0);
+ Relation heapRelation;
+ ReindexIndexInfo *idx;
+
+ /* if relation is missing, leave */
+ if (!OidIsValid(heapId))
+ break;
+
+ if (IsCatalogRelationOid(heapId))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot reindex system catalogs concurrently")));
+
+ /*
+ * Don't allow reindex for an invalid index on TOAST table, as
+ * if rebuilt it would not be possible to drop it. Match
+ * error message in reindex_index().
+ */
+ if (IsToastNamespace(get_rel_namespace(relationOid)) &&
+ !get_index_isvalid(relationOid))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot reindex invalid index on TOAST table")));
+
+ /*
+ * Check if parent relation can be locked and if it exists,
+ * this needs to be done at this stage as the list of indexes
+ * to rebuild is not complete yet, and REINDEXOPT_MISSING_OK
+ * should not be used once all the session locks are taken.
+ */
+ if ((params->options & REINDEXOPT_MISSING_OK) != 0)
+ {
+ heapRelation = try_table_open(heapId,
+ ShareUpdateExclusiveLock);
+ /* leave if relation does not exist */
+ if (!heapRelation)
+ break;
+ }
+ else
+ heapRelation = table_open(heapId,
+ ShareUpdateExclusiveLock);
+
+ if (OidIsValid(params->tablespaceOid) &&
+ IsSystemRelation(heapRelation))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot move system relation \"%s\"",
+ get_rel_name(relationOid))));
+
+ table_close(heapRelation, NoLock);
+
+ /* Save the list of relation OIDs in private context */
+ oldcontext = MemoryContextSwitchTo(private_context);
+
+ /* Track the heap relation of this index for session locks */
+ heapRelationIds = list_make1_oid(heapId);
+
+ /*
+ * Save the list of relation OIDs in private context. Note
+ * that invalid indexes are allowed here.
+ */
+ idx = palloc(sizeof(ReindexIndexInfo));
+ idx->indexId = relationOid;
+ indexIds = lappend(indexIds, idx);
+ /* other fields set later */
+
+ MemoryContextSwitchTo(oldcontext);
+ break;
+ }
+
+ case RELKIND_PARTITIONED_TABLE:
+ case RELKIND_PARTITIONED_INDEX:
+ default:
+ /* Return error if type of relation is not supported */
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot reindex this type of relation concurrently")));
+ break;
+ }
+
+ /*
+ * Definitely no indexes, so leave. Any checks based on
+ * REINDEXOPT_MISSING_OK should be done only while the list of indexes to
+ * work on is built as the session locks taken before this transaction
+ * commits will make sure that they cannot be dropped by a concurrent
+ * session until this operation completes.
+ */
+ if (indexIds == NIL)
+ {
+ PopActiveSnapshot();
+ return false;
+ }
+
+ /* It's not a shared catalog, so refuse to move it to shared tablespace */
+ if (params->tablespaceOid == GLOBALTABLESPACE_OID)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot move non-shared relation to tablespace \"%s\"",
+ get_tablespace_name(params->tablespaceOid))));
+
+ Assert(heapRelationIds != NIL);
+
+ /*-----
+ * Now we have all the indexes we want to process in indexIds.
+ *
+ * The phases now are:
+ *
+ * 1. create new indexes in the catalog
+ * 2. build new indexes
+ * 3. let new indexes catch up with tuples inserted in the meantime
+ * 4. swap index names
+ * 5. mark old indexes as dead
+ * 6. drop old indexes
+ *
+ * We process each phase for all indexes before moving to the next phase,
+ * for efficiency.
+ */
+
+ /*
+ * Phase 1 of REINDEX CONCURRENTLY
+ *
+ * Create a new index with the same properties as the old one, but it is
+ * only registered in catalogs and will be built later. Then get session
+ * locks on all involved tables. See analogous code in DefineIndex() for
+ * more detailed comments.
+ */
+
+ foreach(lc, indexIds)
+ {
+ char *concurrentName;
+ ReindexIndexInfo *idx = lfirst(lc);
+ ReindexIndexInfo *newidx;
+ Oid newIndexId;
+ Relation indexRel;
+ Relation heapRel;
+ Oid save_userid;
+ int save_sec_context;
+ int save_nestlevel;
+ Relation newIndexRel;
+ LockRelId *lockrelid;
+ Oid tablespaceid;
+
+ indexRel = index_open(idx->indexId, ShareUpdateExclusiveLock);
+ heapRel = table_open(indexRel->rd_index->indrelid,
+ ShareUpdateExclusiveLock);
+
+ /*
+ * Switch to the table owner's userid, so that any index functions are
+ * run as that user. Also lock down security-restricted operations
+ * and arrange to make GUC variable changes local to this command.
+ */
+ GetUserIdAndSecContext(&save_userid, &save_sec_context);
+ SetUserIdAndSecContext(heapRel->rd_rel->relowner,
+ save_sec_context | SECURITY_RESTRICTED_OPERATION);
+ save_nestlevel = NewGUCNestLevel();
+
+ /* determine safety of this index for set_indexsafe_procflags */
+ idx->safe = (indexRel->rd_indexprs == NIL &&
+ indexRel->rd_indpred == NIL);
+ idx->tableId = RelationGetRelid(heapRel);
+ idx->amId = indexRel->rd_rel->relam;
+
+ /* This function shouldn't be called for temporary relations. */
+ if (indexRel->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
+ elog(ERROR, "cannot reindex a temporary table concurrently");
+
+ pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
+ idx->tableId);
+
+ progress_vals[0] = PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY;
+ progress_vals[1] = 0; /* initializing */
+ progress_vals[2] = idx->indexId;
+ progress_vals[3] = idx->amId;
+ pgstat_progress_update_multi_param(4, progress_index, progress_vals);
+
+ /* Choose a temporary relation name for the new index */
+ concurrentName = ChooseRelationName(get_rel_name(idx->indexId),
+ NULL,
+ "ccnew",
+ get_rel_namespace(indexRel->rd_index->indrelid),
+ false);
+
+ /* Choose the new tablespace, indexes of toast tables are not moved */
+ if (OidIsValid(params->tablespaceOid) &&
+ heapRel->rd_rel->relkind != RELKIND_TOASTVALUE)
+ tablespaceid = params->tablespaceOid;
+ else
+ tablespaceid = indexRel->rd_rel->reltablespace;
+
+ /* Create new index definition based on given index */
+ newIndexId = index_concurrently_create_copy(heapRel,
+ idx->indexId,
+ tablespaceid,
+ concurrentName);
+
+ /*
+ * Now open the relation of the new index, a session-level lock is
+ * also needed on it.
+ */
+ newIndexRel = index_open(newIndexId, ShareUpdateExclusiveLock);
+
+ /*
+ * Save the list of OIDs and locks in private context
+ */
+ oldcontext = MemoryContextSwitchTo(private_context);
+
+ newidx = palloc(sizeof(ReindexIndexInfo));
+ newidx->indexId = newIndexId;
+ newidx->safe = idx->safe;
+ newidx->tableId = idx->tableId;
+ newidx->amId = idx->amId;
+
+ newIndexIds = lappend(newIndexIds, newidx);
+
+ /*
+ * Save lockrelid to protect each relation from drop then close
+ * relations. The lockrelid on parent relation is not taken here to
+ * avoid multiple locks taken on the same relation, instead we rely on
+ * parentRelationIds built earlier.
+ */
+ lockrelid = palloc(sizeof(*lockrelid));
+ *lockrelid = indexRel->rd_lockInfo.lockRelId;
+ relationLocks = lappend(relationLocks, lockrelid);
+ lockrelid = palloc(sizeof(*lockrelid));
+ *lockrelid = newIndexRel->rd_lockInfo.lockRelId;
+ relationLocks = lappend(relationLocks, lockrelid);
+
+ MemoryContextSwitchTo(oldcontext);
+
+ index_close(indexRel, NoLock);
+ index_close(newIndexRel, NoLock);
+
+ /* Roll back any GUC changes executed by index functions */
+ AtEOXact_GUC(false, save_nestlevel);
+
+ /* Restore userid and security context */
+ SetUserIdAndSecContext(save_userid, save_sec_context);
+
+ table_close(heapRel, NoLock);
+ }
+
+ /*
+ * Save the heap lock for following visibility checks with other backends
+ * might conflict with this session.
+ */
+ foreach(lc, heapRelationIds)
+ {
+ Relation heapRelation = table_open(lfirst_oid(lc), ShareUpdateExclusiveLock);
+ LockRelId *lockrelid;
+ LOCKTAG *heaplocktag;
+
+ /* Save the list of locks in private context */
+ oldcontext = MemoryContextSwitchTo(private_context);
+
+ /* Add lockrelid of heap relation to the list of locked relations */
+ lockrelid = palloc(sizeof(*lockrelid));
+ *lockrelid = heapRelation->rd_lockInfo.lockRelId;
+ relationLocks = lappend(relationLocks, lockrelid);
+
+ heaplocktag = (LOCKTAG *) palloc(sizeof(LOCKTAG));
+
+ /* Save the LOCKTAG for this parent relation for the wait phase */
+ SET_LOCKTAG_RELATION(*heaplocktag, lockrelid->dbId, lockrelid->relId);
+ lockTags = lappend(lockTags, heaplocktag);
+
+ MemoryContextSwitchTo(oldcontext);
+
+ /* Close heap relation */
+ table_close(heapRelation, NoLock);
+ }
+
+ /* Get a session-level lock on each table. */
+ foreach(lc, relationLocks)
+ {
+ LockRelId *lockrelid = (LockRelId *) lfirst(lc);
+
+ LockRelationIdForSession(lockrelid, ShareUpdateExclusiveLock);
+ }
+
+ PopActiveSnapshot();
+ CommitTransactionCommand();
+ StartTransactionCommand();
+
+ /*
+ * Because we don't take a snapshot in this transaction, there's no need
+ * to set the PROC_IN_SAFE_IC flag here.
+ */
+
+ /*
+ * Phase 2 of REINDEX CONCURRENTLY
+ *
+ * Build the new indexes in a separate transaction for each index to avoid
+ * having open transactions for an unnecessary long time. But before
+ * doing that, wait until no running transactions could have the table of
+ * the index open with the old list of indexes. See "phase 2" in
+ * DefineIndex() for more details.
+ */
+
+ pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE,
+ PROGRESS_CREATEIDX_PHASE_WAIT_1);
+ WaitForLockersMultiple(lockTags, ShareLock, true);
+ CommitTransactionCommand();
+
+ foreach(lc, newIndexIds)
+ {
+ ReindexIndexInfo *newidx = lfirst(lc);
+
+ /* Start new transaction for this index's concurrent build */
+ StartTransactionCommand();
+
+ /*
+ * Check for user-requested abort. This is inside a transaction so as
+ * xact.c does not issue a useless WARNING, and ensures that
+ * session-level locks are cleaned up on abort.
+ */
+ CHECK_FOR_INTERRUPTS();
+
+ /* Tell concurrent indexing to ignore us, if index qualifies */
+ if (newidx->safe)
+ set_indexsafe_procflags();
+
+ /* Set ActiveSnapshot since functions in the indexes may need it */
+ PushActiveSnapshot(GetTransactionSnapshot());
+
+ /*
+ * Update progress for the index to build, with the correct parent
+ * table involved.
+ */
+ pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX, newidx->tableId);
+ progress_vals[0] = PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY;
+ progress_vals[1] = PROGRESS_CREATEIDX_PHASE_BUILD;
+ progress_vals[2] = newidx->indexId;
+ progress_vals[3] = newidx->amId;
+ pgstat_progress_update_multi_param(4, progress_index, progress_vals);
+
+ /* Perform concurrent build of new index */
+ index_concurrently_build(newidx->tableId, newidx->indexId);
+
+ PopActiveSnapshot();
+ CommitTransactionCommand();
+ }
+
+ StartTransactionCommand();
+
+ /*
+ * Because we don't take a snapshot or Xid in this transaction, there's no
+ * need to set the PROC_IN_SAFE_IC flag here.
+ */
+
+ /*
+ * Phase 3 of REINDEX CONCURRENTLY
+ *
+ * During this phase the old indexes catch up with any new tuples that
+ * were created during the previous phase. See "phase 3" in DefineIndex()
+ * for more details.
+ */
+
+ pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE,
+ PROGRESS_CREATEIDX_PHASE_WAIT_2);
+ WaitForLockersMultiple(lockTags, ShareLock, true);
+ CommitTransactionCommand();
+
+ foreach(lc, newIndexIds)
+ {
+ ReindexIndexInfo *newidx = lfirst(lc);
+ TransactionId limitXmin;
+ Snapshot snapshot;
+
+ StartTransactionCommand();
+
+ /*
+ * Check for user-requested abort. This is inside a transaction so as
+ * xact.c does not issue a useless WARNING, and ensures that
+ * session-level locks are cleaned up on abort.
+ */
+ CHECK_FOR_INTERRUPTS();
+
+ /* Tell concurrent indexing to ignore us, if index qualifies */
+ if (newidx->safe)
+ set_indexsafe_procflags();
+
+ /*
+ * Take the "reference snapshot" that will be used by validate_index()
+ * to filter candidate tuples.
+ */
+ snapshot = RegisterSnapshot(GetTransactionSnapshot());
+ PushActiveSnapshot(snapshot);
+
+ /*
+ * Update progress for the index to build, with the correct parent
+ * table involved.
+ */
+ pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
+ newidx->tableId);
+ progress_vals[0] = PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY;
+ progress_vals[1] = PROGRESS_CREATEIDX_PHASE_VALIDATE_IDXSCAN;
+ progress_vals[2] = newidx->indexId;
+ progress_vals[3] = newidx->amId;
+ pgstat_progress_update_multi_param(4, progress_index, progress_vals);
+
+ validate_index(newidx->tableId, newidx->indexId, snapshot);
+
+ /*
+ * We can now do away with our active snapshot, we still need to save
+ * the xmin limit to wait for older snapshots.
+ */
+ limitXmin = snapshot->xmin;
+
+ PopActiveSnapshot();
+ UnregisterSnapshot(snapshot);
+
+ /*
+ * To ensure no deadlocks, we must commit and start yet another
+ * transaction, and do our wait before any snapshot has been taken in
+ * it.
+ */
+ CommitTransactionCommand();
+ StartTransactionCommand();
+
+ /*
+ * The index is now valid in the sense that it contains all currently
+ * interesting tuples. But since it might not contain tuples deleted
+ * just before the reference snap was taken, we have to wait out any
+ * transactions that might have older snapshots.
+ *
+ * Because we don't take a snapshot or Xid in this transaction,
+ * there's no need to set the PROC_IN_SAFE_IC flag here.
+ */
+ pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE,
+ PROGRESS_CREATEIDX_PHASE_WAIT_3);
+ WaitForOlderSnapshots(limitXmin, true);
+
+ CommitTransactionCommand();
+ }
+
+ /*
+ * Phase 4 of REINDEX CONCURRENTLY
+ *
+ * Now that the new indexes have been validated, swap each new index with
+ * its corresponding old index.
+ *
+ * We mark the new indexes as valid and the old indexes as not valid at
+ * the same time to make sure we only get constraint violations from the
+ * indexes with the correct names.
+ */
+
+ StartTransactionCommand();
+
+ /*
+ * Because this transaction only does catalog manipulations and doesn't do
+ * any index operations, we can set the PROC_IN_SAFE_IC flag here
+ * unconditionally.
+ */
+ set_indexsafe_procflags();
+
+ forboth(lc, indexIds, lc2, newIndexIds)
+ {
+ ReindexIndexInfo *oldidx = lfirst(lc);
+ ReindexIndexInfo *newidx = lfirst(lc2);
+ char *oldName;
+
+ /*
+ * Check for user-requested abort. This is inside a transaction so as
+ * xact.c does not issue a useless WARNING, and ensures that
+ * session-level locks are cleaned up on abort.
+ */
+ CHECK_FOR_INTERRUPTS();
+
+ /* Choose a relation name for old index */
+ oldName = ChooseRelationName(get_rel_name(oldidx->indexId),
+ NULL,
+ "ccold",
+ get_rel_namespace(oldidx->tableId),
+ false);
+
+ /*
+ * Swap old index with the new one. This also marks the new one as
+ * valid and the old one as not valid.
+ */
+ index_concurrently_swap(newidx->indexId, oldidx->indexId, oldName);
+
+ /*
+ * Invalidate the relcache for the table, so that after this commit
+ * all sessions will refresh any cached plans that might reference the
+ * index.
+ */
+ CacheInvalidateRelcacheByRelid(oldidx->tableId);
+
+ /*
+ * CCI here so that subsequent iterations see the oldName in the
+ * catalog and can choose a nonconflicting name for their oldName.
+ * Otherwise, this could lead to conflicts if a table has two indexes
+ * whose names are equal for the first NAMEDATALEN-minus-a-few
+ * characters.
+ */
+ CommandCounterIncrement();
+ }
+
+ /* Commit this transaction and make index swaps visible */
+ CommitTransactionCommand();
+ StartTransactionCommand();
+
+ /*
+ * While we could set PROC_IN_SAFE_IC if all indexes qualified, there's no
+ * real need for that, because we only acquire an Xid after the wait is
+ * done, and that lasts for a very short period.
+ */
+
+ /*
+ * Phase 5 of REINDEX CONCURRENTLY
+ *
+ * Mark the old indexes as dead. First we must wait until no running
+ * transaction could be using the index for a query. See also
+ * index_drop() for more details.
+ */
+
+ pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE,
+ PROGRESS_CREATEIDX_PHASE_WAIT_4);
+ WaitForLockersMultiple(lockTags, AccessExclusiveLock, true);
+
+ foreach(lc, indexIds)
+ {
+ ReindexIndexInfo *oldidx = lfirst(lc);
+
+ /*
+ * Check for user-requested abort. This is inside a transaction so as
+ * xact.c does not issue a useless WARNING, and ensures that
+ * session-level locks are cleaned up on abort.
+ */
+ CHECK_FOR_INTERRUPTS();
+
+ index_concurrently_set_dead(oldidx->tableId, oldidx->indexId);
+ }
+
+ /* Commit this transaction to make the updates visible. */
+ CommitTransactionCommand();
+ StartTransactionCommand();
+
+ /*
+ * While we could set PROC_IN_SAFE_IC if all indexes qualified, there's no
+ * real need for that, because we only acquire an Xid after the wait is
+ * done, and that lasts for a very short period.
+ */
+
+ /*
+ * Phase 6 of REINDEX CONCURRENTLY
+ *
+ * Drop the old indexes.
+ */
+
+ pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE,
+ PROGRESS_CREATEIDX_PHASE_WAIT_5);
+ WaitForLockersMultiple(lockTags, AccessExclusiveLock, true);
+
+ PushActiveSnapshot(GetTransactionSnapshot());
+
+ {
+ ObjectAddresses *objects = new_object_addresses();
+
+ foreach(lc, indexIds)
+ {
+ ReindexIndexInfo *idx = lfirst(lc);
+ ObjectAddress object;
+
+ object.classId = RelationRelationId;
+ object.objectId = idx->indexId;
+ object.objectSubId = 0;
+
+ add_exact_object_address(&object, objects);
+ }
+
+ /*
+ * Use PERFORM_DELETION_CONCURRENT_LOCK so that index_drop() uses the
+ * right lock level.
+ */
+ performMultipleDeletions(objects, DROP_RESTRICT,
+ PERFORM_DELETION_CONCURRENT_LOCK | PERFORM_DELETION_INTERNAL);
+ }
+
+ PopActiveSnapshot();
+ CommitTransactionCommand();
+
+ /*
+ * Finally, release the session-level lock on the table.
+ */
+ foreach(lc, relationLocks)
+ {
+ LockRelId *lockrelid = (LockRelId *) lfirst(lc);
+
+ UnlockRelationIdForSession(lockrelid, ShareUpdateExclusiveLock);
+ }
+
+ /* Start a new transaction to finish process properly */
+ StartTransactionCommand();
+
+ /* Log what we did */
+ if ((params->options & REINDEXOPT_VERBOSE) != 0)
+ {
+ if (relkind == RELKIND_INDEX)
+ ereport(INFO,
+ (errmsg("index \"%s.%s\" was reindexed",
+ relationNamespace, relationName),
+ errdetail("%s.",
+ pg_rusage_show(&ru0))));
+ else
+ {
+ foreach(lc, newIndexIds)
+ {
+ ReindexIndexInfo *idx = lfirst(lc);
+ Oid indOid = idx->indexId;
+
+ ereport(INFO,
+ (errmsg("index \"%s.%s\" was reindexed",
+ get_namespace_name(get_rel_namespace(indOid)),
+ get_rel_name(indOid))));
+ /* Don't show rusage here, since it's not per index. */
+ }
+
+ ereport(INFO,
+ (errmsg("table \"%s.%s\" was reindexed",
+ relationNamespace, relationName),
+ errdetail("%s.",
+ pg_rusage_show(&ru0))));
+ }
+ }
+
+ MemoryContextDelete(private_context);
+
+ pgstat_progress_end_command();
+
+ return true;
+}
+
/*
 * Insert or delete an appropriate pg_inherits tuple to make the given index
 * be a partition of the indicated parent index.
 *
 * "partitionIdx" is the index being attached or detached; "parentOid" is the
 * OID of the new parent index, or InvalidOid to detach the index from any
 * parent it currently has.
 *
 * This also corrects the pg_depend information for the affected index.
 */
void
IndexSetParentIndex(Relation partitionIdx, Oid parentOid)
{
	Relation	pg_inherits;
	ScanKeyData key[2];
	SysScanDesc scan;
	Oid			partRelid = RelationGetRelid(partitionIdx);
	HeapTuple	tuple;
	bool		fix_dependencies;

	/* Make sure this is an index */
	Assert(partitionIdx->rd_rel->relkind == RELKIND_INDEX ||
		   partitionIdx->rd_rel->relkind == RELKIND_PARTITIONED_INDEX);

	/*
	 * Scan pg_inherits for rows linking our index to some parent.  An index
	 * partition has at most one parent, always stored with inhseqno 1 (see
	 * the StoreSingleInheritance call below), so a single-match scan
	 * suffices.
	 */
	pg_inherits = relation_open(InheritsRelationId, RowExclusiveLock);
	ScanKeyInit(&key[0],
				Anum_pg_inherits_inhrelid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(partRelid));
	ScanKeyInit(&key[1],
				Anum_pg_inherits_inhseqno,
				BTEqualStrategyNumber, F_INT4EQ,
				Int32GetDatum(1));
	scan = systable_beginscan(pg_inherits, InheritsRelidSeqnoIndexId, true,
							  NULL, 2, key);
	tuple = systable_getnext(scan);

	if (!HeapTupleIsValid(tuple))
	{
		if (parentOid == InvalidOid)
		{
			/*
			 * No pg_inherits row, and no parent wanted: nothing to do in this
			 * case.
			 */
			fix_dependencies = false;
		}
		else
		{
			/* No pg_inherits row yet, but a parent is wanted: add one. */
			StoreSingleInheritance(partRelid, parentOid, 1);
			fix_dependencies = true;
		}
	}
	else
	{
		Form_pg_inherits inhForm = (Form_pg_inherits) GETSTRUCT(tuple);

		if (parentOid == InvalidOid)
		{
			/*
			 * There exists a pg_inherits row, which we want to clear; do so.
			 */
			CatalogTupleDelete(pg_inherits, &tuple->t_self);
			fix_dependencies = true;
		}
		else
		{
			/*
			 * A pg_inherits row exists.  If it's the same we want, then we're
			 * good; if it differs, that amounts to a corrupt catalog and
			 * should not happen.
			 */
			if (inhForm->inhparent != parentOid)
			{
				/* unexpected: we should not get called in this case */
				elog(ERROR, "bogus pg_inherit row: inhrelid %u inhparent %u",
					 inhForm->inhrelid, inhForm->inhparent);
			}

			/* already in the right state */
			fix_dependencies = false;
		}
	}

	/* done with pg_inherits */
	systable_endscan(scan);
	relation_close(pg_inherits, RowExclusiveLock);

	/* set relhassubclass if an index partition has been added to the parent */
	if (OidIsValid(parentOid))
		SetRelationHasSubclass(parentOid, true);

	/* set relispartition correctly on the partition */
	update_relispartition(partRelid, OidIsValid(parentOid));

	if (fix_dependencies)
	{
		/*
		 * Insert/delete pg_depend rows.  If setting a parent, add PARTITION
		 * dependencies on the parent index and the table; if removing a
		 * parent, delete PARTITION dependencies.
		 */
		if (OidIsValid(parentOid))
		{
			ObjectAddress partIdx;
			ObjectAddress parentIdx;
			ObjectAddress partitionTbl;

			ObjectAddressSet(partIdx, RelationRelationId, partRelid);
			ObjectAddressSet(parentIdx, RelationRelationId, parentOid);
			ObjectAddressSet(partitionTbl, RelationRelationId,
							 partitionIdx->rd_index->indrelid);
			recordDependencyOn(&partIdx, &parentIdx,
							   DEPENDENCY_PARTITION_PRI);
			recordDependencyOn(&partIdx, &partitionTbl,
							   DEPENDENCY_PARTITION_SEC);
		}
		else
		{
			deleteDependencyRecordsForClass(RelationRelationId, partRelid,
											RelationRelationId,
											DEPENDENCY_PARTITION_PRI);
			deleteDependencyRecordsForClass(RelationRelationId, partRelid,
											RelationRelationId,
											DEPENDENCY_PARTITION_SEC);
		}

		/* make our updates visible */
		CommandCounterIncrement();
	}
}
+
+/*
+ * Subroutine of IndexSetParentIndex to update the relispartition flag of the
+ * given index to the given value.
+ */
+static void
+update_relispartition(Oid relationId, bool newval)
+{
+ HeapTuple tup;
+ Relation classRel;
+
+ classRel = table_open(RelationRelationId, RowExclusiveLock);
+ tup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relationId));
+ if (!HeapTupleIsValid(tup))
+ elog(ERROR, "cache lookup failed for relation %u", relationId);
+ Assert(((Form_pg_class) GETSTRUCT(tup))->relispartition != newval);
+ ((Form_pg_class) GETSTRUCT(tup))->relispartition = newval;
+ CatalogTupleUpdate(classRel, &tup->t_self, tup);
+ heap_freetuple(tup);
+ table_close(classRel, RowExclusiveLock);
+}
+
/*
 * Set the PROC_IN_SAFE_IC flag in MyProc->statusFlags.
 *
 * When doing concurrent index builds, we can set this flag
 * to tell other processes concurrently running CREATE
 * INDEX CONCURRENTLY or REINDEX CONCURRENTLY to ignore us when
 * doing their waits for concurrent snapshots.  On one hand it
 * avoids pointlessly waiting for a process that's not interesting
 * anyway; but more importantly it avoids deadlocks in some cases.
 *
 * This can be done safely only for indexes that don't execute any
 * expressions that could access other tables, so index must not be
 * expressional nor partial.  Caller is responsible for only calling
 * this routine when that assumption holds true.
 *
 * (The flag is reset automatically at transaction end, so it must be
 * set for each transaction.)
 */
static inline void
set_indexsafe_procflags(void)
{
	/*
	 * This should only be called before installing xid or xmin in MyProc;
	 * otherwise, concurrent processes could see an Xmin that moves backwards.
	 */
	Assert(MyProc->xid == InvalidTransactionId &&
		   MyProc->xmin == InvalidTransactionId);

	/*
	 * Update both our own PGPROC copy and the mirrored copy in the dense
	 * ProcGlobal array, under ProcArrayLock so readers see them consistently.
	 */
	LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
	MyProc->statusFlags |= PROC_IN_SAFE_IC;
	ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags;
	LWLockRelease(ProcArrayLock);
}
diff --git a/src/backend/commands/lockcmds.c b/src/backend/commands/lockcmds.c
new file mode 100644
index 0000000..b97b8b0
--- /dev/null
+++ b/src/backend/commands/lockcmds.c
@@ -0,0 +1,306 @@
+/*-------------------------------------------------------------------------
+ *
+ * lockcmds.c
+ * LOCK command support code
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/commands/lockcmds.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/table.h"
+#include "access/xact.h"
+#include "catalog/namespace.h"
+#include "catalog/pg_inherits.h"
+#include "commands/lockcmds.h"
+#include "miscadmin.h"
+#include "nodes/nodeFuncs.h"
+#include "parser/parse_clause.h"
+#include "rewrite/rewriteHandler.h"
+#include "storage/lmgr.h"
+#include "utils/acl.h"
+#include "utils/lsyscache.h"
+#include "utils/syscache.h"
+
+static void LockTableRecurse(Oid reloid, LOCKMODE lockmode, bool nowait);
+static AclResult LockTableAclCheck(Oid relid, LOCKMODE lockmode, Oid userid);
+static void RangeVarCallbackForLockTable(const RangeVar *rv, Oid relid,
+ Oid oldrelid, void *arg);
+static void LockViewRecurse(Oid reloid, LOCKMODE lockmode, bool nowait,
+ List *ancestor_views);
+
+/*
+ * LOCK TABLE
+ */
+void
+LockTableCommand(LockStmt *lockstmt)
+{
+ ListCell *p;
+
+ /*
+ * Iterate over the list and process the named relations one at a time
+ */
+ foreach(p, lockstmt->relations)
+ {
+ RangeVar *rv = (RangeVar *) lfirst(p);
+ bool recurse = rv->inh;
+ Oid reloid;
+
+ reloid = RangeVarGetRelidExtended(rv, lockstmt->mode,
+ lockstmt->nowait ? RVR_NOWAIT : 0,
+ RangeVarCallbackForLockTable,
+ (void *) &lockstmt->mode);
+
+ if (get_rel_relkind(reloid) == RELKIND_VIEW)
+ LockViewRecurse(reloid, lockstmt->mode, lockstmt->nowait, NIL);
+ else if (recurse)
+ LockTableRecurse(reloid, lockstmt->mode, lockstmt->nowait);
+ }
+}
+
/*
 * Before acquiring a table lock on the named table, check whether we have
 * permission to do so.
 *
 * This is invoked by RangeVarGetRelidExtended; "arg" points to the requested
 * LOCKMODE.  "relid" may be InvalidOid if the relation doesn't exist, and the
 * relation may be dropped concurrently at any point, so both cases are
 * tolerated here rather than raising an error.
 */
static void
RangeVarCallbackForLockTable(const RangeVar *rv, Oid relid, Oid oldrelid,
							 void *arg)
{
	LOCKMODE	lockmode = *(LOCKMODE *) arg;
	char		relkind;
	char		relpersistence;
	AclResult	aclresult;

	if (!OidIsValid(relid))
		return;					/* doesn't exist, so no permissions check */
	relkind = get_rel_relkind(relid);
	if (!relkind)
		return;					/* woops, concurrently dropped; no permissions
								 * check */

	/* Currently, we only allow plain tables or views to be locked */
	if (relkind != RELKIND_RELATION && relkind != RELKIND_PARTITIONED_TABLE &&
		relkind != RELKIND_VIEW)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("cannot lock relation \"%s\"",
						rv->relname),
				 errdetail_relkind_not_supported(relkind)));

	/*
	 * Make note if a temporary relation has been accessed in this
	 * transaction.
	 */
	relpersistence = get_rel_persistence(relid);
	if (relpersistence == RELPERSISTENCE_TEMP)
		MyXactFlags |= XACT_FLAGS_ACCESSEDTEMPNAMESPACE;

	/* Check permissions. */
	aclresult = LockTableAclCheck(relid, lockmode, GetUserId());
	if (aclresult != ACLCHECK_OK)
		aclcheck_error(aclresult, get_relkind_objtype(get_rel_relkind(relid)), rv->relname);
}
+
+/*
+ * Apply LOCK TABLE recursively over an inheritance tree
+ *
+ * This doesn't check permission to perform LOCK TABLE on the child tables,
+ * because getting here means that the user has permission to lock the
+ * parent which is enough.
+ */
+static void
+LockTableRecurse(Oid reloid, LOCKMODE lockmode, bool nowait)
+{
+ List *children;
+ ListCell *lc;
+
+ children = find_all_inheritors(reloid, NoLock, NULL);
+
+ foreach(lc, children)
+ {
+ Oid childreloid = lfirst_oid(lc);
+
+ /* Parent already locked. */
+ if (childreloid == reloid)
+ continue;
+
+ if (!nowait)
+ LockRelationOid(childreloid, lockmode);
+ else if (!ConditionalLockRelationOid(childreloid, lockmode))
+ {
+ /* try to throw error by name; relation could be deleted... */
+ char *relname = get_rel_name(childreloid);
+
+ if (!relname)
+ continue; /* child concurrently dropped, just skip it */
+ ereport(ERROR,
+ (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
+ errmsg("could not obtain lock on relation \"%s\"",
+ relname)));
+ }
+
+ /*
+ * Even if we got the lock, child might have been concurrently
+ * dropped. If so, we can skip it.
+ */
+ if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(childreloid)))
+ {
+ /* Release useless lock */
+ UnlockRelationOid(childreloid, lockmode);
+ continue;
+ }
+ }
+}
+
/*
 * Apply LOCK TABLE recursively over a view
 *
 * All tables and views appearing in the view definition query are locked
 * recursively with the same lock mode.
 */

/* State passed down through LockViewRecurse_walker */
typedef struct
{
	LOCKMODE	lockmode;		/* lock mode to use */
	bool		nowait;			/* no wait mode */
	Oid			check_as_user;	/* user for checking the privilege */
	Oid			viewoid;		/* OID of the view to be locked */
	List	   *ancestor_views; /* OIDs of ancestor views */
} LockViewRecurse_context;
+
/*
 * Query/expression tree walker for LockViewRecurse: lock every table or view
 * named in the view's rewritten query, recursing into nested views and
 * inheritance trees.
 */
static bool
LockViewRecurse_walker(Node *node, LockViewRecurse_context *context)
{
	if (node == NULL)
		return false;

	if (IsA(node, Query))
	{
		Query	   *query = (Query *) node;
		ListCell   *rtable;

		foreach(rtable, query->rtable)
		{
			RangeTblEntry *rte = lfirst(rtable);
			AclResult	aclresult;

			Oid			relid = rte->relid;
			char		relkind = rte->relkind;
			char	   *relname = get_rel_name(relid);

			/*
			 * The OLD and NEW placeholder entries in the view's rtable are
			 * skipped.
			 */
			if (relid == context->viewoid &&
				(strcmp(rte->eref->aliasname, "old") == 0 ||
				 strcmp(rte->eref->aliasname, "new") == 0))
				continue;

			/* Currently, we only allow plain tables or views to be locked. */
			if (relkind != RELKIND_RELATION && relkind != RELKIND_PARTITIONED_TABLE &&
				relkind != RELKIND_VIEW)
				continue;

			/*
			 * We might be dealing with a self-referential view.  If so, we
			 * can just stop recursing, since we already locked it.
			 */
			if (list_member_oid(context->ancestor_views, relid))
				continue;

			/*
			 * Check permissions as the specified user.  This will either be
			 * the view owner or the current user.
			 */
			aclresult = LockTableAclCheck(relid, context->lockmode,
										  context->check_as_user);
			if (aclresult != ACLCHECK_OK)
				aclcheck_error(aclresult, get_relkind_objtype(relkind), relname);

			/* We have enough rights to lock the relation; do so. */
			if (!context->nowait)
				LockRelationOid(relid, context->lockmode);
			else if (!ConditionalLockRelationOid(relid, context->lockmode))
				ereport(ERROR,
						(errcode(ERRCODE_LOCK_NOT_AVAILABLE),
						 errmsg("could not obtain lock on relation \"%s\"",
								relname)));

			/* Recurse into nested views and inheritance trees. */
			if (relkind == RELKIND_VIEW)
				LockViewRecurse(relid, context->lockmode, context->nowait,
								context->ancestor_views);
			else if (rte->inh)
				LockTableRecurse(relid, context->lockmode, context->nowait);
		}

		return query_tree_walker(query,
								 LockViewRecurse_walker,
								 context,
								 QTW_IGNORE_JOINALIASES);
	}

	return expression_tree_walker(node,
								  LockViewRecurse_walker,
								  context);
}
+
/*
 * Lock the given view (already locked by the caller) and, via the walker,
 * everything its definition query references.  "ancestor_views" lists the
 * views already visited on this recursion path, for cycle detection.
 */
static void
LockViewRecurse(Oid reloid, LOCKMODE lockmode, bool nowait,
				List *ancestor_views)
{
	LockViewRecurse_context context;
	Relation	view;
	Query	   *viewquery;

	/* caller has already locked the view */
	view = table_open(reloid, NoLock);
	viewquery = get_view_query(view);

	/*
	 * If the view has the security_invoker property set, check permissions as
	 * the current user.  Otherwise, check permissions as the view owner.
	 */
	context.lockmode = lockmode;
	context.nowait = nowait;
	if (RelationHasSecurityInvoker(view))
		context.check_as_user = GetUserId();
	else
		context.check_as_user = view->rd_rel->relowner;
	context.viewoid = reloid;
	/* add ourselves so the walker can detect self-referential views */
	context.ancestor_views = lappend_oid(ancestor_views, reloid);

	LockViewRecurse_walker((Node *) viewquery, &context);

	/* restore the caller's list by removing the entry we appended */
	context.ancestor_views = list_delete_last(context.ancestor_views);

	table_close(view, NoLock);
}
+
+/*
+ * Check whether the current user is permitted to lock this relation.
+ */
+static AclResult
+LockTableAclCheck(Oid reloid, LOCKMODE lockmode, Oid userid)
+{
+ AclResult aclresult;
+ AclMode aclmask;
+
+ /* Verify adequate privilege */
+ if (lockmode == AccessShareLock)
+ aclmask = ACL_SELECT;
+ else if (lockmode == RowExclusiveLock)
+ aclmask = ACL_INSERT | ACL_UPDATE | ACL_DELETE | ACL_TRUNCATE;
+ else
+ aclmask = ACL_UPDATE | ACL_DELETE | ACL_TRUNCATE;
+
+ aclresult = pg_class_aclcheck(reloid, userid, aclmask);
+
+ return aclresult;
+}
diff --git a/src/backend/commands/matview.c b/src/backend/commands/matview.c
new file mode 100644
index 0000000..d1ee106
--- /dev/null
+++ b/src/backend/commands/matview.c
@@ -0,0 +1,936 @@
+/*-------------------------------------------------------------------------
+ *
+ * matview.c
+ * materialized view support
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/commands/matview.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/heapam.h"
+#include "access/htup_details.h"
+#include "access/multixact.h"
+#include "access/tableam.h"
+#include "access/xact.h"
+#include "access/xlog.h"
+#include "catalog/catalog.h"
+#include "catalog/indexing.h"
+#include "catalog/namespace.h"
+#include "catalog/pg_am.h"
+#include "catalog/pg_opclass.h"
+#include "catalog/pg_operator.h"
+#include "commands/cluster.h"
+#include "commands/matview.h"
+#include "commands/tablecmds.h"
+#include "commands/tablespace.h"
+#include "executor/executor.h"
+#include "executor/spi.h"
+#include "miscadmin.h"
+#include "parser/parse_relation.h"
+#include "pgstat.h"
+#include "rewrite/rewriteHandler.h"
+#include "storage/lmgr.h"
+#include "storage/smgr.h"
+#include "tcop/tcopprot.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/rel.h"
+#include "utils/snapmgr.h"
+#include "utils/syscache.h"
+
+
/* DestReceiver that bulk-inserts executor output into the transient heap */
typedef struct
{
	DestReceiver pub;			/* publicly-known function pointers */
	Oid			transientoid;	/* OID of new heap into which to store */
	/* These fields are filled by transientrel_startup: */
	Relation	transientrel;	/* relation to write to */
	CommandId	output_cid;		/* cmin to insert in output tuples */
	int			ti_options;		/* table_tuple_insert performance options */
	BulkInsertState bistate;	/* bulk insert state */
} DR_transientrel;
+
+static int matview_maintenance_depth = 0;
+
+static void transientrel_startup(DestReceiver *self, int operation, TupleDesc typeinfo);
+static bool transientrel_receive(TupleTableSlot *slot, DestReceiver *self);
+static void transientrel_shutdown(DestReceiver *self);
+static void transientrel_destroy(DestReceiver *self);
+static uint64 refresh_matview_datafill(DestReceiver *dest, Query *query,
+ const char *queryString);
+static char *make_temptable_name_n(char *tempname, int n);
+static void refresh_by_match_merge(Oid matviewOid, Oid tempOid, Oid relowner,
+ int save_sec_context);
+static void refresh_by_heap_swap(Oid matviewOid, Oid OIDNewHeap, char relpersistence);
+static bool is_usable_unique_index(Relation indexRel);
+static void OpenMatViewIncrementalMaintenance(void);
+static void CloseMatViewIncrementalMaintenance(void);
+
+/*
+ * SetMatViewPopulatedState
+ * Mark a materialized view as populated, or not.
+ *
+ * NOTE: caller must be holding an appropriate lock on the relation.
+ */
+void
+SetMatViewPopulatedState(Relation relation, bool newstate)
+{
+ Relation pgrel;
+ HeapTuple tuple;
+
+ Assert(relation->rd_rel->relkind == RELKIND_MATVIEW);
+
+ /*
+ * Update relation's pg_class entry. Crucial side-effect: other backends
+ * (and this one too!) are sent SI message to make them rebuild relcache
+ * entries.
+ */
+ pgrel = table_open(RelationRelationId, RowExclusiveLock);
+ tuple = SearchSysCacheCopy1(RELOID,
+ ObjectIdGetDatum(RelationGetRelid(relation)));
+ if (!HeapTupleIsValid(tuple))
+ elog(ERROR, "cache lookup failed for relation %u",
+ RelationGetRelid(relation));
+
+ ((Form_pg_class) GETSTRUCT(tuple))->relispopulated = newstate;
+
+ CatalogTupleUpdate(pgrel, &tuple->t_self, tuple);
+
+ heap_freetuple(tuple);
+ table_close(pgrel, RowExclusiveLock);
+
+ /*
+ * Advance command counter to make the updated pg_class row locally
+ * visible.
+ */
+ CommandCounterIncrement();
+}
+
/*
 * ExecRefreshMatView -- execute a REFRESH MATERIALIZED VIEW command
 *
 * This refreshes the materialized view by creating a new table and swapping
 * the relfilenodes of the new table and the old materialized view, so the OID
 * of the original materialized view is preserved. Thus we do not lose GRANT
 * nor references to this materialized view.
 *
 * If WITH NO DATA was specified, this is effectively like a TRUNCATE;
 * otherwise it is like a TRUNCATE followed by an INSERT using the SELECT
 * statement associated with the materialized view. The statement node's
 * skipData field shows whether the clause was used.
 *
 * Indexes are rebuilt too, via REINDEX. Since we are effectively bulk-loading
 * the new heap, it's better to create the indexes afterwards than to fill them
 * incrementally while we load.
 *
 * The matview's "populated" state is changed based on whether the contents
 * reflect the result set of the materialized view's query.
 *
 * Returns the ObjectAddress of the refreshed materialized view.
 */
ObjectAddress
ExecRefreshMatView(RefreshMatViewStmt *stmt, const char *queryString,
				   ParamListInfo params, QueryCompletion *qc)
{
	Oid			matviewOid;
	Relation	matviewRel;
	RewriteRule *rule;
	List	   *actions;
	Query	   *dataQuery;
	Oid			tableSpace;
	Oid			relowner;
	Oid			OIDNewHeap;
	DestReceiver *dest;
	uint64		processed = 0;
	bool		concurrent;
	LOCKMODE	lockmode;
	char		relpersistence;
	Oid			save_userid;
	int			save_sec_context;
	int			save_nestlevel;
	ObjectAddress address;

	/*
	 * Determine strength of lock needed.  CONCURRENTLY uses a weaker lock
	 * than the default AccessExclusiveLock.
	 */
	concurrent = stmt->concurrent;
	lockmode = concurrent ? ExclusiveLock : AccessExclusiveLock;

	/*
	 * Get a lock until end of transaction.
	 */
	matviewOid = RangeVarGetRelidExtended(stmt->relation,
										  lockmode, 0,
										  RangeVarCallbackOwnsTable, NULL);
	matviewRel = table_open(matviewOid, NoLock);
	relowner = matviewRel->rd_rel->relowner;

	/*
	 * Switch to the owner's userid, so that any functions are run as that
	 * user.  Also lock down security-restricted operations and arrange to
	 * make GUC variable changes local to this command.
	 */
	GetUserIdAndSecContext(&save_userid, &save_sec_context);
	SetUserIdAndSecContext(relowner,
						   save_sec_context | SECURITY_RESTRICTED_OPERATION);
	save_nestlevel = NewGUCNestLevel();

	/* Make sure it is a materialized view. */
	if (matviewRel->rd_rel->relkind != RELKIND_MATVIEW)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("\"%s\" is not a materialized view",
						RelationGetRelationName(matviewRel))));

	/* Check that CONCURRENTLY is not specified if not populated. */
	if (concurrent && !RelationIsPopulated(matviewRel))
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("CONCURRENTLY cannot be used when the materialized view is not populated")));

	/* Check that conflicting options have not been specified. */
	if (concurrent && stmt->skipData)
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 errmsg("%s and %s options cannot be used together",
						"CONCURRENTLY", "WITH NO DATA")));

	/*
	 * Check that everything is correct for a refresh. Problems at this point
	 * are internal errors, so elog is sufficient.
	 */
	if (matviewRel->rd_rel->relhasrules == false ||
		matviewRel->rd_rules->numLocks < 1)
		elog(ERROR,
			 "materialized view \"%s\" is missing rewrite information",
			 RelationGetRelationName(matviewRel));

	if (matviewRel->rd_rules->numLocks > 1)
		elog(ERROR,
			 "materialized view \"%s\" has too many rules",
			 RelationGetRelationName(matviewRel));

	rule = matviewRel->rd_rules->rules[0];
	if (rule->event != CMD_SELECT || !(rule->isInstead))
		elog(ERROR,
			 "the rule for materialized view \"%s\" is not a SELECT INSTEAD OF rule",
			 RelationGetRelationName(matviewRel));

	actions = rule->actions;
	if (list_length(actions) != 1)
		elog(ERROR,
			 "the rule for materialized view \"%s\" is not a single action",
			 RelationGetRelationName(matviewRel));

	/*
	 * Check that there is a unique index with no WHERE clause on one or more
	 * columns of the materialized view if CONCURRENTLY is specified.
	 */
	if (concurrent)
	{
		List	   *indexoidlist = RelationGetIndexList(matviewRel);
		ListCell   *indexoidscan;
		bool		hasUniqueIndex = false;

		foreach(indexoidscan, indexoidlist)
		{
			Oid			indexoid = lfirst_oid(indexoidscan);
			Relation	indexRel;

			indexRel = index_open(indexoid, AccessShareLock);
			hasUniqueIndex = is_usable_unique_index(indexRel);
			index_close(indexRel, AccessShareLock);
			if (hasUniqueIndex)
				break;
		}

		list_free(indexoidlist);

		if (!hasUniqueIndex)
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("cannot refresh materialized view \"%s\" concurrently",
							quote_qualified_identifier(get_namespace_name(RelationGetNamespace(matviewRel)),
													   RelationGetRelationName(matviewRel))),
					 errhint("Create a unique index with no WHERE clause on one or more columns of the materialized view.")));
	}

	/*
	 * The stored query was rewritten at the time of the MV definition, but
	 * has not been scribbled on by the planner.
	 */
	dataQuery = linitial_node(Query, actions);

	/*
	 * Check for active uses of the relation in the current transaction, such
	 * as open scans.
	 *
	 * NB: We count on this to protect us against problems with refreshing the
	 * data using TABLE_INSERT_FROZEN.
	 */
	CheckTableNotInUse(matviewRel, "REFRESH MATERIALIZED VIEW");

	/*
	 * Tentatively mark the matview as populated or not (this will roll back
	 * if we fail later).
	 */
	SetMatViewPopulatedState(matviewRel, !stmt->skipData);

	/* Concurrent refresh builds new data in temp tablespace, and does diff. */
	if (concurrent)
	{
		tableSpace = GetDefaultTablespace(RELPERSISTENCE_TEMP, false);
		relpersistence = RELPERSISTENCE_TEMP;
	}
	else
	{
		tableSpace = matviewRel->rd_rel->reltablespace;
		relpersistence = matviewRel->rd_rel->relpersistence;
	}

	/*
	 * Create the transient table that will receive the regenerated data. Lock
	 * it against access by any other process until commit (by which time it
	 * will be gone).
	 */
	OIDNewHeap = make_new_heap(matviewOid, tableSpace,
							   matviewRel->rd_rel->relam,
							   relpersistence, ExclusiveLock);
	LockRelationOid(OIDNewHeap, AccessExclusiveLock);
	dest = CreateTransientRelDestReceiver(OIDNewHeap);

	/* Generate the data, if wanted. */
	if (!stmt->skipData)
		processed = refresh_matview_datafill(dest, dataQuery, queryString);

	/* Make the matview match the newly generated data. */
	if (concurrent)
	{
		int			old_depth = matview_maintenance_depth;

		/* Restore the maintenance depth on error, then re-throw. */
		PG_TRY();
		{
			refresh_by_match_merge(matviewOid, OIDNewHeap, relowner,
								   save_sec_context);
		}
		PG_CATCH();
		{
			matview_maintenance_depth = old_depth;
			PG_RE_THROW();
		}
		PG_END_TRY();
		Assert(matview_maintenance_depth == old_depth);
	}
	else
	{
		refresh_by_heap_swap(matviewOid, OIDNewHeap, relpersistence);

		/*
		 * Inform cumulative stats system about our activity: basically, we
		 * truncated the matview and inserted some new data. (The concurrent
		 * code path above doesn't need to worry about this because the
		 * inserts and deletes it issues get counted by lower-level code.)
		 */
		pgstat_count_truncate(matviewRel);
		if (!stmt->skipData)
			pgstat_count_heap_insert(matviewRel, processed);
	}

	table_close(matviewRel, NoLock);

	/* Roll back any GUC changes */
	AtEOXact_GUC(false, save_nestlevel);

	/* Restore userid and security context */
	SetUserIdAndSecContext(save_userid, save_sec_context);

	ObjectAddressSet(address, RelationRelationId, matviewOid);

	/*
	 * Save the rowcount so that pg_stat_statements can track the total number
	 * of rows processed by REFRESH MATERIALIZED VIEW command. Note that we
	 * still don't display the rowcount in the command completion tag output,
	 * i.e., the display_rowcount flag of CMDTAG_REFRESH_MATERIALIZED_VIEW
	 * command tag is left false in cmdtaglist.h. Otherwise, the change of
	 * completion tag output might break applications using it.
	 */
	if (qc)
		SetQueryCompletion(qc, CMDTAG_REFRESH_MATERIALIZED_VIEW, processed);

	return address;
}
+
/*
 * refresh_matview_datafill
 *
 * Execute the given query, sending result rows to "dest" (which will
 * insert them into the target matview).
 *
 * "query" is the matview's stored (already view-rewritten) query; it is
 * copied before further rewriting so the stored tree is not modified.
 *
 * Returns number of rows inserted.
 */
static uint64
refresh_matview_datafill(DestReceiver *dest, Query *query,
						 const char *queryString)
{
	List	   *rewritten;
	PlannedStmt *plan;
	QueryDesc  *queryDesc;
	Query	   *copied_query;
	uint64		processed;

	/* Lock and rewrite, using a copy to preserve the original query. */
	copied_query = copyObject(query);
	AcquireRewriteLocks(copied_query, true, false);
	rewritten = QueryRewrite(copied_query);

	/* SELECT should never rewrite to more or less than one SELECT query */
	if (list_length(rewritten) != 1)
		elog(ERROR, "unexpected rewrite result for REFRESH MATERIALIZED VIEW");
	query = (Query *) linitial(rewritten);

	/* Check for user-requested abort. */
	CHECK_FOR_INTERRUPTS();

	/* Plan the query which will generate data for the refresh. */
	plan = pg_plan_query(query, queryString, CURSOR_OPT_PARALLEL_OK, NULL);

	/*
	 * Use a snapshot with an updated command ID to ensure this query sees
	 * results of any previously executed queries.  (This could only matter if
	 * the planner executed an allegedly-stable function that changed the
	 * database contents, but let's do it anyway to be safe.)
	 */
	PushCopiedSnapshot(GetActiveSnapshot());
	UpdateActiveSnapshotCommandId();

	/* Create a QueryDesc, redirecting output to our tuple receiver */
	queryDesc = CreateQueryDesc(plan, queryString,
								GetActiveSnapshot(), InvalidSnapshot,
								dest, NULL, NULL, 0);

	/* call ExecutorStart to prepare the plan for execution */
	ExecutorStart(queryDesc, 0);

	/* run the plan to completion */
	ExecutorRun(queryDesc, ForwardScanDirection, 0L, true);

	/* remember how many rows the executor processed */
	processed = queryDesc->estate->es_processed;

	/* and clean up */
	ExecutorFinish(queryDesc);
	ExecutorEnd(queryDesc);

	FreeQueryDesc(queryDesc);

	PopActiveSnapshot();

	return processed;
}
+
+DestReceiver *
+CreateTransientRelDestReceiver(Oid transientoid)
+{
+ DR_transientrel *self = (DR_transientrel *) palloc0(sizeof(DR_transientrel));
+
+ self->pub.receiveSlot = transientrel_receive;
+ self->pub.rStartup = transientrel_startup;
+ self->pub.rShutdown = transientrel_shutdown;
+ self->pub.rDestroy = transientrel_destroy;
+ self->pub.mydest = DestTransientRel;
+ self->transientoid = transientoid;
+
+ return (DestReceiver *) self;
+}
+
/*
 * transientrel_startup --- executor startup callback for DR_transientrel
 *
 * Opens the transient target relation and caches in the receiver state
 * everything the per-tuple routine (transientrel_receive) will need:
 * the open Relation, a current command ID for the inserts, bulk-insert
 * options, and bulk-insert state.
 */
static void
transientrel_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
{
	DR_transientrel *myState = (DR_transientrel *) self;
	Relation	transientrel;

	/*
	 * NoLock: presumably the caller already holds an adequate lock from
	 * creating the transient rel in this transaction -- TODO confirm.
	 */
	transientrel = table_open(myState->transientoid, NoLock);

	/*
	 * Fill private fields of myState for use by later routines
	 */
	myState->transientrel = transientrel;
	/* advance and capture the command ID so our inserts are self-visible */
	myState->output_cid = GetCurrentCommandId(true);
	/* new, private relation: skip FSM lookups and write frozen tuples */
	myState->ti_options = TABLE_INSERT_SKIP_FSM | TABLE_INSERT_FROZEN;
	myState->bistate = GetBulkInsertState();

	/*
	 * Valid smgr_targblock implies something already wrote to the relation.
	 * This may be harmless, but this function hasn't planned for it.
	 */
	Assert(RelationGetTargetBlock(transientrel) == InvalidBlockNumber);
}
+
+/*
+ * transientrel_receive --- receive one tuple
+ */
+static bool
+transientrel_receive(TupleTableSlot *slot, DestReceiver *self)
+{
+ DR_transientrel *myState = (DR_transientrel *) self;
+
+ /*
+ * Note that the input slot might not be of the type of the target
+ * relation. That's supported by table_tuple_insert(), but slightly less
+ * efficient than inserting with the right slot - but the alternative
+ * would be to copy into a slot of the right type, which would not be
+ * cheap either. This also doesn't allow accessing per-AM data (say a
+ * tuple's xmin), but since we don't do that here...
+ */
+
+ table_tuple_insert(myState->transientrel,
+ slot,
+ myState->output_cid,
+ myState->ti_options,
+ myState->bistate);
+
+ /* We know this is a newly created relation, so there are no indexes */
+
+ return true;
+}
+
+/*
+ * transientrel_shutdown --- executor end
+ */
+static void
+transientrel_shutdown(DestReceiver *self)
+{
+ DR_transientrel *myState = (DR_transientrel *) self;
+
+ FreeBulkInsertState(myState->bistate);
+
+ table_finish_bulk_insert(myState->transientrel, myState->ti_options);
+
+ /* close transientrel, but keep lock until commit */
+ table_close(myState->transientrel, NoLock);
+ myState->transientrel = NULL;
+}
+
/*
 * transientrel_destroy --- release the DestReceiver object itself
 *
 * Frees only the receiver struct; the relation was already closed by
 * transientrel_shutdown.
 */
static void
transientrel_destroy(DestReceiver *self)
{
	pfree(self);
}
+
+
+/*
+ * Given a qualified temporary table name, append an underscore followed by
+ * the given integer, to make a new table name based on the old one.
+ * The result is a palloc'd string.
+ *
+ * As coded, this would fail to make a valid SQL name if the given name were,
+ * say, "FOO"."BAR". Currently, the table name portion of the input will
+ * never be double-quoted because it's of the form "pg_temp_NNN", cf
+ * make_new_heap(). But we might have to work harder someday.
+ */
+static char *
+make_temptable_name_n(char *tempname, int n)
+{
+ StringInfoData namebuf;
+
+ initStringInfo(&namebuf);
+ appendStringInfoString(&namebuf, tempname);
+ appendStringInfo(&namebuf, "_%d", n);
+ return namebuf.data;
+}
+
/*
 * refresh_by_match_merge
 *
 * Refresh a materialized view with transactional semantics, while allowing
 * concurrent reads.
 *
 * This is called after a new version of the data has been created in a
 * temporary table.  It performs a full outer join against the old version of
 * the data, producing "diff" results.  This join cannot work if there are any
 * duplicated rows in either the old or new versions, in the sense that every
 * column would compare as equal between the two rows.  It does work correctly
 * in the face of rows which have at least one NULL value, with all non-NULL
 * columns equal.  The behavior of NULLs on equality tests and on UNIQUE
 * indexes turns out to be quite convenient here; the tests we need to make
 * are consistent with default behavior.  If there is at least one UNIQUE
 * index on the materialized view, we have exactly the guarantee we need.
 *
 * The temporary table used to hold the diff results contains just the TID of
 * the old record (if matched) and the ROW from the new table as a single
 * column of complex record type (if matched).
 *
 * Once we have the diff table, we perform set-based DELETE and INSERT
 * operations against the materialized view, and discard both temporary
 * tables.
 *
 * Everything from the generation of the new data to applying the differences
 * takes place under cover of an ExclusiveLock, since it seems as though we
 * would want to prohibit not only concurrent REFRESH operations, but also
 * incremental maintenance.  It also doesn't seem reasonable or safe to allow
 * SELECT FOR UPDATE or SELECT FOR SHARE on rows being updated or deleted by
 * this command.
 *
 * matviewOid/tempOid identify the matview and the already-populated temp
 * table holding the new contents; relowner and save_sec_context are used to
 * switch between the less- and more-restricted security contexts around the
 * SQL we run through SPI.
 */
static void
refresh_by_match_merge(Oid matviewOid, Oid tempOid, Oid relowner,
					   int save_sec_context)
{
	StringInfoData querybuf;
	Relation	matviewRel;
	Relation	tempRel;
	char	   *matviewname;
	char	   *tempname;
	char	   *diffname;
	TupleDesc	tupdesc;
	bool		foundUniqueIndex;
	List	   *indexoidlist;
	ListCell   *indexoidscan;
	int16		relnatts;
	Oid		   *opUsedForQual;

	initStringInfo(&querybuf);
	/* NoLock: the ExclusiveLock noted above is already held by our caller. */
	matviewRel = table_open(matviewOid, NoLock);
	matviewname = quote_qualified_identifier(get_namespace_name(RelationGetNamespace(matviewRel)),
											 RelationGetRelationName(matviewRel));
	tempRel = table_open(tempOid, NoLock);
	tempname = quote_qualified_identifier(get_namespace_name(RelationGetNamespace(tempRel)),
										  RelationGetRelationName(tempRel));
	/* The diff table is named <tempname>_2. */
	diffname = make_temptable_name_n(tempname, 2);

	relnatts = RelationGetNumberOfAttributes(matviewRel);

	/* Open SPI context. */
	if (SPI_connect() != SPI_OK_CONNECT)
		elog(ERROR, "SPI_connect failed");

	/* Analyze the temp table with the new contents. */
	appendStringInfo(&querybuf, "ANALYZE %s", tempname);
	if (SPI_exec(querybuf.data, 0) != SPI_OK_UTILITY)
		elog(ERROR, "SPI_exec failed: %s", querybuf.data);

	/*
	 * We need to ensure that there are not duplicate rows without NULLs in
	 * the new data set before we can count on the "diff" results.  Check for
	 * that in a way that allows showing the first duplicated row found.  Even
	 * after we pass this test, a unique index on the materialized view may
	 * find a duplicate key problem.
	 *
	 * Note: here and below, we use "tablename.*::tablerowtype" as a hack to
	 * keep ".*" from being expanded into multiple columns in a SELECT list.
	 * Compare ruleutils.c's get_variable().
	 */
	resetStringInfo(&querybuf);
	appendStringInfo(&querybuf,
					 "SELECT newdata.*::%s FROM %s newdata "
					 "WHERE newdata.* IS NOT NULL AND EXISTS "
					 "(SELECT 1 FROM %s newdata2 WHERE newdata2.* IS NOT NULL "
					 "AND newdata2.* OPERATOR(pg_catalog.*=) newdata.* "
					 "AND newdata2.ctid OPERATOR(pg_catalog.<>) "
					 "newdata.ctid)",
					 tempname, tempname, tempname);
	/* One row is enough: we only need it to report in the error message. */
	if (SPI_execute(querybuf.data, false, 1) != SPI_OK_SELECT)
		elog(ERROR, "SPI_exec failed: %s", querybuf.data);
	if (SPI_processed > 0)
	{
		/*
		 * Note that this ereport() is returning data to the user.  Generally,
		 * we would want to make sure that the user has been granted access to
		 * this data.  However, REFRESH MAT VIEW is only able to be run by the
		 * owner of the mat view (or a superuser) and therefore there is no
		 * need to check for access to data in the mat view.
		 */
		ereport(ERROR,
				(errcode(ERRCODE_CARDINALITY_VIOLATION),
				 errmsg("new data for materialized view \"%s\" contains duplicate rows without any null columns",
						RelationGetRelationName(matviewRel)),
				 errdetail("Row: %s",
						   SPI_getvalue(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1))));
	}

	/* Temporarily loosen the security context to create the diff temp table. */
	SetUserIdAndSecContext(relowner,
						   save_sec_context | SECURITY_LOCAL_USERID_CHANGE);

	/* Start building the query for creating the diff table. */
	resetStringInfo(&querybuf);
	appendStringInfo(&querybuf,
					 "CREATE TEMP TABLE %s AS "
					 "SELECT mv.ctid AS tid, newdata.*::%s AS newdata "
					 "FROM %s mv FULL JOIN %s newdata ON (",
					 diffname, tempname, matviewname, tempname);

	/*
	 * Get the list of index OIDs for the table from the relcache, and look up
	 * each one in the pg_index syscache.  We will test for equality on all
	 * columns present in all unique indexes which only reference columns and
	 * include all rows.
	 */
	tupdesc = matviewRel->rd_att;
	/* Per-attribute record of the last equality operator emitted, or 0. */
	opUsedForQual = (Oid *) palloc0(sizeof(Oid) * relnatts);
	foundUniqueIndex = false;

	indexoidlist = RelationGetIndexList(matviewRel);

	foreach(indexoidscan, indexoidlist)
	{
		Oid			indexoid = lfirst_oid(indexoidscan);
		Relation	indexRel;

		indexRel = index_open(indexoid, RowExclusiveLock);
		if (is_usable_unique_index(indexRel))
		{
			Form_pg_index indexStruct = indexRel->rd_index;
			int			indnkeyatts = indexStruct->indnkeyatts;
			oidvector  *indclass;
			Datum		indclassDatum;
			bool		isnull;
			int			i;

			/* Must get indclass the hard way. */
			indclassDatum = SysCacheGetAttr(INDEXRELID,
											indexRel->rd_indextuple,
											Anum_pg_index_indclass,
											&isnull);
			Assert(!isnull);
			indclass = (oidvector *) DatumGetPointer(indclassDatum);

			/* Add quals for all columns from this index. */
			for (i = 0; i < indnkeyatts; i++)
			{
				int			attnum = indexStruct->indkey.values[i];
				Oid			opclass = indclass->values[i];
				Form_pg_attribute attr = TupleDescAttr(tupdesc, attnum - 1);
				Oid			attrtype = attr->atttypid;
				HeapTuple	cla_ht;
				Form_pg_opclass cla_tup;
				Oid			opfamily;
				Oid			opcintype;
				Oid			op;
				const char *leftop;
				const char *rightop;

				/*
				 * Identify the equality operator associated with this index
				 * column.  First we need to look up the column's opclass.
				 */
				cla_ht = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclass));
				if (!HeapTupleIsValid(cla_ht))
					elog(ERROR, "cache lookup failed for opclass %u", opclass);
				cla_tup = (Form_pg_opclass) GETSTRUCT(cla_ht);
				/* is_usable_unique_index only accepts btree indexes */
				Assert(cla_tup->opcmethod == BTREE_AM_OID);
				opfamily = cla_tup->opcfamily;
				opcintype = cla_tup->opcintype;
				ReleaseSysCache(cla_ht);

				/* The btree equality strategy member is the operator we want. */
				op = get_opfamily_member(opfamily, opcintype, opcintype,
										 BTEqualStrategyNumber);
				if (!OidIsValid(op))
					elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
						 BTEqualStrategyNumber, opcintype, opcintype, opfamily);

				/*
				 * If we find the same column with the same equality semantics
				 * in more than one index, we only need to emit the equality
				 * clause once.
				 *
				 * Since we only remember the last equality operator, this
				 * code could be fooled into emitting duplicate clauses given
				 * multiple indexes with several different opclasses ... but
				 * that's so unlikely it doesn't seem worth spending extra
				 * code to avoid.
				 */
				if (opUsedForQual[attnum - 1] == op)
					continue;
				opUsedForQual[attnum - 1] = op;

				/*
				 * Actually add the qual, ANDed with any others.
				 */
				if (foundUniqueIndex)
					appendStringInfoString(&querybuf, " AND ");

				leftop = quote_qualified_identifier("newdata",
													NameStr(attr->attname));
				rightop = quote_qualified_identifier("mv",
													 NameStr(attr->attname));

				generate_operator_clause(&querybuf,
										 leftop, attrtype,
										 op,
										 rightop, attrtype);

				foundUniqueIndex = true;
			}
		}

		/* Keep the locks, since we're about to run DML which needs them. */
		index_close(indexRel, NoLock);
	}

	list_free(indexoidlist);

	/*
	 * There must be at least one usable unique index on the matview.
	 *
	 * ExecRefreshMatView() checks that after taking the exclusive lock on the
	 * matview.  So at least one unique index is guaranteed to exist here
	 * because the lock is still being held; so an Assert seems sufficient.
	 */
	Assert(foundUniqueIndex);

	/* Complete the join condition: match rows that are entirely equal. */
	appendStringInfoString(&querybuf,
						   " AND newdata.* OPERATOR(pg_catalog.*=) mv.*) "
						   "WHERE newdata.* IS NULL OR mv.* IS NULL "
						   "ORDER BY tid");

	/* Create the temporary "diff" table. */
	if (SPI_exec(querybuf.data, 0) != SPI_OK_UTILITY)
		elog(ERROR, "SPI_exec failed: %s", querybuf.data);

	/* Re-tighten the security context for the remaining DML. */
	SetUserIdAndSecContext(relowner,
						   save_sec_context | SECURITY_RESTRICTED_OPERATION);

	/*
	 * We have no further use for data from the "full-data" temp table, but we
	 * must keep it around because its type is referenced from the diff table.
	 */

	/* Analyze the diff table. */
	resetStringInfo(&querybuf);
	appendStringInfo(&querybuf, "ANALYZE %s", diffname);
	if (SPI_exec(querybuf.data, 0) != SPI_OK_UTILITY)
		elog(ERROR, "SPI_exec failed: %s", querybuf.data);

	/* Permit DML against the matview for the duration of the merge. */
	OpenMatViewIncrementalMaintenance();

	/* Deletes must come before inserts; do them first. */
	resetStringInfo(&querybuf);
	appendStringInfo(&querybuf,
					 "DELETE FROM %s mv WHERE ctid OPERATOR(pg_catalog.=) ANY "
					 "(SELECT diff.tid FROM %s diff "
					 "WHERE diff.tid IS NOT NULL "
					 "AND diff.newdata IS NULL)",
					 matviewname, diffname);
	if (SPI_exec(querybuf.data, 0) != SPI_OK_DELETE)
		elog(ERROR, "SPI_exec failed: %s", querybuf.data);

	/* Inserts go last. */
	resetStringInfo(&querybuf);
	appendStringInfo(&querybuf,
					 "INSERT INTO %s SELECT (diff.newdata).* "
					 "FROM %s diff WHERE tid IS NULL",
					 matviewname, diffname);
	if (SPI_exec(querybuf.data, 0) != SPI_OK_INSERT)
		elog(ERROR, "SPI_exec failed: %s", querybuf.data);

	/* We're done maintaining the materialized view. */
	CloseMatViewIncrementalMaintenance();
	table_close(tempRel, NoLock);
	table_close(matviewRel, NoLock);

	/* Clean up temp tables. */
	resetStringInfo(&querybuf);
	appendStringInfo(&querybuf, "DROP TABLE %s, %s", diffname, tempname);
	if (SPI_exec(querybuf.data, 0) != SPI_OK_UTILITY)
		elog(ERROR, "SPI_exec failed: %s", querybuf.data);

	/* Close SPI context. */
	if (SPI_finish() != SPI_OK_FINISH)
		elog(ERROR, "SPI_finish failed");
}
+
/*
 * Swap the physical files of the target and transient tables, then rebuild
 * the target's indexes and throw away the transient table.  Security context
 * swapping is handled by the called function, so it is not needed here.
 *
 * NOTE(review): the four boolean arguments are positional flags of
 * finish_heap_swap(); their exact meanings (system-catalog, toast-by-content,
 * check-constraints, internal) should be confirmed against cluster.c before
 * changing any of them.
 */
static void
refresh_by_heap_swap(Oid matviewOid, Oid OIDNewHeap, char relpersistence)
{
	finish_heap_swap(matviewOid, OIDNewHeap, false, false, true, true,
					 RecentXmin, ReadNextMultiXactId(), relpersistence);
}
+
+/*
+ * Check whether specified index is usable for match merge.
+ */
+static bool
+is_usable_unique_index(Relation indexRel)
+{
+ Form_pg_index indexStruct = indexRel->rd_index;
+
+ /*
+ * Must be unique, valid, immediate, non-partial, and be defined over
+ * plain user columns (not expressions). We also require it to be a
+ * btree. Even if we had any other unique index kinds, we'd not know how
+ * to identify the corresponding equality operator, nor could we be sure
+ * that the planner could implement the required FULL JOIN with non-btree
+ * operators.
+ */
+ if (indexStruct->indisunique &&
+ indexStruct->indimmediate &&
+ indexRel->rd_rel->relam == BTREE_AM_OID &&
+ indexStruct->indisvalid &&
+ RelationGetIndexPredicate(indexRel) == NIL &&
+ indexStruct->indnatts > 0)
+ {
+ /*
+ * The point of groveling through the index columns individually is to
+ * reject both index expressions and system columns. Currently,
+ * matviews couldn't have OID columns so there's no way to create an
+ * index on a system column; but maybe someday that wouldn't be true,
+ * so let's be safe.
+ */
+ int numatts = indexStruct->indnatts;
+ int i;
+
+ for (i = 0; i < numatts; i++)
+ {
+ int attnum = indexStruct->indkey.values[i];
+
+ if (attnum <= 0)
+ return false;
+ }
+ return true;
+ }
+ return false;
+}
+
+
/*
 * This should be used to test whether the backend is in a context where it is
 * OK to allow DML statements to modify materialized views.  We only want to
 * allow that for internal code driven by the materialized view definition,
 * not for arbitrary user-supplied code.
 *
 * While the function names reflect the fact that their main intended use is
 * incremental maintenance of materialized views (in response to changes to
 * the data in referenced relations), they are initially used to allow REFRESH
 * without blocking concurrent reads.
 */
bool
MatViewIncrementalMaintenanceIsEnabled(void)
{
	/* True while inside at least one Open/Close maintenance pair. */
	return matview_maintenance_depth > 0;
}
+
/*
 * Enter a section in which matview DML is permitted; increments the nesting
 * counter consulted by MatViewIncrementalMaintenanceIsEnabled().  Each call
 * must be paired with CloseMatViewIncrementalMaintenance().
 */
static void
OpenMatViewIncrementalMaintenance(void)
{
	matview_maintenance_depth++;
}
+
/*
 * Leave a section in which matview DML is permitted; must pair with a prior
 * OpenMatViewIncrementalMaintenance().  The Assert catches unbalanced calls.
 */
static void
CloseMatViewIncrementalMaintenance(void)
{
	matview_maintenance_depth--;
	Assert(matview_maintenance_depth >= 0);
}
diff --git a/src/backend/commands/opclasscmds.c b/src/backend/commands/opclasscmds.c
new file mode 100644
index 0000000..7a931ab
--- /dev/null
+++ b/src/backend/commands/opclasscmds.c
@@ -0,0 +1,1745 @@
+/*-------------------------------------------------------------------------
+ *
+ * opclasscmds.c
+ *
+ * Routines for opclass (and opfamily) manipulation commands
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/commands/opclasscmds.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <limits.h>
+
+#include "access/genam.h"
+#include "access/hash.h"
+#include "access/htup_details.h"
+#include "access/nbtree.h"
+#include "access/sysattr.h"
+#include "access/table.h"
+#include "catalog/catalog.h"
+#include "catalog/dependency.h"
+#include "catalog/indexing.h"
+#include "catalog/objectaccess.h"
+#include "catalog/pg_am.h"
+#include "catalog/pg_amop.h"
+#include "catalog/pg_amproc.h"
+#include "catalog/pg_namespace.h"
+#include "catalog/pg_opclass.h"
+#include "catalog/pg_operator.h"
+#include "catalog/pg_opfamily.h"
+#include "catalog/pg_proc.h"
+#include "catalog/pg_type.h"
+#include "commands/alter.h"
+#include "commands/defrem.h"
+#include "commands/event_trigger.h"
+#include "miscadmin.h"
+#include "parser/parse_func.h"
+#include "parser/parse_oper.h"
+#include "parser/parse_type.h"
+#include "utils/builtins.h"
+#include "utils/fmgroids.h"
+#include "utils/lsyscache.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
+
+static void AlterOpFamilyAdd(AlterOpFamilyStmt *stmt,
+ Oid amoid, Oid opfamilyoid,
+ int maxOpNumber, int maxProcNumber,
+ int opclassOptsProcNumber, List *items);
+static void AlterOpFamilyDrop(AlterOpFamilyStmt *stmt,
+ Oid amoid, Oid opfamilyoid,
+ int maxOpNumber, int maxProcNumber,
+ List *items);
+static void processTypesSpec(List *args, Oid *lefttype, Oid *righttype);
+static void assignOperTypes(OpFamilyMember *member, Oid amoid, Oid typeoid);
+static void assignProcTypes(OpFamilyMember *member, Oid amoid, Oid typeoid,
+ int opclassOptsProcNum);
+static void addFamilyMember(List **list, OpFamilyMember *member);
+static void storeOperators(List *opfamilyname, Oid amoid, Oid opfamilyoid,
+ List *operators, bool isAdd);
+static void storeProcedures(List *opfamilyname, Oid amoid, Oid opfamilyoid,
+ List *procedures, bool isAdd);
+static void dropOperators(List *opfamilyname, Oid amoid, Oid opfamilyoid,
+ List *operators);
+static void dropProcedures(List *opfamilyname, Oid amoid, Oid opfamilyoid,
+ List *procedures);
+
/*
 * OpFamilyCacheLookup
 *		Look up an existing opfamily by (possibly qualified) name, for the
 *		given access method.
 *
 * Returns a syscache tuple reference (which the caller must release with
 * ReleaseSysCache), or NULL if not found and missing_ok is true.  If not
 * found and missing_ok is false, an error is raised.
 */
static HeapTuple
OpFamilyCacheLookup(Oid amID, List *opfamilyname, bool missing_ok)
{
	char	   *schemaname;
	char	   *opfname;
	HeapTuple	htup;

	/* deconstruct the name list */
	DeconstructQualifiedName(opfamilyname, &schemaname, &opfname);

	if (schemaname)
	{
		/* Look in specific schema only */
		Oid			namespaceId;

		/* missing_ok also suppresses the error for a nonexistent schema */
		namespaceId = LookupExplicitNamespace(schemaname, missing_ok);
		if (!OidIsValid(namespaceId))
			htup = NULL;
		else
			htup = SearchSysCache3(OPFAMILYAMNAMENSP,
								   ObjectIdGetDatum(amID),
								   PointerGetDatum(opfname),
								   ObjectIdGetDatum(namespaceId));
	}
	else
	{
		/* Unqualified opfamily name, so search the search path */
		Oid			opfID = OpfamilynameGetOpfid(amID, opfname);

		if (!OidIsValid(opfID))
			htup = NULL;
		else
			htup = SearchSysCache1(OPFAMILYOID, ObjectIdGetDatum(opfID));
	}

	if (!HeapTupleIsValid(htup) && !missing_ok)
	{
		HeapTuple	amtup;

		/* Fetch the AM's name just to produce a friendlier error message. */
		amtup = SearchSysCache1(AMOID, ObjectIdGetDatum(amID));
		if (!HeapTupleIsValid(amtup))
			elog(ERROR, "cache lookup failed for access method %u", amID);
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("operator family \"%s\" does not exist for access method \"%s\"",
						NameListToString(opfamilyname),
						NameStr(((Form_pg_am) GETSTRUCT(amtup))->amname))));
	}

	return htup;
}
+
+/*
+ * get_opfamily_oid
+ * find an opfamily OID by possibly qualified name
+ *
+ * If not found, returns InvalidOid if missing_ok, else throws error.
+ */
+Oid
+get_opfamily_oid(Oid amID, List *opfamilyname, bool missing_ok)
+{
+ HeapTuple htup;
+ Form_pg_opfamily opfamform;
+ Oid opfID;
+
+ htup = OpFamilyCacheLookup(amID, opfamilyname, missing_ok);
+ if (!HeapTupleIsValid(htup))
+ return InvalidOid;
+ opfamform = (Form_pg_opfamily) GETSTRUCT(htup);
+ opfID = opfamform->oid;
+ ReleaseSysCache(htup);
+
+ return opfID;
+}
+
/*
 * OpClassCacheLookup
 *		Look up an existing opclass by (possibly qualified) name, for the
 *		given access method.
 *
 * Returns a syscache tuple reference (which the caller must release with
 * ReleaseSysCache), or NULL if not found and missing_ok is true.  If not
 * found and missing_ok is false, an error is raised.
 */
static HeapTuple
OpClassCacheLookup(Oid amID, List *opclassname, bool missing_ok)
{
	char	   *schemaname;
	char	   *opcname;
	HeapTuple	htup;

	/* deconstruct the name list */
	DeconstructQualifiedName(opclassname, &schemaname, &opcname);

	if (schemaname)
	{
		/* Look in specific schema only */
		Oid			namespaceId;

		/* missing_ok also suppresses the error for a nonexistent schema */
		namespaceId = LookupExplicitNamespace(schemaname, missing_ok);
		if (!OidIsValid(namespaceId))
			htup = NULL;
		else
			htup = SearchSysCache3(CLAAMNAMENSP,
								   ObjectIdGetDatum(amID),
								   PointerGetDatum(opcname),
								   ObjectIdGetDatum(namespaceId));
	}
	else
	{
		/* Unqualified opclass name, so search the search path */
		Oid			opcID = OpclassnameGetOpcid(amID, opcname);

		if (!OidIsValid(opcID))
			htup = NULL;
		else
			htup = SearchSysCache1(CLAOID, ObjectIdGetDatum(opcID));
	}

	if (!HeapTupleIsValid(htup) && !missing_ok)
	{
		HeapTuple	amtup;

		/* Fetch the AM's name just to produce a friendlier error message. */
		amtup = SearchSysCache1(AMOID, ObjectIdGetDatum(amID));
		if (!HeapTupleIsValid(amtup))
			elog(ERROR, "cache lookup failed for access method %u", amID);
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("operator class \"%s\" does not exist for access method \"%s\"",
						NameListToString(opclassname),
						NameStr(((Form_pg_am) GETSTRUCT(amtup))->amname))));
	}

	return htup;
}
+
+/*
+ * get_opclass_oid
+ * find an opclass OID by possibly qualified name
+ *
+ * If not found, returns InvalidOid if missing_ok, else throws error.
+ */
+Oid
+get_opclass_oid(Oid amID, List *opclassname, bool missing_ok)
+{
+ HeapTuple htup;
+ Form_pg_opclass opcform;
+ Oid opcID;
+
+ htup = OpClassCacheLookup(amID, opclassname, missing_ok);
+ if (!HeapTupleIsValid(htup))
+ return InvalidOid;
+ opcform = (Form_pg_opclass) GETSTRUCT(htup);
+ opcID = opcform->oid;
+ ReleaseSysCache(htup);
+
+ return opcID;
+}
+
/*
 * CreateOpFamily
 *		Internal routine to make the catalog entry for a new operator family.
 *
 * Caller must have done permissions checks etc. already.
 *
 * Inserts a pg_opfamily row owned by the current user, records the family's
 * dependencies (access method, namespace, owner, extension), notifies event
 * triggers and object-access hooks, and returns the new family's address.
 */
static ObjectAddress
CreateOpFamily(CreateOpFamilyStmt *stmt, const char *opfname,
			   Oid namespaceoid, Oid amoid)
{
	Oid			opfamilyoid;
	Relation	rel;
	HeapTuple	tup;
	Datum		values[Natts_pg_opfamily];
	bool		nulls[Natts_pg_opfamily];
	NameData	opfName;
	ObjectAddress myself,
				referenced;

	rel = table_open(OperatorFamilyRelationId, RowExclusiveLock);

	/*
	 * Make sure there is no existing opfamily of this name (this is just to
	 * give a more friendly error message than "duplicate key").
	 */
	if (SearchSysCacheExists3(OPFAMILYAMNAMENSP,
							  ObjectIdGetDatum(amoid),
							  CStringGetDatum(opfname),
							  ObjectIdGetDatum(namespaceoid)))
		ereport(ERROR,
				(errcode(ERRCODE_DUPLICATE_OBJECT),
				 errmsg("operator family \"%s\" for access method \"%s\" already exists",
						opfname, stmt->amname)));

	/*
	 * Okay, let's create the pg_opfamily entry.
	 */
	memset(values, 0, sizeof(values));
	memset(nulls, false, sizeof(nulls));

	/* Allocate a fresh OID, checking the index for collisions. */
	opfamilyoid = GetNewOidWithIndex(rel, OpfamilyOidIndexId,
									 Anum_pg_opfamily_oid);
	values[Anum_pg_opfamily_oid - 1] = ObjectIdGetDatum(opfamilyoid);
	values[Anum_pg_opfamily_opfmethod - 1] = ObjectIdGetDatum(amoid);
	namestrcpy(&opfName, opfname);
	values[Anum_pg_opfamily_opfname - 1] = NameGetDatum(&opfName);
	values[Anum_pg_opfamily_opfnamespace - 1] = ObjectIdGetDatum(namespaceoid);
	values[Anum_pg_opfamily_opfowner - 1] = ObjectIdGetDatum(GetUserId());

	tup = heap_form_tuple(rel->rd_att, values, nulls);

	CatalogTupleInsert(rel, tup);

	heap_freetuple(tup);

	/*
	 * Create dependencies for the opfamily proper.
	 */
	myself.classId = OperatorFamilyRelationId;
	myself.objectId = opfamilyoid;
	myself.objectSubId = 0;

	/* dependency on access method: AUTO, so dropping the AM drops us */
	referenced.classId = AccessMethodRelationId;
	referenced.objectId = amoid;
	referenced.objectSubId = 0;
	recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);

	/* dependency on namespace: NORMAL, so the schema cannot be dropped */
	referenced.classId = NamespaceRelationId;
	referenced.objectId = namespaceoid;
	referenced.objectSubId = 0;
	recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);

	/* dependency on owner */
	recordDependencyOnOwner(OperatorFamilyRelationId, opfamilyoid, GetUserId());

	/* dependency on extension */
	recordDependencyOnCurrentExtension(&myself, false);

	/* Report the new operator family to possibly interested event triggers */
	EventTriggerCollectSimpleCommand(myself, InvalidObjectAddress,
									 (Node *) stmt);

	/* Post creation hook for new operator family */
	InvokeObjectPostCreateHook(OperatorFamilyRelationId, opfamilyoid, 0);

	table_close(rel, RowExclusiveLock);

	return myself;
}
+
+/*
+ * DefineOpClass
+ * Define a new index operator class.
+ */
+ObjectAddress
+DefineOpClass(CreateOpClassStmt *stmt)
+{
+ char *opcname; /* name of opclass we're creating */
+ Oid amoid, /* our AM's oid */
+ typeoid, /* indexable datatype oid */
+ storageoid, /* storage datatype oid, if any */
+ namespaceoid, /* namespace to create opclass in */
+ opfamilyoid, /* oid of containing opfamily */
+ opclassoid; /* oid of opclass we create */
+ int maxOpNumber, /* amstrategies value */
+ optsProcNumber, /* amoptsprocnum value */
+ maxProcNumber; /* amsupport value */
+ bool amstorage; /* amstorage flag */
+ List *operators; /* OpFamilyMember list for operators */
+ List *procedures; /* OpFamilyMember list for support procs */
+ ListCell *l;
+ Relation rel;
+ HeapTuple tup;
+ Form_pg_am amform;
+ IndexAmRoutine *amroutine;
+ Datum values[Natts_pg_opclass];
+ bool nulls[Natts_pg_opclass];
+ AclResult aclresult;
+ NameData opcName;
+ ObjectAddress myself,
+ referenced;
+
+ /* Convert list of names to a name and namespace */
+ namespaceoid = QualifiedNameGetCreationNamespace(stmt->opclassname,
+ &opcname);
+
+ /* Check we have creation rights in target namespace */
+ aclresult = pg_namespace_aclcheck(namespaceoid, GetUserId(), ACL_CREATE);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error(aclresult, OBJECT_SCHEMA,
+ get_namespace_name(namespaceoid));
+
+ /* Get necessary info about access method */
+ tup = SearchSysCache1(AMNAME, CStringGetDatum(stmt->amname));
+ if (!HeapTupleIsValid(tup))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("access method \"%s\" does not exist",
+ stmt->amname)));
+
+ amform = (Form_pg_am) GETSTRUCT(tup);
+ amoid = amform->oid;
+ amroutine = GetIndexAmRoutineByAmId(amoid, false);
+ ReleaseSysCache(tup);
+
+ maxOpNumber = amroutine->amstrategies;
+ /* if amstrategies is zero, just enforce that op numbers fit in int16 */
+ if (maxOpNumber <= 0)
+ maxOpNumber = SHRT_MAX;
+ maxProcNumber = amroutine->amsupport;
+ optsProcNumber = amroutine->amoptsprocnum;
+ amstorage = amroutine->amstorage;
+
+ /* XXX Should we make any privilege check against the AM? */
+
+ /*
+ * The question of appropriate permissions for CREATE OPERATOR CLASS is
+ * interesting. Creating an opclass is tantamount to granting public
+ * execute access on the functions involved, since the index machinery
+ * generally does not check access permission before using the functions.
+ * A minimum expectation therefore is that the caller have execute
+ * privilege with grant option. Since we don't have a way to make the
+ * opclass go away if the grant option is revoked, we choose instead to
+ * require ownership of the functions. It's also not entirely clear what
+ * permissions should be required on the datatype, but ownership seems
+ * like a safe choice.
+ *
+ * Currently, we require superuser privileges to create an opclass. This
+ * seems necessary because we have no way to validate that the offered set
+ * of operators and functions are consistent with the AM's expectations.
+ * It would be nice to provide such a check someday, if it can be done
+ * without solving the halting problem :-(
+ *
+ * XXX re-enable NOT_USED code sections below if you remove this test.
+ */
+ if (!superuser())
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("must be superuser to create an operator class")));
+
+ /* Look up the datatype */
+ typeoid = typenameTypeId(NULL, stmt->datatype);
+
+#ifdef NOT_USED
+ /* XXX this is unnecessary given the superuser check above */
+ /* Check we have ownership of the datatype */
+ if (!pg_type_ownercheck(typeoid, GetUserId()))
+ aclcheck_error_type(ACLCHECK_NOT_OWNER, typeoid);
+#endif
+
+ /*
+ * Look up the containing operator family, or create one if FAMILY option
+ * was omitted and there's not a match already.
+ */
+ if (stmt->opfamilyname)
+ {
+ opfamilyoid = get_opfamily_oid(amoid, stmt->opfamilyname, false);
+ }
+ else
+ {
+ /* Lookup existing family of same name and namespace */
+ tup = SearchSysCache3(OPFAMILYAMNAMENSP,
+ ObjectIdGetDatum(amoid),
+ PointerGetDatum(opcname),
+ ObjectIdGetDatum(namespaceoid));
+ if (HeapTupleIsValid(tup))
+ {
+ opfamilyoid = ((Form_pg_opfamily) GETSTRUCT(tup))->oid;
+
+ /*
+ * XXX given the superuser check above, there's no need for an
+ * ownership check here
+ */
+ ReleaseSysCache(tup);
+ }
+ else
+ {
+ CreateOpFamilyStmt *opfstmt;
+ ObjectAddress tmpAddr;
+
+ opfstmt = makeNode(CreateOpFamilyStmt);
+ opfstmt->opfamilyname = stmt->opclassname;
+ opfstmt->amname = stmt->amname;
+
+ /*
+ * Create it ... again no need for more permissions ...
+ */
+ tmpAddr = CreateOpFamily(opfstmt, opcname, namespaceoid, amoid);
+ opfamilyoid = tmpAddr.objectId;
+ }
+ }
+
+ operators = NIL;
+ procedures = NIL;
+
+ /* Storage datatype is optional */
+ storageoid = InvalidOid;
+
+ /*
+ * Scan the "items" list to obtain additional info.
+ */
+ foreach(l, stmt->items)
+ {
+ CreateOpClassItem *item = lfirst_node(CreateOpClassItem, l);
+ Oid operOid;
+ Oid funcOid;
+ Oid sortfamilyOid;
+ OpFamilyMember *member;
+
+ switch (item->itemtype)
+ {
+ case OPCLASS_ITEM_OPERATOR:
+ if (item->number <= 0 || item->number > maxOpNumber)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("invalid operator number %d,"
+ " must be between 1 and %d",
+ item->number, maxOpNumber)));
+ if (item->name->objargs != NIL)
+ operOid = LookupOperWithArgs(item->name, false);
+ else
+ {
+ /* Default to binary op on input datatype */
+ operOid = LookupOperName(NULL, item->name->objname,
+ typeoid, typeoid,
+ false, -1);
+ }
+
+ if (item->order_family)
+ sortfamilyOid = get_opfamily_oid(BTREE_AM_OID,
+ item->order_family,
+ false);
+ else
+ sortfamilyOid = InvalidOid;
+
+#ifdef NOT_USED
+ /* XXX this is unnecessary given the superuser check above */
+ /* Caller must own operator and its underlying function */
+ if (!pg_oper_ownercheck(operOid, GetUserId()))
+ aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_OPERATOR,
+ get_opname(operOid));
+ funcOid = get_opcode(operOid);
+ if (!pg_proc_ownercheck(funcOid, GetUserId()))
+ aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FUNCTION,
+ get_func_name(funcOid));
+#endif
+
+ /* Save the info */
+ member = (OpFamilyMember *) palloc0(sizeof(OpFamilyMember));
+ member->is_func = false;
+ member->object = operOid;
+ member->number = item->number;
+ member->sortfamily = sortfamilyOid;
+ assignOperTypes(member, amoid, typeoid);
+ addFamilyMember(&operators, member);
+ break;
+ case OPCLASS_ITEM_FUNCTION:
+ if (item->number <= 0 || item->number > maxProcNumber)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("invalid function number %d,"
+ " must be between 1 and %d",
+ item->number, maxProcNumber)));
+ funcOid = LookupFuncWithArgs(OBJECT_FUNCTION, item->name, false);
+#ifdef NOT_USED
+ /* XXX this is unnecessary given the superuser check above */
+ /* Caller must own function */
+ if (!pg_proc_ownercheck(funcOid, GetUserId()))
+ aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FUNCTION,
+ get_func_name(funcOid));
+#endif
+ /* Save the info */
+ member = (OpFamilyMember *) palloc0(sizeof(OpFamilyMember));
+ member->is_func = true;
+ member->object = funcOid;
+ member->number = item->number;
+
+ /* allow overriding of the function's actual arg types */
+ if (item->class_args)
+ processTypesSpec(item->class_args,
+ &member->lefttype, &member->righttype);
+
+ assignProcTypes(member, amoid, typeoid, optsProcNumber);
+ addFamilyMember(&procedures, member);
+ break;
+ case OPCLASS_ITEM_STORAGETYPE:
+ if (OidIsValid(storageoid))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("storage type specified more than once")));
+ storageoid = typenameTypeId(NULL, item->storedtype);
+
+#ifdef NOT_USED
+ /* XXX this is unnecessary given the superuser check above */
+ /* Check we have ownership of the datatype */
+ if (!pg_type_ownercheck(storageoid, GetUserId()))
+ aclcheck_error_type(ACLCHECK_NOT_OWNER, storageoid);
+#endif
+ break;
+ default:
+ elog(ERROR, "unrecognized item type: %d", item->itemtype);
+ break;
+ }
+ }
+
+ /*
+ * If storagetype is specified, make sure it's legal.
+ */
+ if (OidIsValid(storageoid))
+ {
+ /* Just drop the spec if same as column datatype */
+ if (storageoid == typeoid)
+ storageoid = InvalidOid;
+ else if (!amstorage)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("storage type cannot be different from data type for access method \"%s\"",
+ stmt->amname)));
+ }
+
+ rel = table_open(OperatorClassRelationId, RowExclusiveLock);
+
+ /*
+ * Make sure there is no existing opclass of this name (this is just to
+ * give a more friendly error message than "duplicate key").
+ */
+ if (SearchSysCacheExists3(CLAAMNAMENSP,
+ ObjectIdGetDatum(amoid),
+ CStringGetDatum(opcname),
+ ObjectIdGetDatum(namespaceoid)))
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg("operator class \"%s\" for access method \"%s\" already exists",
+ opcname, stmt->amname)));
+
+ /*
+ * If we are creating a default opclass, check there isn't one already.
+ * (Note we do not restrict this test to visible opclasses; this ensures
+ * that typcache.c can find unique solutions to its questions.)
+ */
+ if (stmt->isDefault)
+ {
+ ScanKeyData skey[1];
+ SysScanDesc scan;
+
+ ScanKeyInit(&skey[0],
+ Anum_pg_opclass_opcmethod,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(amoid));
+
+ scan = systable_beginscan(rel, OpclassAmNameNspIndexId, true,
+ NULL, 1, skey);
+
+ while (HeapTupleIsValid(tup = systable_getnext(scan)))
+ {
+ Form_pg_opclass opclass = (Form_pg_opclass) GETSTRUCT(tup);
+
+ if (opclass->opcintype == typeoid && opclass->opcdefault)
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg("could not make operator class \"%s\" be default for type %s",
+ opcname,
+ TypeNameToString(stmt->datatype)),
+ errdetail("Operator class \"%s\" already is the default.",
+ NameStr(opclass->opcname))));
+ }
+
+ systable_endscan(scan);
+ }
+
+ /*
+ * Okay, let's create the pg_opclass entry.
+ */
+ memset(values, 0, sizeof(values));
+ memset(nulls, false, sizeof(nulls));
+
+ opclassoid = GetNewOidWithIndex(rel, OpclassOidIndexId,
+ Anum_pg_opclass_oid);
+ values[Anum_pg_opclass_oid - 1] = ObjectIdGetDatum(opclassoid);
+ values[Anum_pg_opclass_opcmethod - 1] = ObjectIdGetDatum(amoid);
+ namestrcpy(&opcName, opcname);
+ values[Anum_pg_opclass_opcname - 1] = NameGetDatum(&opcName);
+ values[Anum_pg_opclass_opcnamespace - 1] = ObjectIdGetDatum(namespaceoid);
+ values[Anum_pg_opclass_opcowner - 1] = ObjectIdGetDatum(GetUserId());
+ values[Anum_pg_opclass_opcfamily - 1] = ObjectIdGetDatum(opfamilyoid);
+ values[Anum_pg_opclass_opcintype - 1] = ObjectIdGetDatum(typeoid);
+ values[Anum_pg_opclass_opcdefault - 1] = BoolGetDatum(stmt->isDefault);
+ values[Anum_pg_opclass_opckeytype - 1] = ObjectIdGetDatum(storageoid);
+
+ tup = heap_form_tuple(rel->rd_att, values, nulls);
+
+ CatalogTupleInsert(rel, tup);
+
+ heap_freetuple(tup);
+
+ /*
+ * Now that we have the opclass OID, set up default dependency info for
+ * the pg_amop and pg_amproc entries. Historically, CREATE OPERATOR CLASS
+ * has created hard dependencies on the opclass, so that's what we use.
+ */
+ foreach(l, operators)
+ {
+ OpFamilyMember *op = (OpFamilyMember *) lfirst(l);
+
+ op->ref_is_hard = true;
+ op->ref_is_family = false;
+ op->refobjid = opclassoid;
+ }
+ foreach(l, procedures)
+ {
+ OpFamilyMember *proc = (OpFamilyMember *) lfirst(l);
+
+ proc->ref_is_hard = true;
+ proc->ref_is_family = false;
+ proc->refobjid = opclassoid;
+ }
+
+ /*
+ * Let the index AM editorialize on the dependency choices. It could also
+ * do further validation on the operators and functions, if it likes.
+ */
+ if (amroutine->amadjustmembers)
+ amroutine->amadjustmembers(opfamilyoid,
+ opclassoid,
+ operators,
+ procedures);
+
+ /*
+ * Now add tuples to pg_amop and pg_amproc tying in the operators and
+ * functions. Dependencies on them are inserted, too.
+ */
+ storeOperators(stmt->opfamilyname, amoid, opfamilyoid,
+ operators, false);
+ storeProcedures(stmt->opfamilyname, amoid, opfamilyoid,
+ procedures, false);
+
+ /* let event triggers know what happened */
+ EventTriggerCollectCreateOpClass(stmt, opclassoid, operators, procedures);
+
+ /*
+ * Create dependencies for the opclass proper. Note: we do not need a
+ * dependency link to the AM, because that exists through the opfamily.
+ */
+ myself.classId = OperatorClassRelationId;
+ myself.objectId = opclassoid;
+ myself.objectSubId = 0;
+
+ /* dependency on namespace */
+ referenced.classId = NamespaceRelationId;
+ referenced.objectId = namespaceoid;
+ referenced.objectSubId = 0;
+ recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
+
+ /* dependency on opfamily */
+ referenced.classId = OperatorFamilyRelationId;
+ referenced.objectId = opfamilyoid;
+ referenced.objectSubId = 0;
+ recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
+
+ /* dependency on indexed datatype */
+ referenced.classId = TypeRelationId;
+ referenced.objectId = typeoid;
+ referenced.objectSubId = 0;
+ recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
+
+ /* dependency on storage datatype */
+ if (OidIsValid(storageoid))
+ {
+ referenced.classId = TypeRelationId;
+ referenced.objectId = storageoid;
+ referenced.objectSubId = 0;
+ recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
+ }
+
+ /* dependency on owner */
+ recordDependencyOnOwner(OperatorClassRelationId, opclassoid, GetUserId());
+
+ /* dependency on extension */
+ recordDependencyOnCurrentExtension(&myself, false);
+
+ /* Post creation hook for new operator class */
+ InvokeObjectPostCreateHook(OperatorClassRelationId, opclassoid, 0);
+
+ table_close(rel, RowExclusiveLock);
+
+ return myself;
+}
+
+
+/*
+ * DefineOpFamily
+ *		Define a new index operator family.
+ */
+ObjectAddress
+DefineOpFamily(CreateOpFamilyStmt *stmt)
+{
+    char       *opfamname;      /* unqualified name of the new family */
+    Oid         namespaceoid;   /* schema the family will be created in */
+    Oid         amoid;          /* OID of the target access method */
+    AclResult   aclresult;
+
+    /* Split the qualified name into creation-schema OID plus bare name */
+    namespaceoid = QualifiedNameGetCreationNamespace(stmt->opfamilyname,
+                                                     &opfamname);
+
+    /* The caller needs CREATE rights on that schema */
+    aclresult = pg_namespace_aclcheck(namespaceoid, GetUserId(), ACL_CREATE);
+    if (aclresult != ACLCHECK_OK)
+        aclcheck_error(aclresult, OBJECT_SCHEMA,
+                       get_namespace_name(namespaceoid));
+
+    /* Resolve the access method name, erroring out if it doesn't exist */
+    amoid = get_index_am_oid(stmt->amname, false);
+
+    /* XXX Should we make any privilege check against the AM? */
+
+    /*
+     * Currently, we require superuser privileges to create an opfamily. See
+     * comments in DefineOpClass.
+     */
+    if (!superuser())
+        ereport(ERROR,
+                (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+                 errmsg("must be superuser to create an operator family")));
+
+    /* All checks passed; insert the pg_opfamily catalog entry */
+    return CreateOpFamily(stmt, opfamname, namespaceoid, amoid);
+}
+
+
+/*
+ * AlterOpFamily
+ *		Add or remove operators/procedures within an existing operator family.
+ *
+ * Note: this implements only ALTER OPERATOR FAMILY ... ADD/DROP.  Some
+ * other commands called ALTER OPERATOR FAMILY exist, but go through
+ * different code paths.
+ *
+ * Returns the OID of the operator family that was modified.
+ */
+Oid
+AlterOpFamily(AlterOpFamilyStmt *stmt)
+{
+    Oid         amoid,          /* our AM's oid */
+                opfamilyoid;    /* oid of opfamily */
+    int         maxOpNumber,    /* amstrategies value */
+                optsProcNumber, /* amopclassopts value */
+                maxProcNumber;  /* amsupport value */
+    HeapTuple   tup;
+    Form_pg_am  amform;
+    IndexAmRoutine *amroutine;
+
+    /* Get necessary info about access method */
+    tup = SearchSysCache1(AMNAME, CStringGetDatum(stmt->amname));
+    if (!HeapTupleIsValid(tup))
+        ereport(ERROR,
+                (errcode(ERRCODE_UNDEFINED_OBJECT),
+                 errmsg("access method \"%s\" does not exist",
+                        stmt->amname)));
+
+    amform = (Form_pg_am) GETSTRUCT(tup);
+    amoid = amform->oid;
+    amroutine = GetIndexAmRoutineByAmId(amoid, false);
+    /* amoid has been copied out, so the cache reference can be dropped now */
+    ReleaseSysCache(tup);
+
+    maxOpNumber = amroutine->amstrategies;
+    /* if amstrategies is zero, just enforce that op numbers fit in int16 */
+    if (maxOpNumber <= 0)
+        maxOpNumber = SHRT_MAX;
+    maxProcNumber = amroutine->amsupport;
+    optsProcNumber = amroutine->amoptsprocnum;
+
+    /* XXX Should we make any privilege check against the AM? */
+
+    /* Look up the opfamily */
+    opfamilyoid = get_opfamily_oid(amoid, stmt->opfamilyname, false);
+
+    /*
+     * Currently, we require superuser privileges to alter an opfamily.
+     *
+     * XXX re-enable NOT_USED code sections below if you remove this test.
+     */
+    if (!superuser())
+        ereport(ERROR,
+                (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+                 errmsg("must be superuser to alter an operator family")));
+
+    /*
+     * ADD and DROP cases need separate code from here on down.
+     */
+    if (stmt->isDrop)
+        AlterOpFamilyDrop(stmt, amoid, opfamilyoid,
+                          maxOpNumber, maxProcNumber, stmt->items);
+    else
+        AlterOpFamilyAdd(stmt, amoid, opfamilyoid,
+                         maxOpNumber, maxProcNumber, optsProcNumber,
+                         stmt->items);
+
+    return opfamilyoid;
+}
+
+/*
+ * ADD part of ALTER OP FAMILY
+ *
+ * Builds OpFamilyMember lists from the statement's items, lets the index
+ * AM validate/adjust them, then inserts the pg_amop/pg_amproc rows.
+ * Members added this way get soft dependencies on the family (see the
+ * ref_is_hard settings below), matching historical ALTER ADD behavior.
+ */
+static void
+AlterOpFamilyAdd(AlterOpFamilyStmt *stmt, Oid amoid, Oid opfamilyoid,
+                 int maxOpNumber, int maxProcNumber, int optsProcNumber,
+                 List *items)
+{
+    IndexAmRoutine *amroutine = GetIndexAmRoutineByAmId(amoid, false);
+    List       *operators;      /* OpFamilyMember list for operators */
+    List       *procedures;     /* OpFamilyMember list for support procs */
+    ListCell   *l;
+
+    operators = NIL;
+    procedures = NIL;
+
+    /*
+     * Scan the "items" list to obtain additional info.
+     */
+    foreach(l, items)
+    {
+        CreateOpClassItem *item = lfirst_node(CreateOpClassItem, l);
+        Oid         operOid;
+        Oid         funcOid;
+        Oid         sortfamilyOid;
+        OpFamilyMember *member;
+
+        switch (item->itemtype)
+        {
+            case OPCLASS_ITEM_OPERATOR:
+                if (item->number <= 0 || item->number > maxOpNumber)
+                    ereport(ERROR,
+                            (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                             errmsg("invalid operator number %d,"
+                                    " must be between 1 and %d",
+                                    item->number, maxOpNumber)));
+                if (item->name->objargs != NIL)
+                    operOid = LookupOperWithArgs(item->name, false);
+                else
+                {
+                    /*
+                     * Unlike CREATE OPERATOR CLASS, there is no opclass
+                     * input type to default the operand types from here.
+                     */
+                    ereport(ERROR,
+                            (errcode(ERRCODE_SYNTAX_ERROR),
+                             errmsg("operator argument types must be specified in ALTER OPERATOR FAMILY")));
+                    operOid = InvalidOid;   /* keep compiler quiet */
+                }
+
+                /* If an ORDER BY clause was given, resolve its btree family */
+                if (item->order_family)
+                    sortfamilyOid = get_opfamily_oid(BTREE_AM_OID,
+                                                     item->order_family,
+                                                     false);
+                else
+                    sortfamilyOid = InvalidOid;
+
+#ifdef NOT_USED
+                /* XXX this is unnecessary given the superuser check above */
+                /* Caller must own operator and its underlying function */
+                if (!pg_oper_ownercheck(operOid, GetUserId()))
+                    aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_OPERATOR,
+                                   get_opname(operOid));
+                funcOid = get_opcode(operOid);
+                if (!pg_proc_ownercheck(funcOid, GetUserId()))
+                    aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FUNCTION,
+                                   get_func_name(funcOid));
+#endif
+
+                /* Save the info */
+                member = (OpFamilyMember *) palloc0(sizeof(OpFamilyMember));
+                member->is_func = false;
+                member->object = operOid;
+                member->number = item->number;
+                member->sortfamily = sortfamilyOid;
+                /* We can set up dependency fields immediately */
+                /* Historically, ALTER ADD has created soft dependencies */
+                member->ref_is_hard = false;
+                member->ref_is_family = true;
+                member->refobjid = opfamilyoid;
+                assignOperTypes(member, amoid, InvalidOid);
+                addFamilyMember(&operators, member);
+                break;
+            case OPCLASS_ITEM_FUNCTION:
+                if (item->number <= 0 || item->number > maxProcNumber)
+                    ereport(ERROR,
+                            (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                             errmsg("invalid function number %d,"
+                                    " must be between 1 and %d",
+                                    item->number, maxProcNumber)));
+                funcOid = LookupFuncWithArgs(OBJECT_FUNCTION, item->name, false);
+#ifdef NOT_USED
+                /* XXX this is unnecessary given the superuser check above */
+                /* Caller must own function */
+                if (!pg_proc_ownercheck(funcOid, GetUserId()))
+                    aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FUNCTION,
+                                   get_func_name(funcOid));
+#endif
+
+                /* Save the info */
+                member = (OpFamilyMember *) palloc0(sizeof(OpFamilyMember));
+                member->is_func = true;
+                member->object = funcOid;
+                member->number = item->number;
+                /* We can set up dependency fields immediately */
+                /* Historically, ALTER ADD has created soft dependencies */
+                member->ref_is_hard = false;
+                member->ref_is_family = true;
+                member->refobjid = opfamilyoid;
+
+                /* allow overriding of the function's actual arg types */
+                if (item->class_args)
+                    processTypesSpec(item->class_args,
+                                     &member->lefttype, &member->righttype);
+
+                assignProcTypes(member, amoid, InvalidOid, optsProcNumber);
+                addFamilyMember(&procedures, member);
+                break;
+            case OPCLASS_ITEM_STORAGETYPE:
+                ereport(ERROR,
+                        (errcode(ERRCODE_SYNTAX_ERROR),
+                         errmsg("STORAGE cannot be specified in ALTER OPERATOR FAMILY")));
+                break;
+            default:
+                elog(ERROR, "unrecognized item type: %d", item->itemtype);
+                break;
+        }
+    }
+
+    /*
+     * Let the index AM editorialize on the dependency choices.  It could also
+     * do further validation on the operators and functions, if it likes.
+     */
+    if (amroutine->amadjustmembers)
+        amroutine->amadjustmembers(opfamilyoid,
+                                   InvalidOid,  /* no specific opclass */
+                                   operators,
+                                   procedures);
+
+    /*
+     * Add tuples to pg_amop and pg_amproc tying in the operators and
+     * functions.  Dependencies on them are inserted, too.
+     */
+    storeOperators(stmt->opfamilyname, amoid, opfamilyoid,
+                   operators, true);
+    storeProcedures(stmt->opfamilyname, amoid, opfamilyoid,
+                    procedures, true);
+
+    /* make information available to event triggers */
+    EventTriggerCollectAlterOpFam(stmt, opfamilyoid,
+                                  operators, procedures);
+}
+
+/*
+ * DROP part of ALTER OP FAMILY
+ *
+ * Members to drop are identified only by strategy/support number plus the
+ * (lefttype, righttype) pair from the mandatory argument-types clause;
+ * member->object stays zero (palloc0) since the target OID is looked up
+ * later by dropOperators/dropProcedures.
+ */
+static void
+AlterOpFamilyDrop(AlterOpFamilyStmt *stmt, Oid amoid, Oid opfamilyoid,
+                  int maxOpNumber, int maxProcNumber, List *items)
+{
+    List       *operators;      /* OpFamilyMember list for operators */
+    List       *procedures;     /* OpFamilyMember list for support procs */
+    ListCell   *l;
+
+    operators = NIL;
+    procedures = NIL;
+
+    /*
+     * Scan the "items" list to obtain additional info.
+     */
+    foreach(l, items)
+    {
+        CreateOpClassItem *item = lfirst_node(CreateOpClassItem, l);
+        Oid         lefttype,
+                    righttype;
+        OpFamilyMember *member;
+
+        switch (item->itemtype)
+        {
+            case OPCLASS_ITEM_OPERATOR:
+                if (item->number <= 0 || item->number > maxOpNumber)
+                    ereport(ERROR,
+                            (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                             errmsg("invalid operator number %d,"
+                                    " must be between 1 and %d",
+                                    item->number, maxOpNumber)));
+                processTypesSpec(item->class_args, &lefttype, &righttype);
+                /* Save the info */
+                member = (OpFamilyMember *) palloc0(sizeof(OpFamilyMember));
+                member->is_func = false;
+                member->number = item->number;
+                member->lefttype = lefttype;
+                member->righttype = righttype;
+                addFamilyMember(&operators, member);
+                break;
+            case OPCLASS_ITEM_FUNCTION:
+                if (item->number <= 0 || item->number > maxProcNumber)
+                    ereport(ERROR,
+                            (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                             errmsg("invalid function number %d,"
+                                    " must be between 1 and %d",
+                                    item->number, maxProcNumber)));
+                processTypesSpec(item->class_args, &lefttype, &righttype);
+                /* Save the info */
+                member = (OpFamilyMember *) palloc0(sizeof(OpFamilyMember));
+                member->is_func = true;
+                member->number = item->number;
+                member->lefttype = lefttype;
+                member->righttype = righttype;
+                addFamilyMember(&procedures, member);
+                break;
+            case OPCLASS_ITEM_STORAGETYPE:
+                /* grammar prevents this from appearing */
+                /* FALLTHROUGH */
+            default:
+                elog(ERROR, "unrecognized item type: %d", item->itemtype);
+                break;
+        }
+    }
+
+    /*
+     * Remove tuples from pg_amop and pg_amproc.
+     */
+    dropOperators(stmt->opfamilyname, amoid, opfamilyoid, operators);
+    dropProcedures(stmt->opfamilyname, amoid, opfamilyoid, procedures);
+
+    /* make information available to event triggers */
+    EventTriggerCollectAlterOpFam(stmt, opfamilyoid,
+                                  operators, procedures);
+}
+
+
+/*
+ * Deal with explicit arg types used in ALTER ADD/DROP
+ *
+ * "args" must hold one or two TypeName nodes; the results are returned
+ * through *lefttype and *righttype (a single type is used for both).
+ */
+static void
+processTypesSpec(List *args, Oid *lefttype, Oid *righttype)
+{
+    int         nargs = list_length(args);
+    TypeName   *tn;
+
+    Assert(args != NIL);
+
+    /* The first entry always supplies the left-hand type */
+    tn = (TypeName *) linitial(args);
+    *lefttype = typenameTypeId(NULL, tn);
+
+    /* A second entry supplies the right-hand type; otherwise reuse left */
+    if (nargs > 1)
+    {
+        tn = (TypeName *) lsecond(args);
+        *righttype = typenameTypeId(NULL, tn);
+    }
+    else
+        *righttype = *lefttype;
+
+    /* Anything beyond two entries is a syntax error */
+    if (nargs > 2)
+        ereport(ERROR,
+                (errcode(ERRCODE_SYNTAX_ERROR),
+                 errmsg("one or two argument types must be specified")));
+}
+
+
+/*
+ * Determine the lefttype/righttype to assign to an operator,
+ * and do any validity checking we can manage.
+ *
+ * member: operator entry to fill in / validate; lefttype/righttype may
+ * already be set if the caller parsed an explicit argument-types clause.
+ * amoid: OID of the index access method being targeted.
+ * typeoid: not referenced in this function's body (callers pass the
+ * opclass input type or InvalidOid); apparently retained for symmetry
+ * with assignProcTypes.
+ */
+static void
+assignOperTypes(OpFamilyMember *member, Oid amoid, Oid typeoid)
+{
+    Operator    optup;
+    Form_pg_operator opform;
+
+    /* Fetch the operator definition */
+    optup = SearchSysCache1(OPEROID, ObjectIdGetDatum(member->object));
+    if (!HeapTupleIsValid(optup))
+        elog(ERROR, "cache lookup failed for operator %u", member->object);
+    opform = (Form_pg_operator) GETSTRUCT(optup);
+
+    /*
+     * Opfamily operators must be binary.
+     */
+    if (opform->oprkind != 'b')
+        ereport(ERROR,
+                (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                 errmsg("index operators must be binary")));
+
+    if (OidIsValid(member->sortfamily))
+    {
+        /*
+         * Ordering op, check index supports that.  (We could perhaps also
+         * check that the operator returns a type supported by the sortfamily,
+         * but that seems more trouble than it's worth here.  If it does not,
+         * the operator will never be matchable to any ORDER BY clause, but no
+         * worse consequences can ensue.  Also, trying to check that would
+         * create an ordering hazard during dump/reload: it's possible that
+         * the family has been created but not yet populated with the required
+         * operators.)
+         */
+        IndexAmRoutine *amroutine = GetIndexAmRoutineByAmId(amoid, false);
+
+        if (!amroutine->amcanorderbyop)
+            ereport(ERROR,
+                    (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                     errmsg("access method \"%s\" does not support ordering operators",
+                            get_am_name(amoid))));
+    }
+    else
+    {
+        /*
+         * Search operators must return boolean.
+         */
+        if (opform->oprresult != BOOLOID)
+            ereport(ERROR,
+                    (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                     errmsg("index search operators must return boolean")));
+    }
+
+    /*
+     * If lefttype/righttype isn't specified, use the operator's input types
+     */
+    if (!OidIsValid(member->lefttype))
+        member->lefttype = opform->oprleft;
+    if (!OidIsValid(member->righttype))
+        member->righttype = opform->oprright;
+
+    /* Done reading opform fields; release the syscache reference */
+    ReleaseSysCache(optup);
+}
+
+/*
+ * Determine the lefttype/righttype to assign to a support procedure,
+ * and do any validity checking we can manage.
+ *
+ * member: support function entry to fill in / validate; lefttype/righttype
+ * may already be set from an explicit argument-types clause.
+ * amoid: OID of the index access method being targeted.
+ * typeoid: opclass input type when called from CREATE OPERATOR CLASS, or
+ * InvalidOid in the ALTER OPERATOR FAMILY case (see callers).
+ * opclassOptsProcNum: support-function number reserved for the opclass
+ * options parsing function (the AM's amoptsprocnum), which has a fixed
+ * signature checked below.
+ */
+static void
+assignProcTypes(OpFamilyMember *member, Oid amoid, Oid typeoid,
+                int opclassOptsProcNum)
+{
+    HeapTuple   proctup;
+    Form_pg_proc procform;
+
+    /* Fetch the procedure definition */
+    proctup = SearchSysCache1(PROCOID, ObjectIdGetDatum(member->object));
+    if (!HeapTupleIsValid(proctup))
+        elog(ERROR, "cache lookup failed for function %u", member->object);
+    procform = (Form_pg_proc) GETSTRUCT(proctup);
+
+    /* Check the signature of the opclass options parsing function */
+    if (member->number == opclassOptsProcNum)
+    {
+        if (OidIsValid(typeoid))
+        {
+            /* CREATE OPERATOR CLASS: any explicit types must match opcintype */
+            if ((OidIsValid(member->lefttype) && member->lefttype != typeoid) ||
+                (OidIsValid(member->righttype) && member->righttype != typeoid))
+                ereport(ERROR,
+                        (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                         errmsg("associated data types for operator class options parsing functions must match opclass input type")));
+        }
+        else
+        {
+            /* ALTER FAMILY: no opcintype available, so just require equality */
+            if (member->lefttype != member->righttype)
+                ereport(ERROR,
+                        (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                         errmsg("left and right associated data types for operator class options parsing functions must match")));
+        }
+
+        /* The options proc must be exactly (internal) RETURNS void */
+        if (procform->prorettype != VOIDOID ||
+            procform->pronargs != 1 ||
+            procform->proargtypes.values[0] != INTERNALOID)
+            ereport(ERROR,
+                    (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                     errmsg("invalid operator class options parsing function"),
+                     errhint("Valid signature of operator class options parsing function is %s.",
+                             "(internal) RETURNS void")));
+    }
+
+    /*
+     * btree comparison procs must be 2-arg procs returning int4.  btree
+     * sortsupport procs must take internal and return void.  btree in_range
+     * procs must be 5-arg procs returning bool.  btree equalimage procs must
+     * take 1 arg and return bool.  hash support proc 1 must be a 1-arg proc
+     * returning int4, while proc 2 must be a 2-arg proc returning int8.
+     * Otherwise we don't know.
+     */
+    else if (amoid == BTREE_AM_OID)
+    {
+        if (member->number == BTORDER_PROC)
+        {
+            if (procform->pronargs != 2)
+                ereport(ERROR,
+                        (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                         errmsg("btree comparison functions must have two arguments")));
+            if (procform->prorettype != INT4OID)
+                ereport(ERROR,
+                        (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                         errmsg("btree comparison functions must return integer")));
+
+            /*
+             * If lefttype/righttype isn't specified, use the proc's input
+             * types
+             */
+            if (!OidIsValid(member->lefttype))
+                member->lefttype = procform->proargtypes.values[0];
+            if (!OidIsValid(member->righttype))
+                member->righttype = procform->proargtypes.values[1];
+        }
+        else if (member->number == BTSORTSUPPORT_PROC)
+        {
+            if (procform->pronargs != 1 ||
+                procform->proargtypes.values[0] != INTERNALOID)
+                ereport(ERROR,
+                        (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                         errmsg("btree sort support functions must accept type \"internal\"")));
+            if (procform->prorettype != VOIDOID)
+                ereport(ERROR,
+                        (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                         errmsg("btree sort support functions must return void")));
+
+            /*
+             * Can't infer lefttype/righttype from proc, so use default rule
+             */
+        }
+        else if (member->number == BTINRANGE_PROC)
+        {
+            if (procform->pronargs != 5)
+                ereport(ERROR,
+                        (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                         errmsg("btree in_range functions must have five arguments")));
+            if (procform->prorettype != BOOLOID)
+                ereport(ERROR,
+                        (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                         errmsg("btree in_range functions must return boolean")));
+
+            /*
+             * If lefttype/righttype isn't specified, use the proc's input
+             * types (we look at the test-value and offset arguments)
+             */
+            if (!OidIsValid(member->lefttype))
+                member->lefttype = procform->proargtypes.values[0];
+            if (!OidIsValid(member->righttype))
+                member->righttype = procform->proargtypes.values[2];
+        }
+        else if (member->number == BTEQUALIMAGE_PROC)
+        {
+            if (procform->pronargs != 1)
+                ereport(ERROR,
+                        (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                         errmsg("btree equal image functions must have one argument")));
+            if (procform->prorettype != BOOLOID)
+                ereport(ERROR,
+                        (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                         errmsg("btree equal image functions must return boolean")));
+
+            /*
+             * pg_amproc functions are indexed by (lefttype, righttype), but
+             * an equalimage function can only be called at CREATE INDEX time.
+             * The same opclass opcintype OID is always used for leftype and
+             * righttype.  Providing a cross-type routine isn't sensible.
+             * Reject cross-type ALTER OPERATOR FAMILY ...  ADD FUNCTION 4
+             * statements here.
+             */
+            if (member->lefttype != member->righttype)
+                ereport(ERROR,
+                        (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                         errmsg("btree equal image functions must not be cross-type")));
+        }
+    }
+    else if (amoid == HASH_AM_OID)
+    {
+        if (member->number == HASHSTANDARD_PROC)
+        {
+            if (procform->pronargs != 1)
+                ereport(ERROR,
+                        (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                         errmsg("hash function 1 must have one argument")));
+            if (procform->prorettype != INT4OID)
+                ereport(ERROR,
+                        (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                         errmsg("hash function 1 must return integer")));
+        }
+        else if (member->number == HASHEXTENDED_PROC)
+        {
+            if (procform->pronargs != 2)
+                ereport(ERROR,
+                        (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                         errmsg("hash function 2 must have two arguments")));
+            if (procform->prorettype != INT8OID)
+                ereport(ERROR,
+                        (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                         errmsg("hash function 2 must return bigint")));
+        }
+
+        /*
+         * If lefttype/righttype isn't specified, use the proc's input type
+         */
+        if (!OidIsValid(member->lefttype))
+            member->lefttype = procform->proargtypes.values[0];
+        if (!OidIsValid(member->righttype))
+            member->righttype = procform->proargtypes.values[0];
+    }
+
+    /*
+     * The default in CREATE OPERATOR CLASS is to use the class' opcintype as
+     * lefttype and righttype.  In CREATE or ALTER OPERATOR FAMILY, opcintype
+     * isn't available, so make the user specify the types.
+     */
+    if (!OidIsValid(member->lefttype))
+        member->lefttype = typeoid;
+    if (!OidIsValid(member->righttype))
+        member->righttype = typeoid;
+
+    if (!OidIsValid(member->lefttype) || !OidIsValid(member->righttype))
+        ereport(ERROR,
+                (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                 errmsg("associated data types must be specified for index support function")));
+
+    /* Done reading procform fields; release the syscache reference */
+    ReleaseSysCache(proctup);
+}
+
+/*
+ * Add a new family member to the appropriate list, after checking for
+ * duplicated strategy or proc number.
+ *
+ * Raises an error if an entry with the same (number, lefttype, righttype)
+ * key is already present; otherwise appends "member" to *list.
+ */
+static void
+addFamilyMember(List **list, OpFamilyMember *member)
+{
+    ListCell   *lc;
+
+    foreach(lc, *list)
+    {
+        OpFamilyMember *entry = (OpFamilyMember *) lfirst(lc);
+
+        /* Skip entries that don't collide on the full key */
+        if (entry->number != member->number ||
+            entry->lefttype != member->lefttype ||
+            entry->righttype != member->righttype)
+            continue;
+
+        if (member->is_func)
+            ereport(ERROR,
+                    (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                     errmsg("function number %d for (%s,%s) appears more than once",
+                            member->number,
+                            format_type_be(member->lefttype),
+                            format_type_be(member->righttype))));
+        else
+            ereport(ERROR,
+                    (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                     errmsg("operator number %d for (%s,%s) appears more than once",
+                            member->number,
+                            format_type_be(member->lefttype),
+                            format_type_be(member->righttype))));
+    }
+
+    *list = lappend(*list, member);
+}
+
+/*
+ * Dump the operators to pg_amop
+ *
+ * We also make dependency entries in pg_depend for the pg_amop entries.
+ *
+ * isAdd is true when called from ALTER OPERATOR FAMILY ADD; it enables a
+ * pre-check against existing pg_amop rows purely for a friendlier error.
+ */
+static void
+storeOperators(List *opfamilyname, Oid amoid, Oid opfamilyoid,
+               List *operators, bool isAdd)
+{
+    Relation    rel;
+    Datum       values[Natts_pg_amop];
+    bool        nulls[Natts_pg_amop];
+    HeapTuple   tup;
+    Oid         entryoid;
+    ObjectAddress myself,
+                referenced;
+    ListCell   *l;
+
+    rel = table_open(AccessMethodOperatorRelationId, RowExclusiveLock);
+
+    foreach(l, operators)
+    {
+        OpFamilyMember *op = (OpFamilyMember *) lfirst(l);
+        char        oppurpose;
+
+        /*
+         * If adding to an existing family, check for conflict with an
+         * existing pg_amop entry (just to give a nicer error message)
+         */
+        if (isAdd &&
+            SearchSysCacheExists4(AMOPSTRATEGY,
+                                  ObjectIdGetDatum(opfamilyoid),
+                                  ObjectIdGetDatum(op->lefttype),
+                                  ObjectIdGetDatum(op->righttype),
+                                  Int16GetDatum(op->number)))
+            ereport(ERROR,
+                    (errcode(ERRCODE_DUPLICATE_OBJECT),
+                     errmsg("operator %d(%s,%s) already exists in operator family \"%s\"",
+                            op->number,
+                            format_type_be(op->lefttype),
+                            format_type_be(op->righttype),
+                            NameListToString(opfamilyname))));
+
+        /* A valid sortfamily marks this as an ORDER BY (ordering) operator */
+        oppurpose = OidIsValid(op->sortfamily) ? AMOP_ORDER : AMOP_SEARCH;
+
+        /* Create the pg_amop entry */
+        memset(values, 0, sizeof(values));
+        memset(nulls, false, sizeof(nulls));
+
+        entryoid = GetNewOidWithIndex(rel, AccessMethodOperatorOidIndexId,
+                                      Anum_pg_amop_oid);
+        values[Anum_pg_amop_oid - 1] = ObjectIdGetDatum(entryoid);
+        values[Anum_pg_amop_amopfamily - 1] = ObjectIdGetDatum(opfamilyoid);
+        values[Anum_pg_amop_amoplefttype - 1] = ObjectIdGetDatum(op->lefttype);
+        values[Anum_pg_amop_amoprighttype - 1] = ObjectIdGetDatum(op->righttype);
+        values[Anum_pg_amop_amopstrategy - 1] = Int16GetDatum(op->number);
+        values[Anum_pg_amop_amoppurpose - 1] = CharGetDatum(oppurpose);
+        values[Anum_pg_amop_amopopr - 1] = ObjectIdGetDatum(op->object);
+        values[Anum_pg_amop_amopmethod - 1] = ObjectIdGetDatum(amoid);
+        values[Anum_pg_amop_amopsortfamily - 1] = ObjectIdGetDatum(op->sortfamily);
+
+        tup = heap_form_tuple(rel->rd_att, values, nulls);
+
+        CatalogTupleInsert(rel, tup);
+
+        heap_freetuple(tup);
+
+        /* Make its dependencies */
+        myself.classId = AccessMethodOperatorRelationId;
+        myself.objectId = entryoid;
+        myself.objectSubId = 0;
+
+        /* dependency on the operator itself */
+        referenced.classId = OperatorRelationId;
+        referenced.objectId = op->object;
+        referenced.objectSubId = 0;
+
+        /* see comments in amapi.h about dependency strength */
+        recordDependencyOn(&myself, &referenced,
+                           op->ref_is_hard ? DEPENDENCY_NORMAL : DEPENDENCY_AUTO);
+
+        /* dependency on the containing opfamily or opclass */
+        referenced.classId = op->ref_is_family ? OperatorFamilyRelationId :
+            OperatorClassRelationId;
+        referenced.objectId = op->refobjid;
+        referenced.objectSubId = 0;
+
+        recordDependencyOn(&myself, &referenced,
+                           op->ref_is_hard ? DEPENDENCY_INTERNAL : DEPENDENCY_AUTO);
+
+        /*
+         * An ordering operator (valid sortfamily, i.e. AMOP_ORDER purpose
+         * above) also needs a dep on the btree opfamily it sorts by.
+         */
+        if (OidIsValid(op->sortfamily))
+        {
+            referenced.classId = OperatorFamilyRelationId;
+            referenced.objectId = op->sortfamily;
+            referenced.objectSubId = 0;
+
+            recordDependencyOn(&myself, &referenced,
+                               op->ref_is_hard ? DEPENDENCY_NORMAL : DEPENDENCY_AUTO);
+        }
+
+        /* Post create hook of this access method operator */
+        InvokeObjectPostCreateHook(AccessMethodOperatorRelationId,
+                                   entryoid, 0);
+    }
+
+    table_close(rel, RowExclusiveLock);
+}
+
+/*
+ * Dump the procedures (support routines) to pg_amproc
+ *
+ * We also make dependency entries in pg_depend for the pg_amproc entries.
+ *
+ * isAdd is true when called from ALTER OPERATOR FAMILY ADD; it enables a
+ * pre-check against existing pg_amproc rows purely for a friendlier error.
+ */
+static void
+storeProcedures(List *opfamilyname, Oid amoid, Oid opfamilyoid,
+                List *procedures, bool isAdd)
+{
+    Relation    rel;
+    Datum       values[Natts_pg_amproc];
+    bool        nulls[Natts_pg_amproc];
+    HeapTuple   tup;
+    Oid         entryoid;
+    ObjectAddress myself,
+                referenced;
+    ListCell   *l;
+
+    rel = table_open(AccessMethodProcedureRelationId, RowExclusiveLock);
+
+    foreach(l, procedures)
+    {
+        OpFamilyMember *proc = (OpFamilyMember *) lfirst(l);
+
+        /*
+         * If adding to an existing family, check for conflict with an
+         * existing pg_amproc entry (just to give a nicer error message)
+         */
+        if (isAdd &&
+            SearchSysCacheExists4(AMPROCNUM,
+                                  ObjectIdGetDatum(opfamilyoid),
+                                  ObjectIdGetDatum(proc->lefttype),
+                                  ObjectIdGetDatum(proc->righttype),
+                                  Int16GetDatum(proc->number)))
+            ereport(ERROR,
+                    (errcode(ERRCODE_DUPLICATE_OBJECT),
+                     errmsg("function %d(%s,%s) already exists in operator family \"%s\"",
+                            proc->number,
+                            format_type_be(proc->lefttype),
+                            format_type_be(proc->righttype),
+                            NameListToString(opfamilyname))));
+
+        /* Create the pg_amproc entry */
+        memset(values, 0, sizeof(values));
+        memset(nulls, false, sizeof(nulls));
+
+        entryoid = GetNewOidWithIndex(rel, AccessMethodProcedureOidIndexId,
+                                      Anum_pg_amproc_oid);
+        values[Anum_pg_amproc_oid - 1] = ObjectIdGetDatum(entryoid);
+        values[Anum_pg_amproc_amprocfamily - 1] = ObjectIdGetDatum(opfamilyoid);
+        values[Anum_pg_amproc_amproclefttype - 1] = ObjectIdGetDatum(proc->lefttype);
+        values[Anum_pg_amproc_amprocrighttype - 1] = ObjectIdGetDatum(proc->righttype);
+        values[Anum_pg_amproc_amprocnum - 1] = Int16GetDatum(proc->number);
+        values[Anum_pg_amproc_amproc - 1] = ObjectIdGetDatum(proc->object);
+
+        tup = heap_form_tuple(rel->rd_att, values, nulls);
+
+        CatalogTupleInsert(rel, tup);
+
+        heap_freetuple(tup);
+
+        /* Make its dependencies */
+        myself.classId = AccessMethodProcedureRelationId;
+        myself.objectId = entryoid;
+        myself.objectSubId = 0;
+
+        /* dependency on the support function itself */
+        referenced.classId = ProcedureRelationId;
+        referenced.objectId = proc->object;
+        referenced.objectSubId = 0;
+
+        /* see comments in amapi.h about dependency strength */
+        recordDependencyOn(&myself, &referenced,
+                           proc->ref_is_hard ? DEPENDENCY_NORMAL : DEPENDENCY_AUTO);
+
+        /* dependency on the containing opfamily or opclass */
+        referenced.classId = proc->ref_is_family ? OperatorFamilyRelationId :
+            OperatorClassRelationId;
+        referenced.objectId = proc->refobjid;
+        referenced.objectSubId = 0;
+
+        recordDependencyOn(&myself, &referenced,
+                           proc->ref_is_hard ? DEPENDENCY_INTERNAL : DEPENDENCY_AUTO);
+
+        /* Post create hook of access method procedure */
+        InvokeObjectPostCreateHook(AccessMethodProcedureRelationId,
+                                   entryoid, 0);
+    }
+
+    table_close(rel, RowExclusiveLock);
+}
+
+
+/*
+ * Remove operator entries from an opfamily.
+ *
+ * Note: this is only allowed for "loose" members of an opfamily, hence
+ * behavior is always RESTRICT.
+ *
+ * opfamilyname: qualified family name, used only in the error message.
+ * amoid: access method OID; not referenced here (NOTE(review): presumably
+ * kept for signature symmetry with the store/drop siblings -- confirm).
+ * operators: list of OpFamilyMember entries identifying rows to delete.
+ */
+static void
+dropOperators(List *opfamilyname, Oid amoid, Oid opfamilyoid,
+ List *operators)
+{
+ ListCell *l;
+
+ foreach(l, operators)
+ {
+ OpFamilyMember *op = (OpFamilyMember *) lfirst(l);
+ Oid amopid;
+ ObjectAddress object;
+
+ /* Locate the pg_amop row via the (family, lefttype, righttype, strategy) cache */
+ amopid = GetSysCacheOid4(AMOPSTRATEGY, Anum_pg_amop_oid,
+ ObjectIdGetDatum(opfamilyoid),
+ ObjectIdGetDatum(op->lefttype),
+ ObjectIdGetDatum(op->righttype),
+ Int16GetDatum(op->number));
+ if (!OidIsValid(amopid))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("operator %d(%s,%s) does not exist in operator family \"%s\"",
+ op->number,
+ format_type_be(op->lefttype),
+ format_type_be(op->righttype),
+ NameListToString(opfamilyname))));
+
+ object.classId = AccessMethodOperatorRelationId;
+ object.objectId = amopid;
+ object.objectSubId = 0;
+
+ /* performDeletion also cleans up the pg_depend entries */
+ performDeletion(&object, DROP_RESTRICT, 0);
+ }
+}
+
+/*
+ * Remove procedure entries from an opfamily.
+ *
+ * Note: this is only allowed for "loose" members of an opfamily, hence
+ * behavior is always RESTRICT.
+ *
+ * opfamilyname: qualified family name, used only in the error message.
+ * amoid: access method OID; not referenced here (NOTE(review): presumably
+ * kept for signature symmetry with the store/drop siblings -- confirm).
+ * procedures: list of OpFamilyMember entries identifying rows to delete.
+ */
+static void
+dropProcedures(List *opfamilyname, Oid amoid, Oid opfamilyoid,
+ List *procedures)
+{
+ ListCell *l;
+
+ foreach(l, procedures)
+ {
+ OpFamilyMember *op = (OpFamilyMember *) lfirst(l);
+ Oid amprocid;
+ ObjectAddress object;
+
+ /* Locate the pg_amproc row via the (family, lefttype, righttype, number) cache */
+ amprocid = GetSysCacheOid4(AMPROCNUM, Anum_pg_amproc_oid,
+ ObjectIdGetDatum(opfamilyoid),
+ ObjectIdGetDatum(op->lefttype),
+ ObjectIdGetDatum(op->righttype),
+ Int16GetDatum(op->number));
+ if (!OidIsValid(amprocid))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("function %d(%s,%s) does not exist in operator family \"%s\"",
+ op->number,
+ format_type_be(op->lefttype),
+ format_type_be(op->righttype),
+ NameListToString(opfamilyname))));
+
+ object.classId = AccessMethodProcedureRelationId;
+ object.objectId = amprocid;
+ object.objectSubId = 0;
+
+ /* performDeletion also cleans up the pg_depend entries */
+ performDeletion(&object, DROP_RESTRICT, 0);
+ }
+}
+
+/*
+ * Subroutine for ALTER OPERATOR CLASS SET SCHEMA/RENAME
+ *
+ * Is there an operator class with the given name and signature already
+ * in the given namespace? If so, raise an appropriate error message.
+ *
+ * Opclass names are unique per (access method, name, namespace), which is
+ * exactly the CLAAMNAMENSP syscache key checked here.
+ */
+void
+IsThereOpClassInNamespace(const char *opcname, Oid opcmethod,
+ Oid opcnamespace)
+{
+ /* make sure the new name doesn't exist */
+ if (SearchSysCacheExists3(CLAAMNAMENSP,
+ ObjectIdGetDatum(opcmethod),
+ CStringGetDatum(opcname),
+ ObjectIdGetDatum(opcnamespace)))
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg("operator class \"%s\" for access method \"%s\" already exists in schema \"%s\"",
+ opcname,
+ get_am_name(opcmethod),
+ get_namespace_name(opcnamespace))));
+}
+
+/*
+ * Subroutine for ALTER OPERATOR FAMILY SET SCHEMA/RENAME
+ *
+ * Is there an operator family with the given name and signature already
+ * in the given namespace? If so, raise an appropriate error message.
+ *
+ * Opfamily names are unique per (access method, name, namespace), which is
+ * exactly the OPFAMILYAMNAMENSP syscache key checked here.
+ */
+void
+IsThereOpFamilyInNamespace(const char *opfname, Oid opfmethod,
+ Oid opfnamespace)
+{
+ /* make sure the new name doesn't exist */
+ if (SearchSysCacheExists3(OPFAMILYAMNAMENSP,
+ ObjectIdGetDatum(opfmethod),
+ CStringGetDatum(opfname),
+ ObjectIdGetDatum(opfnamespace)))
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg("operator family \"%s\" for access method \"%s\" already exists in schema \"%s\"",
+ opfname,
+ get_am_name(opfmethod),
+ get_namespace_name(opfnamespace))));
+}
diff --git a/src/backend/commands/operatorcmds.c b/src/backend/commands/operatorcmds.c
new file mode 100644
index 0000000..a5924d7
--- /dev/null
+++ b/src/backend/commands/operatorcmds.c
@@ -0,0 +1,552 @@
+/*-------------------------------------------------------------------------
+ *
+ * operatorcmds.c
+ *
+ * Routines for operator manipulation commands
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/commands/operatorcmds.c
+ *
+ * DESCRIPTION
+ * The "DefineFoo" routines take the parse tree and pick out the
+ * appropriate arguments/flags, passing the results to the
+ * corresponding "FooDefine" routines (in src/catalog) that do
+ * the actual catalog-munging. These routines also verify permission
+ * of the user to execute the command.
+ *
+ * NOTES
+ * These things must be defined and committed in the following order:
+ * "create function":
+ * input/output, recv/send functions
+ * "create type":
+ * type
+ * "create operator":
+ * operators
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/htup_details.h"
+#include "access/table.h"
+#include "catalog/dependency.h"
+#include "catalog/indexing.h"
+#include "catalog/objectaccess.h"
+#include "catalog/pg_operator.h"
+#include "catalog/pg_type.h"
+#include "commands/alter.h"
+#include "commands/defrem.h"
+#include "miscadmin.h"
+#include "parser/parse_func.h"
+#include "parser/parse_oper.h"
+#include "parser/parse_type.h"
+#include "utils/acl.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
+
+static Oid ValidateRestrictionEstimator(List *restrictionName);
+static Oid ValidateJoinEstimator(List *joinName);
+
+/*
+ * DefineOperator
+ * this function extracts all the information from the
+ * parameter list generated by the parser and then has
+ * OperatorCreate() do all the actual work.
+ *
+ * 'parameters' is a list of DefElem
+ *
+ * 'names' is the possibly-qualified operator name; returns the address of
+ * the new pg_operator entry. Errors out on missing function/argument
+ * definitions or insufficient privileges.
+ */
+ObjectAddress
+DefineOperator(List *names, List *parameters)
+{
+ char *oprName;
+ Oid oprNamespace;
+ AclResult aclresult;
+ bool canMerge = false; /* operator merges */
+ bool canHash = false; /* operator hashes */
+ List *functionName = NIL; /* function for operator */
+ TypeName *typeName1 = NULL; /* first type name */
+ TypeName *typeName2 = NULL; /* second type name */
+ Oid typeId1 = InvalidOid; /* types converted to OID */
+ Oid typeId2 = InvalidOid;
+ Oid rettype;
+ List *commutatorName = NIL; /* optional commutator operator name */
+ List *negatorName = NIL; /* optional negator operator name */
+ List *restrictionName = NIL; /* optional restrict. sel. function */
+ List *joinName = NIL; /* optional join sel. function */
+ Oid functionOid; /* functions converted to OID */
+ Oid restrictionOid;
+ Oid joinOid;
+ Oid typeId[2]; /* to hold left and right arg */
+ int nargs;
+ ListCell *pl;
+
+ /* Convert list of names to a name and namespace */
+ oprNamespace = QualifiedNameGetCreationNamespace(names, &oprName);
+
+ /* Check we have creation rights in target namespace */
+ aclresult = pg_namespace_aclcheck(oprNamespace, GetUserId(), ACL_CREATE);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error(aclresult, OBJECT_SCHEMA,
+ get_namespace_name(oprNamespace));
+
+ /*
+ * loop over the definition list and extract the information we need.
+ */
+ foreach(pl, parameters)
+ {
+ DefElem *defel = (DefElem *) lfirst(pl);
+
+ if (strcmp(defel->defname, "leftarg") == 0)
+ {
+ typeName1 = defGetTypeName(defel);
+ if (typeName1->setof)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+ errmsg("SETOF type not allowed for operator argument")));
+ }
+ else if (strcmp(defel->defname, "rightarg") == 0)
+ {
+ typeName2 = defGetTypeName(defel);
+ if (typeName2->setof)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+ errmsg("SETOF type not allowed for operator argument")));
+ }
+ /* "function" and "procedure" are equivalent here */
+ else if (strcmp(defel->defname, "function") == 0)
+ functionName = defGetQualifiedName(defel);
+ else if (strcmp(defel->defname, "procedure") == 0)
+ functionName = defGetQualifiedName(defel);
+ else if (strcmp(defel->defname, "commutator") == 0)
+ commutatorName = defGetQualifiedName(defel);
+ else if (strcmp(defel->defname, "negator") == 0)
+ negatorName = defGetQualifiedName(defel);
+ else if (strcmp(defel->defname, "restrict") == 0)
+ restrictionName = defGetQualifiedName(defel);
+ else if (strcmp(defel->defname, "join") == 0)
+ joinName = defGetQualifiedName(defel);
+ else if (strcmp(defel->defname, "hashes") == 0)
+ canHash = defGetBoolean(defel);
+ else if (strcmp(defel->defname, "merges") == 0)
+ canMerge = defGetBoolean(defel);
+ /* These obsolete options are taken as meaning canMerge */
+ else if (strcmp(defel->defname, "sort1") == 0)
+ canMerge = true;
+ else if (strcmp(defel->defname, "sort2") == 0)
+ canMerge = true;
+ else if (strcmp(defel->defname, "ltcmp") == 0)
+ canMerge = true;
+ else if (strcmp(defel->defname, "gtcmp") == 0)
+ canMerge = true;
+ else
+ {
+ /* WARNING, not ERROR, for historical backwards-compatibility */
+ ereport(WARNING,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("operator attribute \"%s\" not recognized",
+ defel->defname)));
+ }
+ }
+
+ /*
+ * make sure we have our required definitions
+ */
+ if (functionName == NIL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+ errmsg("operator function must be specified")));
+
+ /* Transform type names to type OIDs */
+ if (typeName1)
+ typeId1 = typenameTypeId(NULL, typeName1);
+ if (typeName2)
+ typeId2 = typenameTypeId(NULL, typeName2);
+
+ /*
+ * If only the right argument is missing, the user is likely trying to
+ * create a postfix operator, so give them a hint about why that does not
+ * work. But if both arguments are missing, do not mention postfix
+ * operators, as the user most likely simply neglected to mention the
+ * arguments.
+ */
+ if (!OidIsValid(typeId1) && !OidIsValid(typeId2))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+ errmsg("operator argument types must be specified")));
+ if (!OidIsValid(typeId2))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+ errmsg("operator right argument type must be specified"),
+ errdetail("Postfix operators are not supported.")));
+
+ /* Require USAGE on each argument type */
+ if (typeName1)
+ {
+ aclresult = pg_type_aclcheck(typeId1, GetUserId(), ACL_USAGE);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error_type(aclresult, typeId1);
+ }
+
+ if (typeName2)
+ {
+ aclresult = pg_type_aclcheck(typeId2, GetUserId(), ACL_USAGE);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error_type(aclresult, typeId2);
+ }
+
+ /*
+ * Look up the operator's underlying function.
+ */
+ if (!OidIsValid(typeId1))
+ {
+ /* prefix operator: single argument, the right-hand type */
+ typeId[0] = typeId2;
+ nargs = 1;
+ }
+ else if (!OidIsValid(typeId2))
+ {
+ /*
+ * Unreachable given the postfix-operator error check above, which
+ * guarantees typeId2 is valid here; kept defensively.
+ */
+ typeId[0] = typeId1;
+ nargs = 1;
+ }
+ else
+ {
+ typeId[0] = typeId1;
+ typeId[1] = typeId2;
+ nargs = 2;
+ }
+ functionOid = LookupFuncName(functionName, nargs, typeId, false);
+
+ /*
+ * We require EXECUTE rights for the function. This isn't strictly
+ * necessary, since EXECUTE will be checked at any attempted use of the
+ * operator, but it seems like a good idea anyway.
+ */
+ aclresult = pg_proc_aclcheck(functionOid, GetUserId(), ACL_EXECUTE);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error(aclresult, OBJECT_FUNCTION,
+ NameListToString(functionName));
+
+ /* Also require USAGE on the function's result type */
+ rettype = get_func_rettype(functionOid);
+ aclresult = pg_type_aclcheck(rettype, GetUserId(), ACL_USAGE);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error_type(aclresult, rettype);
+
+ /*
+ * Look up restriction and join estimators if specified
+ */
+ if (restrictionName)
+ restrictionOid = ValidateRestrictionEstimator(restrictionName);
+ else
+ restrictionOid = InvalidOid;
+ if (joinName)
+ joinOid = ValidateJoinEstimator(joinName);
+ else
+ joinOid = InvalidOid;
+
+ /*
+ * now have OperatorCreate do all the work..
+ */
+ return
+ OperatorCreate(oprName, /* operator name */
+ oprNamespace, /* namespace */
+ typeId1, /* left type id */
+ typeId2, /* right type id */
+ functionOid, /* function for operator */
+ commutatorName, /* optional commutator operator name */
+ negatorName, /* optional negator operator name */
+ restrictionOid, /* optional restrict. sel. function */
+ joinOid, /* optional join sel. function name */
+ canMerge, /* operator merges */
+ canHash); /* operator hashes */
+}
+
+/*
+ * Look up a restriction estimator function by name, and verify that it has
+ * the correct signature and we have the permissions to attach it to an
+ * operator.
+ *
+ * Required signature: (internal, oid, internal, int4) returns float8.
+ * Errors out (via LookupFuncName with missing_ok=false, or explicitly) if
+ * the function is absent, has the wrong result type, or the caller lacks
+ * EXECUTE; otherwise returns the function's OID.
+ */
+static Oid
+ValidateRestrictionEstimator(List *restrictionName)
+{
+ Oid typeId[4];
+ Oid restrictionOid;
+ AclResult aclresult;
+
+ typeId[0] = INTERNALOID; /* PlannerInfo */
+ typeId[1] = OIDOID; /* operator OID */
+ typeId[2] = INTERNALOID; /* args list */
+ typeId[3] = INT4OID; /* varRelid */
+
+ restrictionOid = LookupFuncName(restrictionName, 4, typeId, false);
+
+ /* estimators must return float8 */
+ if (get_func_rettype(restrictionOid) != FLOAT8OID)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("restriction estimator function %s must return type %s",
+ NameListToString(restrictionName), "float8")));
+
+ /* Require EXECUTE rights for the estimator */
+ aclresult = pg_proc_aclcheck(restrictionOid, GetUserId(), ACL_EXECUTE);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error(aclresult, OBJECT_FUNCTION,
+ NameListToString(restrictionName));
+
+ return restrictionOid;
+}
+
+/*
+ * Look up a join estimator function by name, and verify that it has the
+ * correct signature and we have the permissions to attach it to an
+ * operator.
+ *
+ * Accepts either the modern 5-argument form
+ * (internal, oid, internal, int2, internal) or the pre-8.4 4-argument form;
+ * errors if both exist (ambiguous) or neither does. The estimator must
+ * return float8 and the caller must have EXECUTE on it.
+ */
+static Oid
+ValidateJoinEstimator(List *joinName)
+{
+ Oid typeId[5];
+ Oid joinOid;
+ Oid joinOid2;
+ AclResult aclresult;
+
+ typeId[0] = INTERNALOID; /* PlannerInfo */
+ typeId[1] = OIDOID; /* operator OID */
+ typeId[2] = INTERNALOID; /* args list */
+ typeId[3] = INT2OID; /* jointype */
+ typeId[4] = INTERNALOID; /* SpecialJoinInfo */
+
+ /*
+ * As of Postgres 8.4, the preferred signature for join estimators has 5
+ * arguments, but we still allow the old 4-argument form. Whine about
+ * ambiguity if both forms exist.
+ */
+ joinOid = LookupFuncName(joinName, 5, typeId, true);
+ joinOid2 = LookupFuncName(joinName, 4, typeId, true);
+ if (OidIsValid(joinOid))
+ {
+ if (OidIsValid(joinOid2))
+ ereport(ERROR,
+ (errcode(ERRCODE_AMBIGUOUS_FUNCTION),
+ errmsg("join estimator function %s has multiple matches",
+ NameListToString(joinName))));
+ }
+ else
+ {
+ joinOid = joinOid2;
+ /* If not found, reference the 5-argument signature in error msg */
+ if (!OidIsValid(joinOid))
+ joinOid = LookupFuncName(joinName, 5, typeId, false);
+ }
+
+ /* estimators must return float8 */
+ if (get_func_rettype(joinOid) != FLOAT8OID)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("join estimator function %s must return type %s",
+ NameListToString(joinName), "float8")));
+
+ /* Require EXECUTE rights for the estimator */
+ aclresult = pg_proc_aclcheck(joinOid, GetUserId(), ACL_EXECUTE);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error(aclresult, OBJECT_FUNCTION,
+ NameListToString(joinName));
+
+ return joinOid;
+}
+
+/*
+ * Guts of operator deletion.
+ *
+ * Deletes the pg_operator row for operOid, first unlinking any commutator
+ * and negator back-references so no dangling links remain. Caller is
+ * expected to have handled dependency checks already.
+ */
+void
+RemoveOperatorById(Oid operOid)
+{
+ Relation relation;
+ HeapTuple tup;
+ Form_pg_operator op;
+
+ relation = table_open(OperatorRelationId, RowExclusiveLock);
+
+ tup = SearchSysCache1(OPEROID, ObjectIdGetDatum(operOid));
+ if (!HeapTupleIsValid(tup)) /* should not happen */
+ elog(ERROR, "cache lookup failed for operator %u", operOid);
+ op = (Form_pg_operator) GETSTRUCT(tup);
+
+ /*
+ * Reset links from commutator and negator, if any. In case of a
+ * self-commutator or self-negator, this means we have to re-fetch the
+ * updated tuple. (We could optimize away updates on the tuple we're
+ * about to drop, but it doesn't seem worth convoluting the logic for.)
+ */
+ if (OidIsValid(op->oprcom) || OidIsValid(op->oprnegate))
+ {
+ OperatorUpd(operOid, op->oprcom, op->oprnegate, true);
+ /* self-links mean our own tuple was just updated; re-read its TID */
+ if (operOid == op->oprcom || operOid == op->oprnegate)
+ {
+ ReleaseSysCache(tup);
+ tup = SearchSysCache1(OPEROID, ObjectIdGetDatum(operOid));
+ if (!HeapTupleIsValid(tup)) /* should not happen */
+ elog(ERROR, "cache lookup failed for operator %u", operOid);
+ }
+ }
+
+ CatalogTupleDelete(relation, &tup->t_self);
+
+ ReleaseSysCache(tup);
+
+ table_close(relation, RowExclusiveLock);
+}
+
+/*
+ * AlterOperator
+ * routine implementing ALTER OPERATOR <operator> SET (option = ...).
+ *
+ * Currently, only RESTRICT and JOIN estimator functions can be changed.
+ *
+ * Returns the address of the altered pg_operator entry. Must be run by
+ * the operator's owner. Specifying NONE for an option clears it.
+ */
+ObjectAddress
+AlterOperator(AlterOperatorStmt *stmt)
+{
+ ObjectAddress address;
+ Oid oprId;
+ Relation catalog;
+ HeapTuple tup;
+ Form_pg_operator oprForm;
+ int i;
+ ListCell *pl;
+ Datum values[Natts_pg_operator];
+ bool nulls[Natts_pg_operator];
+ bool replaces[Natts_pg_operator];
+ List *restrictionName = NIL; /* optional restrict. sel. function */
+ bool updateRestriction = false;
+ Oid restrictionOid;
+ List *joinName = NIL; /* optional join sel. function */
+ bool updateJoin = false;
+ Oid joinOid;
+
+ /* Look up the operator */
+ oprId = LookupOperWithArgs(stmt->opername, false);
+ catalog = table_open(OperatorRelationId, RowExclusiveLock);
+ tup = SearchSysCacheCopy1(OPEROID, ObjectIdGetDatum(oprId));
+ if (!HeapTupleIsValid(tup))
+ elog(ERROR, "cache lookup failed for operator %u", oprId);
+ oprForm = (Form_pg_operator) GETSTRUCT(tup);
+
+ /* Process options */
+ foreach(pl, stmt->options)
+ {
+ DefElem *defel = (DefElem *) lfirst(pl);
+ List *param;
+
+ if (defel->arg == NULL)
+ param = NIL; /* NONE, removes the function */
+ else
+ param = defGetQualifiedName(defel);
+
+ if (strcmp(defel->defname, "restrict") == 0)
+ {
+ restrictionName = param;
+ updateRestriction = true;
+ }
+ else if (strcmp(defel->defname, "join") == 0)
+ {
+ joinName = param;
+ updateJoin = true;
+ }
+
+ /*
+ * The rest of the options that CREATE accepts cannot be changed.
+ * Check for them so that we can give a meaningful error message.
+ */
+ else if (strcmp(defel->defname, "leftarg") == 0 ||
+ strcmp(defel->defname, "rightarg") == 0 ||
+ strcmp(defel->defname, "function") == 0 ||
+ strcmp(defel->defname, "procedure") == 0 ||
+ strcmp(defel->defname, "commutator") == 0 ||
+ strcmp(defel->defname, "negator") == 0 ||
+ strcmp(defel->defname, "hashes") == 0 ||
+ strcmp(defel->defname, "merges") == 0)
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("operator attribute \"%s\" cannot be changed",
+ defel->defname)));
+ }
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("operator attribute \"%s\" not recognized",
+ defel->defname)));
+ }
+
+ /* Check permissions. Must be owner. */
+ if (!pg_oper_ownercheck(oprId, GetUserId()))
+ aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_OPERATOR,
+ NameStr(oprForm->oprname));
+
+ /*
+ * Look up restriction and join estimators if specified
+ */
+ if (restrictionName)
+ restrictionOid = ValidateRestrictionEstimator(restrictionName);
+ else
+ restrictionOid = InvalidOid;
+ if (joinName)
+ joinOid = ValidateJoinEstimator(joinName);
+ else
+ joinOid = InvalidOid;
+
+ /* Perform additional checks, like OperatorCreate does */
+ if (!(OidIsValid(oprForm->oprleft) && OidIsValid(oprForm->oprright)))
+ {
+ /* If it's not a binary op, these things mustn't be set: */
+ if (OidIsValid(joinOid))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+ errmsg("only binary operators can have join selectivity")));
+ }
+
+ if (oprForm->oprresult != BOOLOID)
+ {
+ if (OidIsValid(restrictionOid))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+ errmsg("only boolean operators can have restriction selectivity")));
+ if (OidIsValid(joinOid))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+ errmsg("only boolean operators can have join selectivity")));
+ }
+
+ /* Update the tuple */
+ for (i = 0; i < Natts_pg_operator; ++i)
+ {
+ values[i] = (Datum) 0;
+ replaces[i] = false;
+ nulls[i] = false;
+ }
+ if (updateRestriction)
+ {
+ replaces[Anum_pg_operator_oprrest - 1] = true;
+ values[Anum_pg_operator_oprrest - 1] = restrictionOid;
+ }
+ if (updateJoin)
+ {
+ replaces[Anum_pg_operator_oprjoin - 1] = true;
+ values[Anum_pg_operator_oprjoin - 1] = joinOid;
+ }
+
+ tup = heap_modify_tuple(tup, RelationGetDescr(catalog),
+ values, nulls, replaces);
+
+ CatalogTupleUpdate(catalog, &tup->t_self, tup);
+
+ /* Refresh the operator's dependency entries for the new estimators */
+ address = makeOperatorDependencies(tup, false, true);
+
+ InvokeObjectPostAlterHook(OperatorRelationId, oprId, 0);
+
+ /* NoLock: hold RowExclusiveLock on pg_operator until commit */
+ table_close(catalog, NoLock);
+
+ return address;
+}
diff --git a/src/backend/commands/policy.c b/src/backend/commands/policy.c
new file mode 100644
index 0000000..a59ee3b
--- /dev/null
+++ b/src/backend/commands/policy.c
@@ -0,0 +1,1285 @@
+/*-------------------------------------------------------------------------
+ *
+ * policy.c
+ * Commands for manipulating policies.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/backend/commands/policy.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/htup.h"
+#include "access/htup_details.h"
+#include "access/relation.h"
+#include "access/sysattr.h"
+#include "access/table.h"
+#include "access/xact.h"
+#include "catalog/catalog.h"
+#include "catalog/dependency.h"
+#include "catalog/indexing.h"
+#include "catalog/namespace.h"
+#include "catalog/objectaccess.h"
+#include "catalog/pg_authid.h"
+#include "catalog/pg_policy.h"
+#include "catalog/pg_type.h"
+#include "commands/policy.h"
+#include "miscadmin.h"
+#include "nodes/makefuncs.h"
+#include "nodes/pg_list.h"
+#include "parser/parse_clause.h"
+#include "parser/parse_collate.h"
+#include "parser/parse_node.h"
+#include "parser/parse_relation.h"
+#include "rewrite/rewriteManip.h"
+#include "rewrite/rowsecurity.h"
+#include "storage/lock.h"
+#include "utils/acl.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/fmgroids.h"
+#include "utils/inval.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
+
+static void RangeVarCallbackForPolicy(const RangeVar *rv,
+ Oid relid, Oid oldrelid, void *arg);
+static char parse_policy_command(const char *cmd_name);
+static Datum *policy_role_list_to_array(List *roles, int *num_roles);
+
+/*
+ * Callback to RangeVarGetRelidExtended().
+ *
+ * Checks the following:
+ * - the relation specified is a table.
+ * - current user owns the table.
+ * - the table is not a system table.
+ *
+ * If any of these checks fails then an error is raised.
+ */
+static void
+RangeVarCallbackForPolicy(const RangeVar *rv, Oid relid, Oid oldrelid,
+ void *arg)
+{
+ HeapTuple tuple;
+ Form_pg_class classform;
+ char relkind;
+
+ tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
+ /* silently ignore a relation dropped concurrently; caller re-checks */
+ if (!HeapTupleIsValid(tuple))
+ return;
+
+ classform = (Form_pg_class) GETSTRUCT(tuple);
+ relkind = classform->relkind;
+
+ /* Must own relation. */
+ if (!pg_class_ownercheck(relid, GetUserId()))
+ aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(get_rel_relkind(relid)), rv->relname);
+
+ /* No system table modifications unless explicitly allowed. */
+ if (!allowSystemTableMods && IsSystemClass(relid, classform))
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("permission denied: \"%s\" is a system catalog",
+ rv->relname)));
+
+ /* Relation type MUST be a table (plain or partitioned). */
+ if (relkind != RELKIND_RELATION && relkind != RELKIND_PARTITIONED_TABLE)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("\"%s\" is not a table", rv->relname)));
+
+ ReleaseSysCache(tuple);
+}
+
+/*
+ * parse_policy_command -
+ * helper function to convert full command strings to their char
+ * representation.
+ *
+ * cmd_name - full string command name. Valid values are 'all', 'select',
+ * 'insert', 'update' and 'delete'.
+ *
+ * Returns '*' for 'all', else the corresponding ACL_*_CHR character.
+ * Any other input (including NULL) is an internal error, since the parser
+ * should only produce the values above.
+ */
+static char
+parse_policy_command(const char *cmd_name)
+{
+ char polcmd;
+
+ if (!cmd_name)
+ elog(ERROR, "unrecognized policy command");
+
+ if (strcmp(cmd_name, "all") == 0)
+ polcmd = '*';
+ else if (strcmp(cmd_name, "select") == 0)
+ polcmd = ACL_SELECT_CHR;
+ else if (strcmp(cmd_name, "insert") == 0)
+ polcmd = ACL_INSERT_CHR;
+ else if (strcmp(cmd_name, "update") == 0)
+ polcmd = ACL_UPDATE_CHR;
+ else if (strcmp(cmd_name, "delete") == 0)
+ polcmd = ACL_DELETE_CHR;
+ else
+ elog(ERROR, "unrecognized policy command");
+
+ return polcmd;
+}
+
+/*
+ * policy_role_list_to_array
+ * helper function to convert a list of RoleSpecs to an array of
+ * role id Datums.
+ *
+ * roles: list of RoleSpec nodes; NIL is treated as PUBLIC.
+ * num_roles: output parameter set to the length of the returned array.
+ *
+ * Returns a palloc'd Datum array of role OIDs. If PUBLIC appears anywhere
+ * in the list, the result collapses to just PUBLIC (with a WARNING if other
+ * roles were also given, since PUBLIC already covers them).
+ */
+static Datum *
+policy_role_list_to_array(List *roles, int *num_roles)
+{
+ Datum *role_oids;
+ ListCell *cell;
+ int i = 0;
+
+ /* Handle no roles being passed in as being for public */
+ if (roles == NIL)
+ {
+ *num_roles = 1;
+ role_oids = (Datum *) palloc(*num_roles * sizeof(Datum));
+ role_oids[0] = ObjectIdGetDatum(ACL_ID_PUBLIC);
+
+ return role_oids;
+ }
+
+ *num_roles = list_length(roles);
+ role_oids = (Datum *) palloc(*num_roles * sizeof(Datum));
+
+ foreach(cell, roles)
+ {
+ RoleSpec *spec = lfirst(cell);
+
+ /*
+ * PUBLIC covers all roles, so it only makes sense alone.
+ */
+ if (spec->roletype == ROLESPEC_PUBLIC)
+ {
+ if (*num_roles != 1)
+ {
+ ereport(WARNING,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("ignoring specified roles other than PUBLIC"),
+ errhint("All roles are members of the PUBLIC role.")));
+ *num_roles = 1;
+ }
+ role_oids[0] = ObjectIdGetDatum(ACL_ID_PUBLIC);
+
+ return role_oids;
+ }
+ else
+ role_oids[i++] =
+ ObjectIdGetDatum(get_rolespec_oid(spec, false));
+ }
+
+ return role_oids;
+}
+
+/*
+ * Load row security policy from the catalog, and store it in
+ * the relation's relcache entry.
+ *
+ * Note that caller should have verified that pg_class.relrowsecurity
+ * is true for this relation.
+ *
+ * All policy data is copied into a dedicated memory context which is
+ * reparented under CacheMemoryContext only on success, so a mid-build
+ * error leaks nothing long-lived.
+ */
+void
+RelationBuildRowSecurity(Relation relation)
+{
+ MemoryContext rscxt;
+ MemoryContext oldcxt = CurrentMemoryContext;
+ RowSecurityDesc *rsdesc;
+ Relation catalog;
+ ScanKeyData skey;
+ SysScanDesc sscan;
+ HeapTuple tuple;
+
+ /*
+ * Create a memory context to hold everything associated with this
+ * relation's row security policy. This makes it easy to clean up during
+ * a relcache flush. However, to cover the possibility of an error
+ * partway through, we don't make the context long-lived till we're done.
+ */
+ rscxt = AllocSetContextCreate(CurrentMemoryContext,
+ "row security descriptor",
+ ALLOCSET_SMALL_SIZES);
+ MemoryContextCopyAndSetIdentifier(rscxt,
+ RelationGetRelationName(relation));
+
+ rsdesc = MemoryContextAllocZero(rscxt, sizeof(RowSecurityDesc));
+ rsdesc->rscxt = rscxt;
+
+ /*
+ * Now scan pg_policy for RLS policies associated with this relation.
+ * Because we use the index on (polrelid, polname), we should consistently
+ * visit the rel's policies in name order, at least when system indexes
+ * aren't disabled. This simplifies equalRSDesc().
+ */
+ catalog = table_open(PolicyRelationId, AccessShareLock);
+
+ ScanKeyInit(&skey,
+ Anum_pg_policy_polrelid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(RelationGetRelid(relation)));
+
+ sscan = systable_beginscan(catalog, PolicyPolrelidPolnameIndexId, true,
+ NULL, 1, &skey);
+
+ while (HeapTupleIsValid(tuple = systable_getnext(sscan)))
+ {
+ Form_pg_policy policy_form = (Form_pg_policy) GETSTRUCT(tuple);
+ RowSecurityPolicy *policy;
+ Datum datum;
+ bool isnull;
+ char *str_value;
+
+ policy = MemoryContextAllocZero(rscxt, sizeof(RowSecurityPolicy));
+
+ /*
+ * Note: we must be sure that pass-by-reference data gets copied into
+ * rscxt. We avoid making that context current over wider spans than
+ * we have to, though.
+ */
+
+ /* Get policy command */
+ policy->polcmd = policy_form->polcmd;
+
+ /* Get policy, permissive or restrictive */
+ policy->permissive = policy_form->polpermissive;
+
+ /* Get policy name */
+ policy->policy_name =
+ MemoryContextStrdup(rscxt, NameStr(policy_form->polname));
+
+ /* Get policy roles */
+ datum = heap_getattr(tuple, Anum_pg_policy_polroles,
+ RelationGetDescr(catalog), &isnull);
+ /* shouldn't be null, but let's check for luck */
+ if (isnull)
+ elog(ERROR, "unexpected null value in pg_policy.polroles");
+ MemoryContextSwitchTo(rscxt);
+ policy->roles = DatumGetArrayTypePCopy(datum);
+ MemoryContextSwitchTo(oldcxt);
+
+ /* Get policy qual (stored as a serialized node tree) */
+ datum = heap_getattr(tuple, Anum_pg_policy_polqual,
+ RelationGetDescr(catalog), &isnull);
+ if (!isnull)
+ {
+ str_value = TextDatumGetCString(datum);
+ MemoryContextSwitchTo(rscxt);
+ policy->qual = (Expr *) stringToNode(str_value);
+ MemoryContextSwitchTo(oldcxt);
+ pfree(str_value);
+ }
+ else
+ policy->qual = NULL;
+
+ /* Get WITH CHECK qual */
+ datum = heap_getattr(tuple, Anum_pg_policy_polwithcheck,
+ RelationGetDescr(catalog), &isnull);
+ if (!isnull)
+ {
+ str_value = TextDatumGetCString(datum);
+ MemoryContextSwitchTo(rscxt);
+ policy->with_check_qual = (Expr *) stringToNode(str_value);
+ MemoryContextSwitchTo(oldcxt);
+ pfree(str_value);
+ }
+ else
+ policy->with_check_qual = NULL;
+
+ /* We want to cache whether there are SubLinks in these expressions */
+ policy->hassublinks = checkExprHasSubLink((Node *) policy->qual) ||
+ checkExprHasSubLink((Node *) policy->with_check_qual);
+
+ /*
+ * Add this object to list. For historical reasons, the list is built
+ * in reverse order.
+ */
+ MemoryContextSwitchTo(rscxt);
+ rsdesc->policies = lcons(policy, rsdesc->policies);
+ MemoryContextSwitchTo(oldcxt);
+ }
+
+ systable_endscan(sscan);
+ table_close(catalog, AccessShareLock);
+
+ /*
+ * Success. Reparent the descriptor's memory context under
+ * CacheMemoryContext so that it will live indefinitely, then attach the
+ * policy descriptor to the relcache entry.
+ */
+ MemoryContextSetParent(rscxt, CacheMemoryContext);
+
+ relation->rd_rsdesc = rsdesc;
+}
+
+/*
+ * RemovePolicyById -
+ * remove a policy by its OID. If a policy does not exist with the provided
+ * oid, then an error is raised.
+ *
+ * policy_id - the oid of the policy.
+ *
+ * Takes AccessExclusiveLock on the policy's table (held until commit) and
+ * invalidates its relcache entry so the policy set is rebuilt.
+ */
+void
+RemovePolicyById(Oid policy_id)
+{
+ Relation pg_policy_rel;
+ SysScanDesc sscan;
+ ScanKeyData skey[1];
+ HeapTuple tuple;
+ Oid relid;
+ Relation rel;
+
+ pg_policy_rel = table_open(PolicyRelationId, RowExclusiveLock);
+
+ /*
+ * Find the policy to delete.
+ */
+ ScanKeyInit(&skey[0],
+ Anum_pg_policy_oid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(policy_id));
+
+ sscan = systable_beginscan(pg_policy_rel, PolicyOidIndexId, true,
+ NULL, 1, skey);
+
+ tuple = systable_getnext(sscan);
+
+ /* If the policy exists, then remove it, otherwise raise an error. */
+ if (!HeapTupleIsValid(tuple))
+ elog(ERROR, "could not find tuple for policy %u", policy_id);
+
+ /*
+ * Open and exclusive-lock the relation the policy belongs to. (We need
+ * exclusive lock to lock out queries that might otherwise depend on the
+ * set of policies the rel has; furthermore we've got to hold the lock
+ * till commit.)
+ */
+ relid = ((Form_pg_policy) GETSTRUCT(tuple))->polrelid;
+
+ rel = table_open(relid, AccessExclusiveLock);
+ if (rel->rd_rel->relkind != RELKIND_RELATION &&
+ rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("\"%s\" is not a table",
+ RelationGetRelationName(rel))));
+
+ if (!allowSystemTableMods && IsSystemRelation(rel))
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("permission denied: \"%s\" is a system catalog",
+ RelationGetRelationName(rel))));
+
+ CatalogTupleDelete(pg_policy_rel, &tuple->t_self);
+
+ systable_endscan(sscan);
+
+ /*
+ * Note that, unlike some of the other flags in pg_class, relrowsecurity
+ * is not just an indication of if policies exist. When relrowsecurity is
+ * set by a user, then all access to the relation must be through a
+ * policy. If no policy is defined for the relation then a default-deny
+ * policy is created and all records are filtered (except for queries from
+ * the owner).
+ */
+ CacheInvalidateRelcache(rel);
+
+ /* NoLock: keep the AccessExclusiveLock until transaction commit */
+ table_close(rel, NoLock);
+
+ /* Clean up */
+ table_close(pg_policy_rel, RowExclusiveLock);
+}
+
+/*
+ * RemoveRoleFromObjectPolicy -
+ *	 remove a role from a policy's applicable-roles list.
+ *
+ * Returns true if the role was successfully removed from the policy.
+ * Returns false if the role was not removed because it would have left
+ * polroles empty (which is disallowed, though perhaps it should not be).
+ * On false return, the caller should instead drop the policy altogether.
+ *
+ * roleid - the oid of the role to remove
+ * classid - should always be PolicyRelationId
+ * policy_id - the oid of the policy.
+ */
+bool
+RemoveRoleFromObjectPolicy(Oid roleid, Oid classid, Oid policy_id)
+{
+	Relation	pg_policy_rel;
+	SysScanDesc sscan;
+	ScanKeyData skey[1];
+	HeapTuple	tuple;
+	Oid			relid;
+	ArrayType  *policy_roles;
+	Datum		roles_datum;
+	Oid		   *roles;
+	int			num_roles;
+	Datum	   *role_oids;
+	bool		attr_isnull;
+	bool		keep_policy = true;
+	int			i,
+				j;
+
+	Assert(classid == PolicyRelationId);
+
+	pg_policy_rel = table_open(PolicyRelationId, RowExclusiveLock);
+
+	/*
+	 * Find the policy to update.
+	 */
+	ScanKeyInit(&skey[0],
+				Anum_pg_policy_oid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(policy_id));
+
+	sscan = systable_beginscan(pg_policy_rel, PolicyOidIndexId, true,
+							   NULL, 1, skey);
+
+	tuple = systable_getnext(sscan);
+
+	/* Raise an error if we don't find the policy. */
+	if (!HeapTupleIsValid(tuple))
+		elog(ERROR, "could not find tuple for policy %u", policy_id);
+
+	/* Identify rel the policy belongs to */
+	relid = ((Form_pg_policy) GETSTRUCT(tuple))->polrelid;
+
+	/* Get the current set of roles */
+	roles_datum = heap_getattr(tuple,
+							   Anum_pg_policy_polroles,
+							   RelationGetDescr(pg_policy_rel),
+							   &attr_isnull);
+
+	Assert(!attr_isnull);
+
+	/* Work on a palloc'd copy; roles[] points into that copy. */
+	policy_roles = DatumGetArrayTypePCopy(roles_datum);
+	roles = (Oid *) ARR_DATA_PTR(policy_roles);
+	num_roles = ARR_DIMS(policy_roles)[0];
+
+	/*
+	 * Rebuild the polroles array, without any mentions of the target role.
+	 * Ordinarily there'd be exactly one, but we must cope with duplicate
+	 * mentions, since CREATE/ALTER POLICY historically have allowed that.
+	 */
+	role_oids = (Datum *) palloc(num_roles * sizeof(Datum));
+	for (i = 0, j = 0; i < num_roles; i++)
+	{
+		if (roles[i] != roleid)
+			role_oids[j++] = ObjectIdGetDatum(roles[i]);
+	}
+	num_roles = j;
+
+	/* If any roles remain, update the policy entry. */
+	if (num_roles > 0)
+	{
+		ArrayType  *role_ids;
+		Datum		values[Natts_pg_policy];
+		bool		isnull[Natts_pg_policy];
+		bool		replaces[Natts_pg_policy];
+		HeapTuple	new_tuple;
+		HeapTuple	reltup;
+		ObjectAddress target;
+		ObjectAddress myself;
+
+		/* zero-clear */
+		memset(values, 0, sizeof(values));
+		memset(replaces, 0, sizeof(replaces));
+		memset(isnull, 0, sizeof(isnull));
+
+		/* This is the array for the new tuple */
+		role_ids = construct_array(role_oids, num_roles, OIDOID,
+								   sizeof(Oid), true, TYPALIGN_INT);
+
+		/* Only polroles is replaced; it is never set to null here. */
+		replaces[Anum_pg_policy_polroles - 1] = true;
+		values[Anum_pg_policy_polroles - 1] = PointerGetDatum(role_ids);
+
+		new_tuple = heap_modify_tuple(tuple,
+									  RelationGetDescr(pg_policy_rel),
+									  values, isnull, replaces);
+		CatalogTupleUpdate(pg_policy_rel, &new_tuple->t_self, new_tuple);
+
+		/* Remove all the old shared dependencies (roles) */
+		deleteSharedDependencyRecordsFor(PolicyRelationId, policy_id, 0);
+
+		/* Record the new shared dependencies (roles) */
+		myself.classId = PolicyRelationId;
+		myself.objectId = policy_id;
+		myself.objectSubId = 0;
+
+		target.classId = AuthIdRelationId;
+		target.objectSubId = 0;
+		for (i = 0; i < num_roles; i++)
+		{
+			target.objectId = DatumGetObjectId(role_oids[i]);
+			/* no need for dependency on the public role */
+			if (target.objectId != ACL_ID_PUBLIC)
+				recordSharedDependencyOn(&myself, &target,
+										 SHARED_DEPENDENCY_POLICY);
+		}
+
+		InvokeObjectPostAlterHook(PolicyRelationId, policy_id, 0);
+
+		heap_freetuple(new_tuple);
+
+		/* Make updates visible */
+		CommandCounterIncrement();
+
+		/*
+		 * Invalidate relcache entry for rel the policy belongs to, to force
+		 * redoing any dependent plans.  In case of a race condition where the
+		 * rel was just dropped, we need do nothing.
+		 */
+		reltup = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
+		if (HeapTupleIsValid(reltup))
+		{
+			CacheInvalidateRelcacheByTuple(reltup);
+			ReleaseSysCache(reltup);
+		}
+	}
+	else
+	{
+		/* No roles would remain, so drop the policy instead. */
+		keep_policy = false;
+	}
+
+	/* Clean up. */
+	systable_endscan(sscan);
+
+	table_close(pg_policy_rel, RowExclusiveLock);
+
+	return keep_policy;
+}
+
+/*
+ * CreatePolicy -
+ *	 handles the execution of the CREATE POLICY command.
+ *
+ * stmt - the CreatePolicyStmt that describes the policy to create.
+ */
+ObjectAddress
+CreatePolicy(CreatePolicyStmt *stmt)
+{
+	Relation	pg_policy_rel;
+	Oid			policy_id;
+	Relation	target_table;
+	Oid			table_id;
+	char		polcmd;
+	Datum	   *role_oids;
+	int			nitems = 0;
+	ArrayType  *role_ids;
+	ParseState *qual_pstate;
+	ParseState *with_check_pstate;
+	ParseNamespaceItem *nsitem;
+	Node	   *qual;
+	Node	   *with_check_qual;
+	ScanKeyData skey[2];
+	SysScanDesc sscan;
+	HeapTuple	policy_tuple;
+	Datum		values[Natts_pg_policy];
+	bool		isnull[Natts_pg_policy];
+	ObjectAddress target;
+	ObjectAddress myself;
+	int			i;
+
+	/* Parse command */
+	polcmd = parse_policy_command(stmt->cmd_name);
+
+	/*
+	 * If the command is SELECT or DELETE then WITH CHECK should be NULL.
+	 */
+	if ((polcmd == ACL_SELECT_CHR || polcmd == ACL_DELETE_CHR)
+		&& stmt->with_check != NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("WITH CHECK cannot be applied to SELECT or DELETE")));
+
+	/*
+	 * If the command is INSERT then WITH CHECK should be the only expression
+	 * provided.
+	 */
+	if (polcmd == ACL_INSERT_CHR && stmt->qual != NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("only WITH CHECK expression allowed for INSERT")));
+
+	/* Collect role ids */
+	role_oids = policy_role_list_to_array(stmt->roles, &nitems);
+	role_ids = construct_array(role_oids, nitems, OIDOID,
+							   sizeof(Oid), true, TYPALIGN_INT);
+
+	/* Parse the supplied clause */
+	qual_pstate = make_parsestate(NULL);
+	with_check_pstate = make_parsestate(NULL);
+
+	/* zero-clear */
+	memset(values, 0, sizeof(values));
+	memset(isnull, 0, sizeof(isnull));
+
+	/* Get id of table.  Also handles permissions checks. */
+	table_id = RangeVarGetRelidExtended(stmt->table, AccessExclusiveLock,
+										0,
+										RangeVarCallbackForPolicy,
+										(void *) stmt);
+
+	/* Open target_table to build quals.  No additional lock is necessary. */
+	target_table = relation_open(table_id, NoLock);
+
+	/* Add for the regular security quals */
+	nsitem = addRangeTableEntryForRelation(qual_pstate, target_table,
+										   AccessShareLock,
+										   NULL, false, false);
+	addNSItemToQuery(qual_pstate, nsitem, false, true, true);
+
+	/* Add for the with-check quals */
+	nsitem = addRangeTableEntryForRelation(with_check_pstate, target_table,
+										   AccessShareLock,
+										   NULL, false, false);
+	addNSItemToQuery(with_check_pstate, nsitem, false, true, true);
+
+	qual = transformWhereClause(qual_pstate,
+								stmt->qual,
+								EXPR_KIND_POLICY,
+								"POLICY");
+
+	with_check_qual = transformWhereClause(with_check_pstate,
+										   stmt->with_check,
+										   EXPR_KIND_POLICY,
+										   "POLICY");
+
+	/* Fix up collation information */
+	assign_expr_collations(qual_pstate, qual);
+	assign_expr_collations(with_check_pstate, with_check_qual);
+
+	/* Open pg_policy catalog */
+	pg_policy_rel = table_open(PolicyRelationId, RowExclusiveLock);
+
+	/* Set key - policy's relation id. */
+	ScanKeyInit(&skey[0],
+				Anum_pg_policy_polrelid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(table_id));
+
+	/* Set key - policy's name. */
+	ScanKeyInit(&skey[1],
+				Anum_pg_policy_polname,
+				BTEqualStrategyNumber, F_NAMEEQ,
+				CStringGetDatum(stmt->policy_name));
+
+	sscan = systable_beginscan(pg_policy_rel,
+							   PolicyPolrelidPolnameIndexId, true, NULL, 2,
+							   skey);
+
+	policy_tuple = systable_getnext(sscan);
+
+	/* Complain if the policy name already exists for the table */
+	if (HeapTupleIsValid(policy_tuple))
+		ereport(ERROR,
+				(errcode(ERRCODE_DUPLICATE_OBJECT),
+				 errmsg("policy \"%s\" for table \"%s\" already exists",
+						stmt->policy_name, RelationGetRelationName(target_table))));
+
+	/* Build the new pg_policy row. */
+	policy_id = GetNewOidWithIndex(pg_policy_rel, PolicyOidIndexId,
+								   Anum_pg_policy_oid);
+	values[Anum_pg_policy_oid - 1] = ObjectIdGetDatum(policy_id);
+	values[Anum_pg_policy_polrelid - 1] = ObjectIdGetDatum(table_id);
+	values[Anum_pg_policy_polname - 1] = DirectFunctionCall1(namein,
+															 CStringGetDatum(stmt->policy_name));
+	values[Anum_pg_policy_polcmd - 1] = CharGetDatum(polcmd);
+	values[Anum_pg_policy_polpermissive - 1] = BoolGetDatum(stmt->permissive);
+	values[Anum_pg_policy_polroles - 1] = PointerGetDatum(role_ids);
+
+	/* Add qual if present (stored as a serialized node tree). */
+	if (qual)
+		values[Anum_pg_policy_polqual - 1] = CStringGetTextDatum(nodeToString(qual));
+	else
+		isnull[Anum_pg_policy_polqual - 1] = true;
+
+	/* Add WITH CHECK qual if present */
+	if (with_check_qual)
+		values[Anum_pg_policy_polwithcheck - 1] = CStringGetTextDatum(nodeToString(with_check_qual));
+	else
+		isnull[Anum_pg_policy_polwithcheck - 1] = true;
+
+	policy_tuple = heap_form_tuple(RelationGetDescr(pg_policy_rel), values,
+								   isnull);
+
+	CatalogTupleInsert(pg_policy_rel, policy_tuple);
+
+	/* Record Dependencies */
+	target.classId = RelationRelationId;
+	target.objectId = table_id;
+	target.objectSubId = 0;
+
+	myself.classId = PolicyRelationId;
+	myself.objectId = policy_id;
+	myself.objectSubId = 0;
+
+	/* AUTO dependency: dropping the table drops its policies. */
+	recordDependencyOn(&myself, &target, DEPENDENCY_AUTO);
+
+	/* NORMAL dependencies on whatever the quals reference. */
+	recordDependencyOnExpr(&myself, qual, qual_pstate->p_rtable,
+						   DEPENDENCY_NORMAL);
+
+	recordDependencyOnExpr(&myself, with_check_qual,
+						   with_check_pstate->p_rtable, DEPENDENCY_NORMAL);
+
+	/* Register role dependencies */
+	target.classId = AuthIdRelationId;
+	target.objectSubId = 0;
+	for (i = 0; i < nitems; i++)
+	{
+		target.objectId = DatumGetObjectId(role_oids[i]);
+		/* no dependency if public */
+		if (target.objectId != ACL_ID_PUBLIC)
+			recordSharedDependencyOn(&myself, &target,
+									 SHARED_DEPENDENCY_POLICY);
+	}
+
+	InvokeObjectPostCreateHook(PolicyRelationId, policy_id, 0);
+
+	/* Invalidate Relation Cache */
+	CacheInvalidateRelcache(target_table);
+
+	/* Clean up.  The lock on the target table is held until commit. */
+	heap_freetuple(policy_tuple);
+	free_parsestate(qual_pstate);
+	free_parsestate(with_check_pstate);
+	systable_endscan(sscan);
+	relation_close(target_table, NoLock);
+	table_close(pg_policy_rel, RowExclusiveLock);
+
+	return myself;
+}
+
+/*
+ * AlterPolicy -
+ *	 handles the execution of the ALTER POLICY command.
+ *
+ * stmt - the AlterPolicyStmt that describes the policy and how to alter it.
+ */
+ObjectAddress
+AlterPolicy(AlterPolicyStmt *stmt)
+{
+	Relation	pg_policy_rel;
+	Oid			policy_id;
+	Relation	target_table;
+	Oid			table_id;
+	Datum	   *role_oids = NULL;
+	int			nitems = 0;
+	ArrayType  *role_ids = NULL;
+	List	   *qual_parse_rtable = NIL;
+	List	   *with_check_parse_rtable = NIL;
+	Node	   *qual = NULL;
+	Node	   *with_check_qual = NULL;
+	ScanKeyData skey[2];
+	SysScanDesc sscan;
+	HeapTuple	policy_tuple;
+	HeapTuple	new_tuple;
+	Datum		values[Natts_pg_policy];
+	bool		isnull[Natts_pg_policy];
+	bool		replaces[Natts_pg_policy];
+	ObjectAddress target;
+	ObjectAddress myself;
+	Datum		polcmd_datum;
+	char		polcmd;
+	bool		polcmd_isnull;
+	int			i;
+
+	/* Parse role_ids */
+	if (stmt->roles != NULL)
+	{
+		role_oids = policy_role_list_to_array(stmt->roles, &nitems);
+		role_ids = construct_array(role_oids, nitems, OIDOID,
+								   sizeof(Oid), true, TYPALIGN_INT);
+	}
+
+	/* Get id of table.  Also handles permissions checks. */
+	table_id = RangeVarGetRelidExtended(stmt->table, AccessExclusiveLock,
+										0,
+										RangeVarCallbackForPolicy,
+										(void *) stmt);
+
+	/* Open target_table to build quals.  No additional lock is necessary. */
+	target_table = relation_open(table_id, NoLock);
+
+	/* Parse the using policy clause */
+	if (stmt->qual)
+	{
+		ParseNamespaceItem *nsitem;
+		ParseState *qual_pstate = make_parsestate(NULL);
+
+		nsitem = addRangeTableEntryForRelation(qual_pstate, target_table,
+											   AccessShareLock,
+											   NULL, false, false);
+
+		addNSItemToQuery(qual_pstate, nsitem, false, true, true);
+
+		qual = transformWhereClause(qual_pstate, stmt->qual,
+									EXPR_KIND_POLICY,
+									"POLICY");
+
+		/* Fix up collation information */
+		assign_expr_collations(qual_pstate, qual);
+
+		/* Keep the rtable; it is needed for dependency recording below. */
+		qual_parse_rtable = qual_pstate->p_rtable;
+		free_parsestate(qual_pstate);
+	}
+
+	/* Parse the with-check policy clause */
+	if (stmt->with_check)
+	{
+		ParseNamespaceItem *nsitem;
+		ParseState *with_check_pstate = make_parsestate(NULL);
+
+		nsitem = addRangeTableEntryForRelation(with_check_pstate, target_table,
+											   AccessShareLock,
+											   NULL, false, false);
+
+		addNSItemToQuery(with_check_pstate, nsitem, false, true, true);
+
+		with_check_qual = transformWhereClause(with_check_pstate,
+											   stmt->with_check,
+											   EXPR_KIND_POLICY,
+											   "POLICY");
+
+		/* Fix up collation information */
+		assign_expr_collations(with_check_pstate, with_check_qual);
+
+		/* Keep the rtable; it is needed for dependency recording below. */
+		with_check_parse_rtable = with_check_pstate->p_rtable;
+		free_parsestate(with_check_pstate);
+	}
+
+	/* zero-clear */
+	memset(values, 0, sizeof(values));
+	memset(replaces, 0, sizeof(replaces));
+	memset(isnull, 0, sizeof(isnull));
+
+	/* Find policy to update. */
+	pg_policy_rel = table_open(PolicyRelationId, RowExclusiveLock);
+
+	/* Set key - policy's relation id. */
+	ScanKeyInit(&skey[0],
+				Anum_pg_policy_polrelid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(table_id));
+
+	/* Set key - policy's name. */
+	ScanKeyInit(&skey[1],
+				Anum_pg_policy_polname,
+				BTEqualStrategyNumber, F_NAMEEQ,
+				CStringGetDatum(stmt->policy_name));
+
+	sscan = systable_beginscan(pg_policy_rel,
+							   PolicyPolrelidPolnameIndexId, true, NULL, 2,
+							   skey);
+
+	policy_tuple = systable_getnext(sscan);
+
+	/* Check that the policy is found, raise an error if not. */
+	if (!HeapTupleIsValid(policy_tuple))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("policy \"%s\" for table \"%s\" does not exist",
+						stmt->policy_name,
+						RelationGetRelationName(target_table))));
+
+	/* Get policy command */
+	polcmd_datum = heap_getattr(policy_tuple, Anum_pg_policy_polcmd,
+								RelationGetDescr(pg_policy_rel),
+								&polcmd_isnull);
+	Assert(!polcmd_isnull);
+	polcmd = DatumGetChar(polcmd_datum);
+
+	/*
+	 * If the command is SELECT or DELETE then WITH CHECK should be NULL.
+	 */
+	if ((polcmd == ACL_SELECT_CHR || polcmd == ACL_DELETE_CHR)
+		&& stmt->with_check != NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("only USING expression allowed for SELECT, DELETE")));
+
+	/*
+	 * If the command is INSERT then WITH CHECK should be the only expression
+	 * provided.
+	 */
+	if ((polcmd == ACL_INSERT_CHR)
+		&& stmt->qual != NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("only WITH CHECK expression allowed for INSERT")));
+
+	policy_id = ((Form_pg_policy) GETSTRUCT(policy_tuple))->oid;
+
+	if (role_ids != NULL)
+	{
+		replaces[Anum_pg_policy_polroles - 1] = true;
+		values[Anum_pg_policy_polroles - 1] = PointerGetDatum(role_ids);
+	}
+	else
+	{
+		Oid		   *roles;
+		Datum		roles_datum;
+		bool		attr_isnull;
+		ArrayType  *policy_roles;
+
+		/*
+		 * We need to pull the set of roles this policy applies to from what's
+		 * in the catalog, so that we can recreate the dependencies correctly
+		 * for the policy.
+		 */
+
+		roles_datum = heap_getattr(policy_tuple, Anum_pg_policy_polroles,
+								   RelationGetDescr(pg_policy_rel),
+								   &attr_isnull);
+		Assert(!attr_isnull);
+
+		policy_roles = DatumGetArrayTypePCopy(roles_datum);
+
+		roles = (Oid *) ARR_DATA_PTR(policy_roles);
+
+		nitems = ARR_DIMS(policy_roles)[0];
+
+		role_oids = (Datum *) palloc(nitems * sizeof(Datum));
+
+		for (i = 0; i < nitems; i++)
+			role_oids[i] = ObjectIdGetDatum(roles[i]);
+	}
+
+	if (qual != NULL)
+	{
+		replaces[Anum_pg_policy_polqual - 1] = true;
+		values[Anum_pg_policy_polqual - 1]
+			= CStringGetTextDatum(nodeToString(qual));
+	}
+	else
+	{
+		Datum		value_datum;
+		bool		attr_isnull;
+
+		/*
+		 * We need to pull the USING expression and build the range table for
+		 * the policy from what's in the catalog, so that we can recreate the
+		 * dependencies correctly for the policy.
+		 */
+
+		/* Check if the policy has a USING expr */
+		value_datum = heap_getattr(policy_tuple, Anum_pg_policy_polqual,
+								   RelationGetDescr(pg_policy_rel),
+								   &attr_isnull);
+		if (!attr_isnull)
+		{
+			char	   *qual_value;
+			ParseState *qual_pstate;
+
+			/* parsestate is built just to build the range table */
+			qual_pstate = make_parsestate(NULL);
+
+			qual_value = TextDatumGetCString(value_datum);
+			qual = stringToNode(qual_value);
+
+			/* Add this rel to the parsestate's rangetable, for dependencies */
+			(void) addRangeTableEntryForRelation(qual_pstate, target_table,
+												 AccessShareLock,
+												 NULL, false, false);
+
+			qual_parse_rtable = qual_pstate->p_rtable;
+			free_parsestate(qual_pstate);
+		}
+	}
+
+	if (with_check_qual != NULL)
+	{
+		replaces[Anum_pg_policy_polwithcheck - 1] = true;
+		values[Anum_pg_policy_polwithcheck - 1]
+			= CStringGetTextDatum(nodeToString(with_check_qual));
+	}
+	else
+	{
+		Datum		value_datum;
+		bool		attr_isnull;
+
+		/*
+		 * We need to pull the WITH CHECK expression and build the range table
+		 * for the policy from what's in the catalog, so that we can recreate
+		 * the dependencies correctly for the policy.
+		 */
+
+		/* Check if the policy has a WITH CHECK expr */
+		value_datum = heap_getattr(policy_tuple, Anum_pg_policy_polwithcheck,
+								   RelationGetDescr(pg_policy_rel),
+								   &attr_isnull);
+		if (!attr_isnull)
+		{
+			char	   *with_check_value;
+			ParseState *with_check_pstate;
+
+			/* parsestate is built just to build the range table */
+			with_check_pstate = make_parsestate(NULL);
+
+			with_check_value = TextDatumGetCString(value_datum);
+			with_check_qual = stringToNode(with_check_value);
+
+			/* Add this rel to the parsestate's rangetable, for dependencies */
+			(void) addRangeTableEntryForRelation(with_check_pstate,
+												 target_table,
+												 AccessShareLock,
+												 NULL, false, false);
+
+			with_check_parse_rtable = with_check_pstate->p_rtable;
+			free_parsestate(with_check_pstate);
+		}
+	}
+
+	/* Apply the replacements and update the catalog row. */
+	new_tuple = heap_modify_tuple(policy_tuple,
+								  RelationGetDescr(pg_policy_rel),
+								  values, isnull, replaces);
+	CatalogTupleUpdate(pg_policy_rel, &new_tuple->t_self, new_tuple);
+
+	/* Update Dependencies. */
+	deleteDependencyRecordsFor(PolicyRelationId, policy_id, false);
+
+	/* Record Dependencies */
+	target.classId = RelationRelationId;
+	target.objectId = table_id;
+	target.objectSubId = 0;
+
+	myself.classId = PolicyRelationId;
+	myself.objectId = policy_id;
+	myself.objectSubId = 0;
+
+	recordDependencyOn(&myself, &target, DEPENDENCY_AUTO);
+
+	recordDependencyOnExpr(&myself, qual, qual_parse_rtable, DEPENDENCY_NORMAL);
+
+	recordDependencyOnExpr(&myself, with_check_qual, with_check_parse_rtable,
+						   DEPENDENCY_NORMAL);
+
+	/* Register role dependencies */
+	deleteSharedDependencyRecordsFor(PolicyRelationId, policy_id, 0);
+	target.classId = AuthIdRelationId;
+	target.objectSubId = 0;
+	for (i = 0; i < nitems; i++)
+	{
+		target.objectId = DatumGetObjectId(role_oids[i]);
+		/* no dependency if public */
+		if (target.objectId != ACL_ID_PUBLIC)
+			recordSharedDependencyOn(&myself, &target,
+									 SHARED_DEPENDENCY_POLICY);
+	}
+
+	InvokeObjectPostAlterHook(PolicyRelationId, policy_id, 0);
+
+	heap_freetuple(new_tuple);
+
+	/* Invalidate Relation Cache */
+	CacheInvalidateRelcache(target_table);
+
+	/* Clean up.  The lock on the target table is held until commit. */
+	systable_endscan(sscan);
+	relation_close(target_table, NoLock);
+	table_close(pg_policy_rel, RowExclusiveLock);
+
+	return myself;
+}
+
+/*
+ * rename_policy -
+ *	 change the name of a policy on a relation
+ */
+ObjectAddress
+rename_policy(RenameStmt *stmt)
+{
+	Relation	pg_policy_rel;
+	Relation	target_table;
+	Oid			table_id;
+	Oid			opoloid;
+	ScanKeyData skey[2];
+	SysScanDesc sscan;
+	HeapTuple	policy_tuple;
+	ObjectAddress address;
+
+	/* Get id of table.  Also handles permissions checks. */
+	table_id = RangeVarGetRelidExtended(stmt->relation, AccessExclusiveLock,
+										0,
+										RangeVarCallbackForPolicy,
+										(void *) stmt);
+
+	target_table = relation_open(table_id, NoLock);
+
+	pg_policy_rel = table_open(PolicyRelationId, RowExclusiveLock);
+
+	/*
+	 * First pass -- check for conflict.  The AccessExclusiveLock taken on the
+	 * table above serializes concurrent policy DDL on it (cf. CreatePolicy
+	 * and AlterPolicy, which take the same lock).
+	 */
+
+	/* Add key - policy's relation id. */
+	ScanKeyInit(&skey[0],
+				Anum_pg_policy_polrelid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(table_id));
+
+	/* Add key - policy's name. */
+	ScanKeyInit(&skey[1],
+				Anum_pg_policy_polname,
+				BTEqualStrategyNumber, F_NAMEEQ,
+				CStringGetDatum(stmt->newname));
+
+	sscan = systable_beginscan(pg_policy_rel,
+							   PolicyPolrelidPolnameIndexId, true, NULL, 2,
+							   skey);
+
+	if (HeapTupleIsValid(systable_getnext(sscan)))
+		ereport(ERROR,
+				(errcode(ERRCODE_DUPLICATE_OBJECT),
+				 errmsg("policy \"%s\" for table \"%s\" already exists",
+						stmt->newname, RelationGetRelationName(target_table))));
+
+	systable_endscan(sscan);
+
+	/* Second pass -- find existing policy and update */
+	/* Add key - policy's relation id. */
+	ScanKeyInit(&skey[0],
+				Anum_pg_policy_polrelid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(table_id));
+
+	/* Add key - policy's name. */
+	ScanKeyInit(&skey[1],
+				Anum_pg_policy_polname,
+				BTEqualStrategyNumber, F_NAMEEQ,
+				CStringGetDatum(stmt->subname));
+
+	sscan = systable_beginscan(pg_policy_rel,
+							   PolicyPolrelidPolnameIndexId, true, NULL, 2,
+							   skey);
+
+	policy_tuple = systable_getnext(sscan);
+
+	/* Complain if we did not find the policy */
+	if (!HeapTupleIsValid(policy_tuple))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("policy \"%s\" for table \"%s\" does not exist",
+						stmt->subname, RelationGetRelationName(target_table))));
+
+	opoloid = ((Form_pg_policy) GETSTRUCT(policy_tuple))->oid;
+
+	/* Modify a local copy of the tuple, then write it back. */
+	policy_tuple = heap_copytuple(policy_tuple);
+
+	namestrcpy(&((Form_pg_policy) GETSTRUCT(policy_tuple))->polname,
+			   stmt->newname);
+
+	CatalogTupleUpdate(pg_policy_rel, &policy_tuple->t_self, policy_tuple);
+
+	InvokeObjectPostAlterHook(PolicyRelationId, opoloid, 0);
+
+	ObjectAddressSet(address, PolicyRelationId, opoloid);
+
+	/*
+	 * Invalidate relation's relcache entry so that other backends (and this
+	 * one too!) are sent SI message to make them rebuild relcache entries.
+	 * (Ideally this should happen automatically...)
+	 */
+	CacheInvalidateRelcache(target_table);
+
+	/* Clean up.  The lock on the target table is held until commit. */
+	systable_endscan(sscan);
+	table_close(pg_policy_rel, RowExclusiveLock);
+	relation_close(target_table, NoLock);
+
+	return address;
+}
+
+/*
+ * get_relation_policy_oid - Look up a policy by name to find its OID
+ *
+ * Searches pg_policy for a policy named policy_name on relation relid.
+ * If missing_ok is false, throw an error if policy not found.  If
+ * true, just return InvalidOid.
+ */
+Oid
+get_relation_policy_oid(Oid relid, const char *policy_name, bool missing_ok)
+{
+	Relation	policyrel;
+	ScanKeyData keys[2];
+	SysScanDesc scan;
+	HeapTuple	tup;
+	Oid			result = InvalidOid;
+
+	policyrel = table_open(PolicyRelationId, AccessShareLock);
+
+	/* Look up by (relation OID, policy name). */
+	ScanKeyInit(&keys[0],
+				Anum_pg_policy_polrelid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(relid));
+	ScanKeyInit(&keys[1],
+				Anum_pg_policy_polname,
+				BTEqualStrategyNumber, F_NAMEEQ,
+				CStringGetDatum(policy_name));
+
+	scan = systable_beginscan(policyrel,
+							  PolicyPolrelidPolnameIndexId, true, NULL, 2,
+							  keys);
+
+	tup = systable_getnext(scan);
+
+	if (HeapTupleIsValid(tup))
+		result = ((Form_pg_policy) GETSTRUCT(tup))->oid;
+	else if (!missing_ok)
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("policy \"%s\" for table \"%s\" does not exist",
+						policy_name, get_rel_name(relid))));
+
+	/* Clean up. */
+	systable_endscan(scan);
+	table_close(policyrel, AccessShareLock);
+
+	return result;
+}
+
+/*
+ * relation_has_policies - Determine if relation has any policies
+ *
+ * Returns true if at least one pg_policy entry exists for rel.
+ */
+bool
+relation_has_policies(Relation rel)
+{
+	Relation	policyrel;
+	ScanKeyData key;
+	SysScanDesc scan;
+	bool		found;
+
+	policyrel = table_open(PolicyRelationId, AccessShareLock);
+
+	ScanKeyInit(&key,
+				Anum_pg_policy_polrelid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(RelationGetRelid(rel)));
+	scan = systable_beginscan(policyrel, PolicyPolrelidPolnameIndexId, true,
+							  NULL, 1, &key);
+
+	/* Fetching a single matching tuple is enough to answer the question. */
+	found = HeapTupleIsValid(systable_getnext(scan));
+
+	systable_endscan(scan);
+	table_close(policyrel, AccessShareLock);
+
+	return found;
+}
diff --git a/src/backend/commands/portalcmds.c b/src/backend/commands/portalcmds.c
new file mode 100644
index 0000000..9902c5c
--- /dev/null
+++ b/src/backend/commands/portalcmds.c
@@ -0,0 +1,496 @@
+/*-------------------------------------------------------------------------
+ *
+ * portalcmds.c
+ * Utility commands affecting portals (that is, SQL cursor commands)
+ *
+ * Note: see also tcop/pquery.c, which implements portal operations for
+ * the FE/BE protocol. This module uses pquery.c for some operations.
+ * And both modules depend on utils/mmgr/portalmem.c, which controls
+ * storage management for portals (but doesn't run any queries in them).
+ *
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/commands/portalcmds.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <limits.h>
+
+#include "access/xact.h"
+#include "commands/portalcmds.h"
+#include "executor/executor.h"
+#include "executor/tstoreReceiver.h"
+#include "miscadmin.h"
+#include "rewrite/rewriteHandler.h"
+#include "tcop/pquery.h"
+#include "tcop/tcopprot.h"
+#include "utils/memutils.h"
+#include "utils/snapmgr.h"
+
+
+/*
+ * PerformCursorOpen
+ * Execute SQL DECLARE CURSOR command.
+ */
+void
+PerformCursorOpen(ParseState *pstate, DeclareCursorStmt *cstmt, ParamListInfo params,
+				  bool isTopLevel)
+{
+	Query	   *query = castNode(Query, cstmt->query);
+	List	   *rewritten;
+	PlannedStmt *plan;
+	Portal		portal;
+	MemoryContext oldContext;
+	char	   *queryString;
+
+	/*
+	 * Disallow empty-string cursor name (conflicts with protocol-level
+	 * unnamed portal).
+	 */
+	if (!cstmt->portalname || cstmt->portalname[0] == '\0')
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_CURSOR_NAME),
+				 errmsg("invalid cursor name: must not be empty")));
+
+	/*
+	 * If this is a non-holdable cursor, we require that this statement has
+	 * been executed inside a transaction block (or else, it would have no
+	 * user-visible effect).
+	 */
+	if (!(cstmt->options & CURSOR_OPT_HOLD))
+		RequireTransactionBlock(isTopLevel, "DECLARE CURSOR");
+	else if (InSecurityRestrictedOperation())
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 errmsg("cannot create a cursor WITH HOLD within security-restricted operation")));
+
+	/*
+	 * Parse analysis was done already, but we still have to run the rule
+	 * rewriter.  We do not do AcquireRewriteLocks: we assume the query either
+	 * came straight from the parser, or suitable locks were acquired by
+	 * plancache.c.
+	 */
+	rewritten = QueryRewrite(query);
+
+	/* SELECT should never rewrite to more or less than one query */
+	if (list_length(rewritten) != 1)
+		elog(ERROR, "non-SELECT statement in DECLARE CURSOR");
+
+	query = linitial_node(Query, rewritten);
+
+	if (query->commandType != CMD_SELECT)
+		elog(ERROR, "non-SELECT statement in DECLARE CURSOR");
+
+	/* Plan the query, applying the specified options */
+	plan = pg_plan_query(query, pstate->p_sourcetext, cstmt->options, params);
+
+	/*
+	 * Create a portal and copy the plan and query string into its memory.
+	 *
+	 * NOTE(review): the two false flags presumably mean "error out if a
+	 * portal with this name already exists" — confirm CreatePortal contract.
+	 */
+	portal = CreatePortal(cstmt->portalname, false, false);
+
+	oldContext = MemoryContextSwitchTo(portal->portalContext);
+
+	/*
+	 * These copies are made while the portal's context is current, so they
+	 * live exactly as long as the portal itself.
+	 */
+	plan = copyObject(plan);
+
+	queryString = pstrdup(pstate->p_sourcetext);
+
+	PortalDefineQuery(portal,
+					  NULL,
+					  queryString,
+					  CMDTAG_SELECT,	/* cursor's query is always a SELECT */
+					  list_make1(plan),
+					  NULL);
+
+	/*----------
+	 * Also copy the outer portal's parameter list into the inner portal's
+	 * memory context.  We want to pass down the parameter values in case we
+	 * had a command like
+	 *		DECLARE c CURSOR FOR SELECT ... WHERE foo = $1
+	 * This will have been parsed using the outer parameter set and the
+	 * parameter value needs to be preserved for use when the cursor is
+	 * executed.
+	 *----------
+	 */
+	params = copyParamList(params);
+
+	MemoryContextSwitchTo(oldContext);
+
+	/*
+	 * Set up options for portal.
+	 *
+	 * If the user didn't specify a SCROLL type, allow or disallow scrolling
+	 * based on whether it would require any additional runtime overhead to do
+	 * so.  Also, we disallow scrolling for FOR UPDATE cursors.
+	 */
+	portal->cursorOptions = cstmt->options;
+	if (!(portal->cursorOptions & (CURSOR_OPT_SCROLL | CURSOR_OPT_NO_SCROLL)))
+	{
+		/*
+		 * Scrolling is "free" only when the plan has no FOR UPDATE row marks
+		 * and every plan node supports backward scan.
+		 */
+		if (plan->rowMarks == NIL &&
+			ExecSupportsBackwardScan(plan->planTree))
+			portal->cursorOptions |= CURSOR_OPT_SCROLL;
+		else
+			portal->cursorOptions |= CURSOR_OPT_NO_SCROLL;
+	}
+
+	/*
+	 * Start execution, inserting parameters if any.
+	 */
+	PortalStart(portal, params, 0, GetActiveSnapshot());
+
+	Assert(portal->strategy == PORTAL_ONE_SELECT);
+
+	/*
+	 * We're done; the query won't actually be run until PerformPortalFetch is
+	 * called.
+	 */
+}
+
+/*
+ * PerformPortalFetch
+ * Execute SQL FETCH or MOVE command.
+ *
+ * stmt: parsetree node for command
+ * dest: where to send results
+ * qc: where to store a command completion status data.
+ *
+ * qc may be NULL if caller doesn't want status data.
+ */
+void
+PerformPortalFetch(FetchStmt *stmt,
+				   DestReceiver *dest,
+				   QueryCompletion *qc)
+{
+	Portal		portal;
+	uint64		nrows;
+
+	/*
+	 * An empty cursor name would collide with the protocol-level unnamed
+	 * portal, so reject it outright.
+	 */
+	if (stmt->portalname == NULL || stmt->portalname[0] == '\0')
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_CURSOR_NAME),
+				 errmsg("invalid cursor name: must not be empty")));
+
+	/* Resolve the cursor name to its portal. */
+	portal = GetPortalByName(stmt->portalname);
+	if (!PortalIsValid(portal))
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_CURSOR),
+				 errmsg("cursor \"%s\" does not exist", stmt->portalname)));
+		return;					/* keep compiler happy */
+	}
+
+	/* MOVE produces no output rows; route results to the bit bucket. */
+	if (stmt->ismove)
+		dest = None_Receiver;
+
+	/* Perform the fetch/move itself. */
+	nrows = PortalRunFetch(portal,
+						   stmt->direction,
+						   stmt->howMany,
+						   dest);
+
+	/* Fill in completion status, if the caller wants it. */
+	if (qc != NULL)
+		SetQueryCompletion(qc, stmt->ismove ? CMDTAG_MOVE : CMDTAG_FETCH,
+						   nrows);
+}
+
+/*
+ * PerformPortalClose
+ * Close a cursor.
+ */
+void
+PerformPortalClose(const char *name)
+{
+	Portal		portal;
+
+	/* A NULL name means CLOSE ALL: drop every portal in one call. */
+	if (name == NULL)
+	{
+		PortalHashTableDeleteAll();
+		return;
+	}
+
+	/*
+	 * An empty cursor name would collide with the protocol-level unnamed
+	 * portal, so reject it outright.
+	 */
+	if (name[0] == '\0')
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_CURSOR_NAME),
+				 errmsg("invalid cursor name: must not be empty")));
+
+	/* Resolve the cursor name to its portal. */
+	portal = GetPortalByName(name);
+	if (!PortalIsValid(portal))
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_CURSOR),
+				 errmsg("cursor \"%s\" does not exist", name)));
+		return;					/* keep compiler happy */
+	}
+
+	/* PortalDrop invokes PortalCleanup as a side-effect, if needed. */
+	PortalDrop(portal, false);
+}
+
+/*
+ * PortalCleanup
+ *
+ * Clean up a portal when it's dropped. This is the standard cleanup hook
+ * for portals.
+ *
+ * Note: if portal->status is PORTAL_FAILED, we are probably being called
+ * during error abort, and must be careful to avoid doing anything that
+ * is likely to fail again.
+ */
+void
+PortalCleanup(Portal portal)
+{
+	QueryDesc  *queryDesc;
+
+	/*
+	 * sanity checks
+	 */
+	AssertArg(PortalIsValid(portal));
+	AssertArg(portal->cleanup == PortalCleanup);
+
+	/*
+	 * Shut down executor, if still running.  We skip this during error abort,
+	 * since other mechanisms will take care of releasing executor resources,
+	 * and we can't be sure that ExecutorEnd itself wouldn't fail.
+	 */
+	queryDesc = portal->queryDesc;
+	if (queryDesc)
+	{
+		/*
+		 * Reset the queryDesc before anything else.  This prevents us from
+		 * trying to shut down the executor twice, in case of an error below.
+		 * The transaction abort mechanisms will take care of resource cleanup
+		 * in such a case.
+		 */
+		portal->queryDesc = NULL;
+
+		if (portal->status != PORTAL_FAILED)
+		{
+			ResourceOwner saveResourceOwner;
+
+			/* We must make the portal's resource owner current */
+			saveResourceOwner = CurrentResourceOwner;
+			if (portal->resowner)
+				CurrentResourceOwner = portal->resowner;
+
+			/* Finish, end, and free — in that order — then restore owner. */
+			ExecutorFinish(queryDesc);
+			ExecutorEnd(queryDesc);
+			FreeQueryDesc(queryDesc);
+
+			CurrentResourceOwner = saveResourceOwner;
+		}
+	}
+}
+
+/*
+ * PersistHoldablePortal
+ *
+ * Prepare the specified Portal for access outside of the current
+ * transaction. When this function returns, all future accesses to the
+ * portal must be done via the Tuplestore (not by invoking the
+ * executor).
+ */
+void
+PersistHoldablePortal(Portal portal)
+{
+	QueryDesc  *queryDesc = portal->queryDesc;
+	Portal		saveActivePortal;
+	ResourceOwner saveResourceOwner;
+	MemoryContext savePortalContext;
+	MemoryContext oldcxt;
+
+	/*
+	 * If we're preserving a holdable portal, we had better be inside the
+	 * transaction that originally created it.
+	 */
+	Assert(portal->createSubid != InvalidSubTransactionId);
+	Assert(queryDesc != NULL);
+
+	/*
+	 * Caller must have created the tuplestore already ... but not a snapshot.
+	 */
+	Assert(portal->holdContext != NULL);
+	Assert(portal->holdStore != NULL);
+	Assert(portal->holdSnapshot == NULL);
+
+	/*
+	 * Before closing down the executor, we must copy the tupdesc into
+	 * long-term memory, since it was created in executor memory.
+	 */
+	oldcxt = MemoryContextSwitchTo(portal->holdContext);
+
+	portal->tupDesc = CreateTupleDescCopy(portal->tupDesc);
+
+	MemoryContextSwitchTo(oldcxt);
+
+	/*
+	 * Check for improper portal use, and mark portal active.
+	 */
+	MarkPortalActive(portal);
+
+	/*
+	 * Set up global portal context pointers.  The saved values are restored
+	 * both on the success path and in the PG_CATCH error path below.
+	 */
+	saveActivePortal = ActivePortal;
+	saveResourceOwner = CurrentResourceOwner;
+	savePortalContext = PortalContext;
+	PG_TRY();
+	{
+		ScanDirection direction = ForwardScanDirection;
+
+		ActivePortal = portal;
+		if (portal->resowner)
+			CurrentResourceOwner = portal->resowner;
+		PortalContext = portal->portalContext;
+
+		MemoryContextSwitchTo(PortalContext);
+
+		/* Run under the query's own snapshot; popped after PG_END_TRY. */
+		PushActiveSnapshot(queryDesc->snapshot);
+
+		/*
+		 * If the portal is marked scrollable, we need to store the entire
+		 * result set in the tuplestore, so that subsequent backward FETCHs
+		 * can be processed.  Otherwise, store only the not-yet-fetched rows.
+		 * (The latter is not only more efficient, but avoids semantic
+		 * problems if the query's output isn't stable.)
+		 *
+		 * In the no-scroll case, tuple indexes in the tuplestore will not
+		 * match the cursor's nominal position (portalPos).  Currently this
+		 * causes no difficulty because we only navigate in the tuplestore by
+		 * relative position, except for the tuplestore_skiptuples call below
+		 * and the tuplestore_rescan call in DoPortalRewind, both of which are
+		 * disabled for no-scroll cursors.  But someday we might need to track
+		 * the offset between the holdStore and the cursor's nominal position
+		 * explicitly.
+		 */
+		if (portal->cursorOptions & CURSOR_OPT_SCROLL)
+		{
+			ExecutorRewind(queryDesc);
+		}
+		else
+		{
+			/*
+			 * If we already reached end-of-query, set the direction to
+			 * NoMovement to avoid trying to fetch any tuples.  (This check
+			 * exists because not all plan node types are robust about being
+			 * called again if they've already returned NULL once.)  We'll
+			 * still set up an empty tuplestore, though, to keep this from
+			 * being a special case later.
+			 */
+			if (portal->atEnd)
+				direction = NoMovementScanDirection;
+		}
+
+		/*
+		 * Change the destination to output to the tuplestore.  Note we tell
+		 * the tuplestore receiver to detoast all data passed through it; this
+		 * makes it safe to not keep a snapshot associated with the data.
+		 */
+		queryDesc->dest = CreateDestReceiver(DestTuplestore);
+		SetTuplestoreDestReceiverParams(queryDesc->dest,
+										portal->holdStore,
+										portal->holdContext,
+										true,
+										NULL,
+										NULL);
+
+		/* Fetch the result set into the tuplestore */
+		ExecutorRun(queryDesc, direction, 0L, false);
+
+		queryDesc->dest->rDestroy(queryDesc->dest);
+		queryDesc->dest = NULL;
+
+		/*
+		 * Now shut down the inner executor.
+		 */
+		portal->queryDesc = NULL;	/* prevent double shutdown */
+		ExecutorFinish(queryDesc);
+		ExecutorEnd(queryDesc);
+		FreeQueryDesc(queryDesc);
+
+		/*
+		 * Set the position in the result set.
+		 */
+		MemoryContextSwitchTo(portal->holdContext);
+
+		if (portal->atEnd)
+		{
+			/*
+			 * Just force the tuplestore forward to its end.  The size of the
+			 * skip request here is arbitrary.
+			 */
+			while (tuplestore_skiptuples(portal->holdStore, 1000000, true))
+				 /* continue */ ;
+		}
+		else
+		{
+			tuplestore_rescan(portal->holdStore);
+
+			/*
+			 * In the no-scroll case, the start of the tuplestore is exactly
+			 * where we want to be, so no repositioning is wanted.
+			 */
+			if (portal->cursorOptions & CURSOR_OPT_SCROLL)
+			{
+				if (!tuplestore_skiptuples(portal->holdStore,
+										   portal->portalPos,
+										   true))
+					elog(ERROR, "unexpected end of tuple stream");
+			}
+		}
+	}
+	PG_CATCH();
+	{
+		/* Uncaught error while executing portal: mark it dead */
+		MarkPortalFailed(portal);
+
+		/* Restore global vars and propagate error */
+		ActivePortal = saveActivePortal;
+		CurrentResourceOwner = saveResourceOwner;
+		PortalContext = savePortalContext;
+
+		PG_RE_THROW();
+	}
+	PG_END_TRY();
+
+	MemoryContextSwitchTo(oldcxt);
+
+	/* Mark portal not active */
+	portal->status = PORTAL_READY;
+
+	ActivePortal = saveActivePortal;
+	CurrentResourceOwner = saveResourceOwner;
+	PortalContext = savePortalContext;
+
+	/* Release the snapshot pushed inside the PG_TRY block. */
+	PopActiveSnapshot();
+
+	/*
+	 * We can now release any subsidiary memory of the portal's context; we'll
+	 * never use it again.  The executor already dropped its context, but this
+	 * will clean up anything that glommed onto the portal's context via
+	 * PortalContext.
+	 */
+	MemoryContextDeleteChildren(portal->portalContext);
+}
diff --git a/src/backend/commands/prepare.c b/src/backend/commands/prepare.c
new file mode 100644
index 0000000..fc5c7f9
--- /dev/null
+++ b/src/backend/commands/prepare.c
@@ -0,0 +1,729 @@
+/*-------------------------------------------------------------------------
+ *
+ * prepare.c
+ * Prepareable SQL statements via PREPARE, EXECUTE and DEALLOCATE
+ *
+ * This module also implements storage of prepared statements that are
+ * accessed via the extended FE/BE query protocol.
+ *
+ *
+ * Copyright (c) 2002-2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/commands/prepare.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <limits.h>
+
+#include "access/xact.h"
+#include "catalog/pg_type.h"
+#include "commands/createas.h"
+#include "commands/prepare.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "nodes/nodeFuncs.h"
+#include "parser/analyze.h"
+#include "parser/parse_coerce.h"
+#include "parser/parse_collate.h"
+#include "parser/parse_expr.h"
+#include "parser/parse_type.h"
+#include "rewrite/rewriteHandler.h"
+#include "tcop/pquery.h"
+#include "tcop/utility.h"
+#include "utils/builtins.h"
+#include "utils/snapmgr.h"
+#include "utils/timestamp.h"
+
+
+/*
+ * The hash table in which prepared queries are stored. This is
+ * per-backend: query plans are not shared between backends.
+ * The keys for this hash table are the arguments to PREPARE and EXECUTE
+ * (statement names); the entries are PreparedStatement structs.
+ */
+static HTAB *prepared_queries = NULL;
+
+static void InitQueryHashTable(void);
+static ParamListInfo EvaluateParams(ParseState *pstate,
+ PreparedStatement *pstmt, List *params,
+ EState *estate);
+static Datum build_regtype_array(Oid *param_types, int num_params);
+
+/*
+ * Implements the 'PREPARE' utility statement.
+ */
+void
+PrepareQuery(ParseState *pstate, PrepareStmt *stmt,
+			 int stmt_location, int stmt_len)
+{
+	RawStmt    *rawstmt;
+	CachedPlanSource *plansource;
+	Oid		   *argtypes = NULL;
+	int			nargs;
+	List	   *query_list;
+
+	/*
+	 * Disallow empty-string statement name (conflicts with protocol-level
+	 * unnamed statement).
+	 */
+	if (!stmt->name || stmt->name[0] == '\0')
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PSTATEMENT_DEFINITION),
+				 errmsg("invalid statement name: must not be empty")));
+
+	/*
+	 * Need to wrap the contained statement in a RawStmt node to pass it to
+	 * parse analysis.
+	 */
+	rawstmt = makeNode(RawStmt);
+	rawstmt->stmt = stmt->query;
+	rawstmt->stmt_location = stmt_location;
+	rawstmt->stmt_len = stmt_len;
+
+	/*
+	 * Create the CachedPlanSource before we do parse analysis, since it needs
+	 * to see the unmodified raw parse tree.
+	 */
+	plansource = CreateCachedPlan(rawstmt, pstate->p_sourcetext,
+								  CreateCommandTag(stmt->query));
+
+	/* Transform list of TypeNames to array of type OIDs */
+	nargs = list_length(stmt->argtypes);
+
+	if (nargs)
+	{
+		int			i;
+		ListCell   *l;
+
+		argtypes = (Oid *) palloc(nargs * sizeof(Oid));
+		i = 0;
+
+		foreach(l, stmt->argtypes)
+		{
+			TypeName   *tn = lfirst(l);
+			Oid			toid = typenameTypeId(pstate, tn);
+
+			argtypes[i++] = toid;
+		}
+	}
+
+	/*
+	 * Analyze the statement using these parameter types (any parameters
+	 * passed in from above us will not be visible to it), allowing
+	 * information about unknown parameters to be deduced from context.
+	 * Rewrite the query.  The result could be 0, 1, or many queries.
+	 *
+	 * Note that argtypes and nargs are passed by reference: types deduced
+	 * for parameters not declared by the user are added to the array.
+	 */
+	query_list = pg_analyze_and_rewrite_varparams(rawstmt, pstate->p_sourcetext,
+												  &argtypes, &nargs, NULL);
+
+	/* Finish filling in the CachedPlanSource */
+	CompleteCachedPlan(plansource,
+					   query_list,
+					   NULL,
+					   argtypes,
+					   nargs,
+					   NULL,
+					   NULL,
+					   CURSOR_OPT_PARALLEL_OK,	/* allow parallel mode */
+					   true);	/* fixed result */
+
+	/*
+	 * Save the results.
+	 */
+	StorePreparedStatement(stmt->name,
+						   plansource,
+						   true);
+}
+
+/*
+ * ExecuteQuery --- implement the 'EXECUTE' utility statement.
+ *
+ * This code also supports CREATE TABLE ... AS EXECUTE. That case is
+ * indicated by passing a non-null intoClause. The DestReceiver is already
+ * set up correctly for CREATE TABLE AS, but we still have to make a few
+ * other adjustments here.
+ */
+void
+ExecuteQuery(ParseState *pstate,
+			 ExecuteStmt *stmt, IntoClause *intoClause,
+			 ParamListInfo params,
+			 DestReceiver *dest, QueryCompletion *qc)
+{
+	PreparedStatement *entry;
+	CachedPlan *cplan;
+	List	   *plan_list;
+	ParamListInfo paramLI = NULL;
+	EState	   *estate = NULL;
+	Portal		portal;
+	char	   *query_string;
+	int			eflags;
+	long		count;
+
+	/* Look it up in the hash table; errors out if not found */
+	entry = FetchPreparedStatement(stmt->name, true);
+
+	/* Shouldn't find a non-fixed-result cached plan */
+	if (!entry->plansource->fixed_result)
+		elog(ERROR, "EXECUTE does not support variable-result cached plans");
+
+	/* Evaluate parameters, if any */
+	if (entry->plansource->num_params > 0)
+	{
+		/*
+		 * Need an EState to evaluate parameters; must not delete it till end
+		 * of query, in case parameters are pass-by-reference.  Note that the
+		 * passed-in "params" could possibly be referenced in the parameter
+		 * expressions.
+		 */
+		estate = CreateExecutorState();
+		estate->es_param_list_info = params;
+		paramLI = EvaluateParams(pstate, entry, stmt->params, estate);
+	}
+
+	/* Create a new portal to run the query in */
+	portal = CreateNewPortal();
+	/* Don't display the portal in pg_cursors, it is for internal use only */
+	portal->visible = false;
+
+	/* Copy the plan's saved query string into the portal's memory */
+	query_string = MemoryContextStrdup(portal->portalContext,
+									   entry->plansource->query_string);
+
+	/* Replan if needed, and increment plan refcount for portal */
+	cplan = GetCachedPlan(entry->plansource, paramLI, NULL, NULL);
+	plan_list = cplan->stmt_list;
+
+	/*
+	 * DO NOT add any logic that could possibly throw an error between
+	 * GetCachedPlan and PortalDefineQuery, or you'll leak the plan refcount.
+	 */
+	PortalDefineQuery(portal,
+					  NULL,
+					  query_string,
+					  entry->plansource->commandTag,
+					  plan_list,
+					  cplan);
+
+	/*
+	 * For CREATE TABLE ... AS EXECUTE, we must verify that the prepared
+	 * statement is one that produces tuples.  Currently we insist that it be
+	 * a plain old SELECT.  In future we might consider supporting other
+	 * things such as INSERT ... RETURNING, but there are a couple of issues
+	 * to be settled first, notably how WITH NO DATA should be handled in such
+	 * a case (do we really want to suppress execution?) and how to pass down
+	 * the OID-determining eflags (PortalStart won't handle them in such a
+	 * case, and for that matter it's not clear the executor will either).
+	 *
+	 * For CREATE TABLE ... AS EXECUTE, we also have to ensure that the proper
+	 * eflags and fetch count are passed to PortalStart/PortalRun.
+	 */
+	if (intoClause)
+	{
+		PlannedStmt *pstmt;
+
+		if (list_length(plan_list) != 1)
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("prepared statement is not a SELECT")));
+		pstmt = linitial_node(PlannedStmt, plan_list);
+		if (pstmt->commandType != CMD_SELECT)
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("prepared statement is not a SELECT")));
+
+		/* Set appropriate eflags */
+		eflags = GetIntoRelEFlags(intoClause);
+
+		/* And tell PortalRun whether to run to completion or not */
+		if (intoClause->skipData)
+			count = 0;			/* WITH NO DATA: fetch nothing */
+		else
+			count = FETCH_ALL;
+	}
+	else
+	{
+		/* Plain old EXECUTE */
+		eflags = 0;
+		count = FETCH_ALL;
+	}
+
+	/*
+	 * Run the portal as appropriate.
+	 */
+	PortalStart(portal, paramLI, eflags, GetActiveSnapshot());
+
+	(void) PortalRun(portal, count, false, true, dest, dest, qc);
+
+	PortalDrop(portal, false);
+
+	if (estate)
+		FreeExecutorState(estate);
+
+	/* No need to pfree other memory, MemoryContext will be reset */
+}
+
+/*
+ * EvaluateParams: evaluate a list of parameters.
+ *
+ * pstate: parse state
+ * pstmt: statement we are getting parameters for.
+ * params: list of given parameter expressions (raw parser output!)
+ * estate: executor state to use.
+ *
+ * Returns a filled-in ParamListInfo -- this can later be passed to
+ * CreateQueryDesc(), which allows the executor to make use of the parameters
+ * during query execution.
+ */
+static ParamListInfo
+EvaluateParams(ParseState *pstate, PreparedStatement *pstmt, List *params,
+			   EState *estate)
+{
+	Oid		   *param_types = pstmt->plansource->param_types;
+	int			num_params = pstmt->plansource->num_params;
+	int			nparams = list_length(params);
+	ParamListInfo paramLI;
+	List	   *exprstates;
+	ListCell   *l;
+	int			i;
+
+	/* The caller must supply exactly as many values as were declared. */
+	if (nparams != num_params)
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("wrong number of parameters for prepared statement \"%s\"",
+						pstmt->stmt_name),
+				 errdetail("Expected %d parameters but got %d.",
+						   num_params, nparams)));
+
+	/* Quick exit if no parameters */
+	if (num_params == 0)
+		return NULL;
+
+	/*
+	 * We have to run parse analysis for the expressions.  Since the parser is
+	 * not cool about scribbling on its input, copy first.
+	 */
+	params = copyObject(params);
+
+	i = 0;
+	foreach(l, params)
+	{
+		Node	   *expr = lfirst(l);
+		Oid			expected_type_id = param_types[i];
+		Oid			given_type_id;
+
+		/* Parse-analyze, then coerce to the declared parameter type. */
+		expr = transformExpr(pstate, expr, EXPR_KIND_EXECUTE_PARAMETER);
+
+		given_type_id = exprType(expr);
+
+		expr = coerce_to_target_type(pstate, expr, given_type_id,
+									 expected_type_id, -1,
+									 COERCION_ASSIGNMENT,
+									 COERCE_IMPLICIT_CAST,
+									 -1);
+
+		if (expr == NULL)
+			ereport(ERROR,
+					(errcode(ERRCODE_DATATYPE_MISMATCH),
+					 errmsg("parameter $%d of type %s cannot be coerced to the expected type %s",
+							i + 1,	/* user-facing params are 1-based */
+							format_type_be(given_type_id),
+							format_type_be(expected_type_id)),
+					 errhint("You will need to rewrite or cast the expression."),
+					 parser_errposition(pstate, exprLocation(lfirst(l)))));
+
+		/* Take care of collations in the finished expression. */
+		assign_expr_collations(pstate, expr);
+
+		lfirst(l) = expr;
+		i++;
+	}
+
+	/* Prepare the expressions for execution */
+	exprstates = ExecPrepareExprList(params, estate);
+
+	paramLI = makeParamList(num_params);
+
+	i = 0;
+	foreach(l, exprstates)
+	{
+		ExprState  *n = (ExprState *) lfirst(l);
+		ParamExternData *prm = &paramLI->params[i];
+
+		/* Evaluate each expression and store it as a constant parameter. */
+		prm->ptype = param_types[i];
+		prm->pflags = PARAM_FLAG_CONST;
+		prm->value = ExecEvalExprSwitchContext(n,
+											   GetPerTupleExprContext(estate),
+											   &prm->isnull);
+
+		i++;
+	}
+
+	return paramLI;
+}
+
+
+/*
+ * Initialize query hash table upon first use.
+ */
+static void
+InitQueryHashTable(void)
+{
+	HASHCTL		ctl;
+
+	/* Keys are statement names; entries are PreparedStatement structs. */
+	ctl.keysize = NAMEDATALEN;
+	ctl.entrysize = sizeof(PreparedStatement);
+
+	prepared_queries = hash_create("Prepared Queries",
+								   32,	/* initial size estimate */
+								   &ctl,
+								   HASH_ELEM | HASH_STRINGS);
+}
+
+/*
+ * Store all the data pertaining to a query in the hash table using
+ * the specified key. The passed CachedPlanSource should be "unsaved"
+ * in case we get an error here; we'll save it once we've created the hash
+ * table entry.
+ */
+void
+StorePreparedStatement(const char *stmt_name,
+					   CachedPlanSource *plansource,
+					   bool from_sql)
+{
+	PreparedStatement *entry;
+	TimestampTz cur_ts = GetCurrentStatementStartTimestamp();
+	bool		found;
+
+	/* Initialize the hash table, if necessary */
+	if (!prepared_queries)
+		InitQueryHashTable();
+
+	/* Add entry to hash table */
+	entry = (PreparedStatement *) hash_search(prepared_queries,
+											  stmt_name,
+											  HASH_ENTER,
+											  &found);
+
+	/* Shouldn't get a duplicate entry */
+	if (found)
+		ereport(ERROR,
+				(errcode(ERRCODE_DUPLICATE_PSTATEMENT),
+				 errmsg("prepared statement \"%s\" already exists",
+						stmt_name)));
+
+	/* Fill in the hash table entry */
+	entry->plansource = plansource;
+	entry->from_sql = from_sql;
+	entry->prepare_time = cur_ts;	/* statement start time of the PREPARE */
+
+	/* Now it's safe to move the CachedPlanSource to permanent memory */
+	SaveCachedPlan(plansource);
+}
+
+/*
+ * Lookup an existing query in the hash table. If the query does not
+ * actually exist, throw ereport(ERROR) or return NULL per second parameter.
+ *
+ * Note: this does not force the referenced plancache entry to be valid,
+ * since not all callers care.
+ */
+PreparedStatement *
+FetchPreparedStatement(const char *stmt_name, bool throwError)
+{
+	PreparedStatement *entry = NULL;
+
+	/*
+	 * An uninitialized hash table means nothing has been prepared yet, so
+	 * there is certainly no entry to find.
+	 */
+	if (prepared_queries != NULL)
+		entry = (PreparedStatement *) hash_search(prepared_queries,
+												  stmt_name,
+												  HASH_FIND,
+												  NULL);
+
+	/* Either complain or let the caller deal with a missing statement. */
+	if (entry == NULL && throwError)
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_PSTATEMENT),
+				 errmsg("prepared statement \"%s\" does not exist",
+						stmt_name)));
+
+	return entry;
+}
+
+/*
+ * Given a prepared statement, determine the result tupledesc it will
+ * produce. Returns NULL if the execution will not return tuples.
+ *
+ * Note: the result is created or copied into current memory context.
+ */
+TupleDesc
+FetchPreparedStatementResultDesc(PreparedStatement *stmt)
+{
+	/*
+	 * Prepared statements have fixed result tupdescs, so the cached
+	 * descriptor can be used without revalidating the plan.
+	 */
+	Assert(stmt->plansource->fixed_result);
+
+	if (stmt->plansource->resultDesc == NULL)
+		return NULL;
+
+	/* Copy into the current memory context, per this function's contract. */
+	return CreateTupleDescCopy(stmt->plansource->resultDesc);
+}
+
+/*
+ * Given a prepared statement that returns tuples, extract the query
+ * targetlist. Returns NIL if the statement doesn't have a determinable
+ * targetlist.
+ *
+ * Note: this is pretty ugly, but since it's only used in corner cases like
+ * Describe Statement on an EXECUTE command, we don't worry too much about
+ * efficiency.
+ */
+List *
+FetchPreparedStatementTargetList(PreparedStatement *stmt)
+{
+	/*
+	 * Grab the plan's primary targetlist, then copy it into the caller's
+	 * context so it survives any later plan invalidation.
+	 */
+	return copyObject(CachedPlanGetTargetList(stmt->plansource, NULL));
+}
+
+/*
+ * Implements the 'DEALLOCATE' utility statement: deletes the
+ * specified plan from storage.
+ */
+void
+DeallocateQuery(DeallocateStmt *stmt)
+{
+	/* A NULL name means DEALLOCATE ALL. */
+	if (stmt->name == NULL)
+		DropAllPreparedStatements();
+	else
+		DropPreparedStatement(stmt->name, true);
+}
+
+/*
+ * Internal version of DEALLOCATE
+ *
+ * If showError is false, dropping a nonexistent statement is a no-op.
+ */
+void
+DropPreparedStatement(const char *stmt_name, bool showError)
+{
+	PreparedStatement *entry;
+
+	/* Look up the statement; ereports if absent and showError is true. */
+	entry = FetchPreparedStatement(stmt_name, showError);
+
+	if (entry != NULL)
+	{
+		/* Release the plancache entry first, then the hashtable entry. */
+		DropCachedPlan(entry->plansource);
+		hash_search(prepared_queries, entry->stmt_name, HASH_REMOVE, NULL);
+	}
+}
+
+/*
+ * Drop all cached statements.
+ */
+void
+DropAllPreparedStatements(void)
+{
+	HASH_SEQ_STATUS scan;
+	PreparedStatement *entry;
+
+	/* If the hash table was never created, there is nothing to drop. */
+	if (prepared_queries == NULL)
+		return;
+
+	/* Walk every entry, dropping its plan and then the entry itself. */
+	hash_seq_init(&scan, prepared_queries);
+	while ((entry = hash_seq_search(&scan)) != NULL)
+	{
+		DropCachedPlan(entry->plansource);
+		hash_search(prepared_queries, entry->stmt_name, HASH_REMOVE, NULL);
+	}
+}
+
+/*
+ * Implements the 'EXPLAIN EXECUTE' utility statement.
+ *
+ * "into" is NULL unless we are doing EXPLAIN CREATE TABLE AS EXECUTE,
+ * in which case executing the query should result in creating that table.
+ *
+ * Note: the passed-in queryString is that of the EXPLAIN EXECUTE,
+ * not the original PREPARE; we get the latter string from the plancache.
+ */
+void
+ExplainExecuteQuery(ExecuteStmt *execstmt, IntoClause *into, ExplainState *es,
+					const char *queryString, ParamListInfo params,
+					QueryEnvironment *queryEnv)
+{
+	PreparedStatement *entry;
+	const char *query_string;
+	CachedPlan *cplan;
+	List	   *plan_list;
+	ListCell   *p;
+	ParamListInfo paramLI = NULL;
+	EState	   *estate = NULL;
+	instr_time	planstart;
+	instr_time	planduration;
+	BufferUsage bufusage_start,
+				bufusage;
+
+	/* Snapshot buffer counters and the clock before (re)planning starts. */
+	if (es->buffers)
+		bufusage_start = pgBufferUsage;
+	INSTR_TIME_SET_CURRENT(planstart);
+
+	/* Look it up in the hash table; errors out if not found */
+	entry = FetchPreparedStatement(execstmt->name, true);
+
+	/* Shouldn't find a non-fixed-result cached plan */
+	if (!entry->plansource->fixed_result)
+		elog(ERROR, "EXPLAIN EXECUTE does not support variable-result cached plans");
+
+	query_string = entry->plansource->query_string;
+
+	/* Evaluate parameters, if any */
+	if (entry->plansource->num_params)
+	{
+		ParseState *pstate;
+
+		pstate = make_parsestate(NULL);
+		pstate->p_sourcetext = queryString;
+
+		/*
+		 * Need an EState to evaluate parameters; must not delete it till end
+		 * of query, in case parameters are pass-by-reference.  Note that the
+		 * passed-in "params" could possibly be referenced in the parameter
+		 * expressions.
+		 */
+		estate = CreateExecutorState();
+		estate->es_param_list_info = params;
+
+		paramLI = EvaluateParams(pstate, entry, execstmt->params, estate);
+	}
+
+	/* Replan if needed, and acquire a transient refcount */
+	cplan = GetCachedPlan(entry->plansource, paramLI,
+						  CurrentResourceOwner, queryEnv);
+
+	/* Planning (and parameter evaluation) ends here; compute elapsed time. */
+	INSTR_TIME_SET_CURRENT(planduration);
+	INSTR_TIME_SUBTRACT(planduration, planstart);
+
+	/* calc differences of buffer counters. */
+	if (es->buffers)
+	{
+		memset(&bufusage, 0, sizeof(BufferUsage));
+		BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
+	}
+
+	plan_list = cplan->stmt_list;
+
+	/* Explain each query */
+	foreach(p, plan_list)
+	{
+		PlannedStmt *pstmt = lfirst_node(PlannedStmt, p);
+
+		if (pstmt->commandType != CMD_UTILITY)
+			ExplainOnePlan(pstmt, into, es, query_string, paramLI, queryEnv,
+						   &planduration, (es->buffers ? &bufusage : NULL));
+		else
+			ExplainOneUtility(pstmt->utilityStmt, into, es, query_string,
+							  paramLI, queryEnv);
+
+		/* No need for CommandCounterIncrement, as ExplainOnePlan did it */
+
+		/* Separate plans with an appropriate separator */
+		if (lnext(plan_list, p) != NULL)
+			ExplainSeparatePlans(es);
+	}
+
+	if (estate)
+		FreeExecutorState(estate);
+
+	/* Drop the transient refcount acquired by GetCachedPlan above. */
+	ReleaseCachedPlan(cplan, CurrentResourceOwner);
+}
+
+/*
+ * This set returning function reads all the prepared statements and
+ * returns a set of (name, statement, prepare_time, param_types, from_sql,
+ * generic_plans, custom_plans).
+ */
+Datum
+pg_prepared_statement(PG_FUNCTION_ARGS)
+{
+	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+
+	/*
+	 * We put all the tuples into a tuplestore in one scan of the hashtable.
+	 * This avoids any issue of the hashtable possibly changing between calls.
+	 */
+	InitMaterializedSRF(fcinfo, 0);
+
+	/* hash table might be uninitialized */
+	if (prepared_queries)
+	{
+		HASH_SEQ_STATUS hash_seq;
+		PreparedStatement *prep_stmt;
+
+		hash_seq_init(&hash_seq, prepared_queries);
+		while ((prep_stmt = hash_seq_search(&hash_seq)) != NULL)
+		{
+			Datum		values[7];
+			bool		nulls[7];
+
+			/* No column is ever NULL in this view. */
+			MemSet(nulls, 0, sizeof(nulls));
+
+			/* Columns: name, statement, prepare_time, param_types, from_sql,
+			 * generic_plans, custom_plans — in that order. */
+			values[0] = CStringGetTextDatum(prep_stmt->stmt_name);
+			values[1] = CStringGetTextDatum(prep_stmt->plansource->query_string);
+			values[2] = TimestampTzGetDatum(prep_stmt->prepare_time);
+			values[3] = build_regtype_array(prep_stmt->plansource->param_types,
+											prep_stmt->plansource->num_params);
+			values[4] = BoolGetDatum(prep_stmt->from_sql);
+			values[5] = Int64GetDatumFast(prep_stmt->plansource->num_generic_plans);
+			values[6] = Int64GetDatumFast(prep_stmt->plansource->num_custom_plans);
+
+			tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
+								 values, nulls);
+		}
+	}
+
+	return (Datum) 0;
+}
+
+/*
+ * This utility function takes a C array of Oids, and returns a Datum
+ * pointing to a one-dimensional Postgres array of regtypes. An empty
+ * array is returned as a zero-element array, not NULL.
+ */
+static Datum
+build_regtype_array(Oid *param_types, int num_params)
+{
+	Datum	   *elems;
+	ArrayType  *arr;
+	int			i;
+
+	/* Wrap each type OID in a Datum for the array constructor. */
+	elems = (Datum *) palloc(num_params * sizeof(Datum));
+	for (i = 0; i < num_params; i++)
+		elems[i] = ObjectIdGetDatum(param_types[i]);
+
+	/* XXX: this hardcodes assumptions about the regtype type */
+	arr = construct_array(elems, num_params, REGTYPEOID,
+						  4, true, TYPALIGN_INT);
+
+	return PointerGetDatum(arr);
+}
diff --git a/src/backend/commands/proclang.c b/src/backend/commands/proclang.c
new file mode 100644
index 0000000..4a093f4
--- /dev/null
+++ b/src/backend/commands/proclang.c
@@ -0,0 +1,239 @@
+/*-------------------------------------------------------------------------
+ *
+ * proclang.c
+ * PostgreSQL LANGUAGE support code.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/commands/proclang.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/table.h"
+#include "catalog/catalog.h"
+#include "catalog/dependency.h"
+#include "catalog/indexing.h"
+#include "catalog/objectaccess.h"
+#include "catalog/pg_language.h"
+#include "catalog/pg_namespace.h"
+#include "catalog/pg_proc.h"
+#include "catalog/pg_type.h"
+#include "commands/defrem.h"
+#include "commands/proclang.h"
+#include "miscadmin.h"
+#include "parser/parse_func.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
+
+
+/*
+ * CREATE LANGUAGE
+ *
+ * Create (or, with stmt->replace, replace) a procedural language catalog
+ * entry in pg_language.  Only superusers may do this.  The handler function
+ * must already exist and return type language_handler; optional inline and
+ * validator functions are looked up but their return types are not checked.
+ *
+ * When replacing an existing language, the OID, ownership and ACL of the
+ * old entry are preserved; normal dependencies are rebuilt from scratch.
+ *
+ * Returns the ObjectAddress of the (new or updated) language.
+ */
+ObjectAddress
+CreateProceduralLanguage(CreatePLangStmt *stmt)
+{
+	const char *languageName = stmt->plname;
+	Oid			languageOwner = GetUserId();
+	Oid			handlerOid,
+				inlineOid,
+				valOid;
+	Oid			funcrettype;
+	Oid			funcargtypes[1];
+	Relation	rel;
+	TupleDesc	tupDesc;
+	Datum		values[Natts_pg_language];
+	bool		nulls[Natts_pg_language];
+	bool		replaces[Natts_pg_language];
+	NameData	langname;
+	HeapTuple	oldtup;
+	HeapTuple	tup;
+	Oid			langoid;
+	bool		is_update;
+	ObjectAddress myself,
+				referenced;
+	ObjectAddresses *addrs;
+
+	/*
+	 * Check permission
+	 */
+	if (!superuser())
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 errmsg("must be superuser to create custom procedural language")));
+
+	/*
+	 * Lookup the PL handler function and check that it is of the expected
+	 * return type
+	 */
+	Assert(stmt->plhandler);
+	handlerOid = LookupFuncName(stmt->plhandler, 0, NULL, false);
+	funcrettype = get_func_rettype(handlerOid);
+	if (funcrettype != LANGUAGE_HANDLEROID)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("function %s must return type %s",
+						NameListToString(stmt->plhandler), "language_handler")));
+
+	/* validate the inline function (takes a single INTERNAL argument) */
+	if (stmt->plinline)
+	{
+		funcargtypes[0] = INTERNALOID;
+		inlineOid = LookupFuncName(stmt->plinline, 1, funcargtypes, false);
+		/* return value is ignored, so we don't check the type */
+	}
+	else
+		inlineOid = InvalidOid;
+
+	/* validate the validator function (takes a single OID argument) */
+	if (stmt->plvalidator)
+	{
+		funcargtypes[0] = OIDOID;
+		valOid = LookupFuncName(stmt->plvalidator, 1, funcargtypes, false);
+		/* return value is ignored, so we don't check the type */
+	}
+	else
+		valOid = InvalidOid;
+
+	/* ok to create it */
+	rel = table_open(LanguageRelationId, RowExclusiveLock);
+	tupDesc = RelationGetDescr(rel);
+
+	/* Prepare data to be inserted */
+	memset(values, 0, sizeof(values));
+	memset(nulls, false, sizeof(nulls));
+	memset(replaces, true, sizeof(replaces));
+
+	namestrcpy(&langname, languageName);
+	values[Anum_pg_language_lanname - 1] = NameGetDatum(&langname);
+	values[Anum_pg_language_lanowner - 1] = ObjectIdGetDatum(languageOwner);
+	values[Anum_pg_language_lanispl - 1] = BoolGetDatum(true);
+	values[Anum_pg_language_lanpltrusted - 1] = BoolGetDatum(stmt->pltrusted);
+	values[Anum_pg_language_lanplcallfoid - 1] = ObjectIdGetDatum(handlerOid);
+	values[Anum_pg_language_laninline - 1] = ObjectIdGetDatum(inlineOid);
+	values[Anum_pg_language_lanvalidator - 1] = ObjectIdGetDatum(valOid);
+	nulls[Anum_pg_language_lanacl - 1] = true;
+
+	/* Check for pre-existing definition */
+	oldtup = SearchSysCache1(LANGNAME, PointerGetDatum(languageName));
+
+	if (HeapTupleIsValid(oldtup))
+	{
+		Form_pg_language oldform = (Form_pg_language) GETSTRUCT(oldtup);
+
+		/* There is one; okay to replace it? */
+		if (!stmt->replace)
+			ereport(ERROR,
+					(errcode(ERRCODE_DUPLICATE_OBJECT),
+					 errmsg("language \"%s\" already exists", languageName)));
+
+		/* This is currently pointless, since we already checked superuser */
+#ifdef NOT_USED
+		if (!pg_language_ownercheck(oldform->oid, languageOwner))
+			aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_LANGUAGE,
+						   languageName);
+#endif
+
+		/*
+		 * Do not change existing oid, ownership or permissions.  Note
+		 * dependency-update code below has to agree with this decision.
+		 */
+		replaces[Anum_pg_language_oid - 1] = false;
+		replaces[Anum_pg_language_lanowner - 1] = false;
+		replaces[Anum_pg_language_lanacl - 1] = false;
+
+		/* Okay, do it... */
+		tup = heap_modify_tuple(oldtup, tupDesc, values, nulls, replaces);
+		CatalogTupleUpdate(rel, &tup->t_self, tup);
+
+		langoid = oldform->oid;
+		ReleaseSysCache(oldtup);
+		is_update = true;
+	}
+	else
+	{
+		/* Creating a new language: assign a fresh OID and insert */
+		langoid = GetNewOidWithIndex(rel, LanguageOidIndexId,
+									 Anum_pg_language_oid);
+		values[Anum_pg_language_oid - 1] = ObjectIdGetDatum(langoid);
+		tup = heap_form_tuple(tupDesc, values, nulls);
+		CatalogTupleInsert(rel, tup);
+		is_update = false;
+	}
+
+	/*
+	 * Create dependencies for the new language.  If we are updating an
+	 * existing language, first delete any existing pg_depend entries.
+	 * (However, since we are not changing ownership or permissions, the
+	 * shared dependencies do *not* need to change, and we leave them alone.)
+	 */
+	myself.classId = LanguageRelationId;
+	myself.objectId = langoid;
+	myself.objectSubId = 0;
+
+	if (is_update)
+		deleteDependencyRecordsFor(myself.classId, myself.objectId, true);
+
+	/* dependency on owner of language */
+	if (!is_update)
+		recordDependencyOnOwner(myself.classId, myself.objectId,
+								languageOwner);
+
+	/* dependency on extension */
+	recordDependencyOnCurrentExtension(&myself, is_update);
+
+	addrs = new_object_addresses();
+
+	/* dependency on the PL handler function */
+	ObjectAddressSet(referenced, ProcedureRelationId, handlerOid);
+	add_exact_object_address(&referenced, addrs);
+
+	/* dependency on the inline handler function, if any */
+	if (OidIsValid(inlineOid))
+	{
+		ObjectAddressSet(referenced, ProcedureRelationId, inlineOid);
+		add_exact_object_address(&referenced, addrs);
+	}
+
+	/* dependency on the validator function, if any */
+	if (OidIsValid(valOid))
+	{
+		ObjectAddressSet(referenced, ProcedureRelationId, valOid);
+		add_exact_object_address(&referenced, addrs);
+	}
+
+	record_object_address_dependencies(&myself, addrs, DEPENDENCY_NORMAL);
+	free_object_addresses(addrs);
+
+	/* Post creation hook for new procedural language */
+	InvokeObjectPostCreateHook(LanguageRelationId, myself.objectId, 0);
+
+	table_close(rel, RowExclusiveLock);
+
+	return myself;
+}
+
+/*
+ * get_language_oid - given a language name, look up the OID
+ *
+ * If missing_ok is false, throw an error if language name not found.  If
+ * true, just return InvalidOid.
+ */
+Oid
+get_language_oid(const char *langname, bool missing_ok)
+{
+	Oid			result;
+
+	result = GetSysCacheOid1(LANGNAME, Anum_pg_language_oid,
+							 CStringGetDatum(langname));
+	if (OidIsValid(result) || missing_ok)
+		return result;
+
+	ereport(ERROR,
+			(errcode(ERRCODE_UNDEFINED_OBJECT),
+			 errmsg("language \"%s\" does not exist", langname)));
+	return InvalidOid;			/* keep compiler quiet */
+}
diff --git a/src/backend/commands/publicationcmds.c b/src/backend/commands/publicationcmds.c
new file mode 100644
index 0000000..473c72e
--- /dev/null
+++ b/src/backend/commands/publicationcmds.c
@@ -0,0 +1,2006 @@
+/*-------------------------------------------------------------------------
+ *
+ * publicationcmds.c
+ * publication manipulation
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/commands/publicationcmds.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/htup_details.h"
+#include "access/table.h"
+#include "access/xact.h"
+#include "catalog/catalog.h"
+#include "catalog/indexing.h"
+#include "catalog/namespace.h"
+#include "catalog/objectaccess.h"
+#include "catalog/objectaddress.h"
+#include "catalog/partition.h"
+#include "catalog/pg_inherits.h"
+#include "catalog/pg_namespace.h"
+#include "catalog/pg_proc.h"
+#include "catalog/pg_publication.h"
+#include "catalog/pg_publication_namespace.h"
+#include "catalog/pg_publication_rel.h"
+#include "catalog/pg_type.h"
+#include "commands/dbcommands.h"
+#include "commands/defrem.h"
+#include "commands/event_trigger.h"
+#include "commands/publicationcmds.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "nodes/nodeFuncs.h"
+#include "parser/parse_clause.h"
+#include "parser/parse_collate.h"
+#include "parser/parse_relation.h"
+#include "storage/lmgr.h"
+#include "utils/acl.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/catcache.h"
+#include "utils/fmgroids.h"
+#include "utils/inval.h"
+#include "utils/lsyscache.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
+#include "utils/varlena.h"
+
+
+/*
+ * Information used to validate the columns in the row filter expression. See
+ * contain_invalid_rfcolumn_walker for details.
+ */
+typedef struct rf_context
+{
+	Bitmapset  *bms_replident;	/* bitset of replica identity columns */
+	bool		pubviaroot;		/* true if we are validating the parent
+								 * relation's row filter */
+	Oid			relid;			/* relid of the relation */
+	Oid			parentid;		/* relid of the parent relation */
+} rf_context;
+
+/* Forward declarations for helpers defined later in this file. */
+static List *OpenTableList(List *tables);
+static void CloseTableList(List *rels);
+static void LockSchemaList(List *schemalist);
+static void PublicationAddTables(Oid pubid, List *rels, bool if_not_exists,
+								 AlterPublicationStmt *stmt);
+static void PublicationDropTables(Oid pubid, List *rels, bool missing_ok);
+static void PublicationAddSchemas(Oid pubid, List *schemas, bool if_not_exists,
+								  AlterPublicationStmt *stmt);
+static void PublicationDropSchemas(Oid pubid, List *schemas, bool missing_ok);
+
+
+/*
+ * parse_publication_options
+ *		Parse the option list of CREATE/ALTER PUBLICATION.
+ *
+ * Recognized options:
+ *	 "publish" - comma-separated list of actions to publish ("insert",
+ *		"update", "delete", "truncate"); giving the option disables any
+ *		action not explicitly listed.
+ *	 "publish_via_partition_root" - boolean.
+ *
+ * On return, *publish_given and *publish_via_partition_root_given report
+ * whether each option appeared explicitly; *pubactions and
+ * *publish_via_partition_root receive the resulting settings (defaults:
+ * publish all actions, publish_via_partition_root = false).
+ *
+ * A duplicated or unrecognized option raises an error.
+ */
+static void
+parse_publication_options(ParseState *pstate,
+						  List *options,
+						  bool *publish_given,
+						  PublicationActions *pubactions,
+						  bool *publish_via_partition_root_given,
+						  bool *publish_via_partition_root)
+{
+	ListCell   *lc;
+
+	*publish_given = false;
+	*publish_via_partition_root_given = false;
+
+	/* defaults */
+	pubactions->pubinsert = true;
+	pubactions->pubupdate = true;
+	pubactions->pubdelete = true;
+	pubactions->pubtruncate = true;
+	*publish_via_partition_root = false;
+
+	/* Parse options */
+	foreach(lc, options)
+	{
+		DefElem    *defel = (DefElem *) lfirst(lc);
+
+		if (strcmp(defel->defname, "publish") == 0)
+		{
+			char	   *publish;
+			List	   *publish_list;
+			ListCell   *lc2;	/* renamed: don't shadow outer "lc" */
+
+			if (*publish_given)
+				errorConflictingDefElem(defel, pstate);
+
+			/*
+			 * If publish option was given only the explicitly listed actions
+			 * should be published.
+			 */
+			pubactions->pubinsert = false;
+			pubactions->pubupdate = false;
+			pubactions->pubdelete = false;
+			pubactions->pubtruncate = false;
+
+			*publish_given = true;
+			publish = defGetString(defel);
+
+			if (!SplitIdentifierString(publish, ',', &publish_list))
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("invalid list syntax in parameter \"%s\"",
+								"publish")));
+
+			/* Process the option list. */
+			foreach(lc2, publish_list)
+			{
+				char	   *publish_opt = (char *) lfirst(lc2);
+
+				if (strcmp(publish_opt, "insert") == 0)
+					pubactions->pubinsert = true;
+				else if (strcmp(publish_opt, "update") == 0)
+					pubactions->pubupdate = true;
+				else if (strcmp(publish_opt, "delete") == 0)
+					pubactions->pubdelete = true;
+				else if (strcmp(publish_opt, "truncate") == 0)
+					pubactions->pubtruncate = true;
+				else
+					ereport(ERROR,
+							(errcode(ERRCODE_SYNTAX_ERROR),
+							 errmsg("unrecognized value for publication option \"%s\": \"%s\"",
+									"publish", publish_opt)));
+			}
+		}
+		else if (strcmp(defel->defname, "publish_via_partition_root") == 0)
+		{
+			if (*publish_via_partition_root_given)
+				errorConflictingDefElem(defel, pstate);
+			*publish_via_partition_root_given = true;
+			*publish_via_partition_root = defGetBoolean(defel);
+		}
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("unrecognized publication parameter: \"%s\"", defel->defname)));
+	}
+}
+
+/*
+ * Convert the PublicationObjSpecType list into schema oid list and
+ * PublicationTable list.
+ *
+ * Table specs are appended to *rels as-is; schema specs are resolved to
+ * namespace OIDs and appended to *schemas with duplicates filtered out.
+ * CURRENT_SCHEMA resolves to the first valid entry of the search path.
+ *
+ * NOTE(review): pstate is not referenced in this function body; presumably
+ * kept for interface consistency with other parse-phase helpers — confirm.
+ */
+static void
+ObjectsInPublicationToOids(List *pubobjspec_list, ParseState *pstate,
+						   List **rels, List **schemas)
+{
+	ListCell   *cell;
+	PublicationObjSpec *pubobj;
+
+	if (!pubobjspec_list)
+		return;
+
+	foreach(cell, pubobjspec_list)
+	{
+		Oid			schemaid;
+		List	   *search_path;
+
+		pubobj = (PublicationObjSpec *) lfirst(cell);
+
+		switch (pubobj->pubobjtype)
+		{
+			case PUBLICATIONOBJ_TABLE:
+				*rels = lappend(*rels, pubobj->pubtable);
+				break;
+			case PUBLICATIONOBJ_TABLES_IN_SCHEMA:
+				schemaid = get_namespace_oid(pubobj->name, false);
+
+				/* Filter out duplicates if user specifies "sch1, sch1" */
+				*schemas = list_append_unique_oid(*schemas, schemaid);
+				break;
+			case PUBLICATIONOBJ_TABLES_IN_CUR_SCHEMA:
+				search_path = fetch_search_path(false);
+				if (search_path == NIL) /* nothing valid in search_path? */
+					ereport(ERROR,
+							errcode(ERRCODE_UNDEFINED_SCHEMA),
+							errmsg("no schema has been selected for CURRENT_SCHEMA"));
+
+				schemaid = linitial_oid(search_path);
+				list_free(search_path);
+
+				/* Filter out duplicates if user specifies "sch1, sch1" */
+				*schemas = list_append_unique_oid(*schemas, schemaid);
+				break;
+			default:
+				/* shouldn't happen */
+				elog(ERROR, "invalid publication object type %d", pubobj->pubobjtype);
+				break;
+		}
+	}
+}
+
+/*
+ * Returns true if any of the columns used in the row filter WHERE expression is
+ * not part of REPLICA IDENTITY, false otherwise.
+ *
+ * Expression-tree walker; "context" supplies the replica identity column
+ * bitmap (offset by FirstLowInvalidHeapAttributeNumber) plus the relation
+ * OIDs needed to translate parent attnums to child attnums when validating
+ * a parent table's row filter (pubviaroot case).
+ */
+static bool
+contain_invalid_rfcolumn_walker(Node *node, rf_context *context)
+{
+	if (node == NULL)
+		return false;
+
+	if (IsA(node, Var))
+	{
+		Var		   *var = (Var *) node;
+		AttrNumber	attnum = var->varattno;
+
+		/*
+		 * If pubviaroot is true, we are validating the row filter of the
+		 * parent table, but the bitmap contains the replica identity
+		 * information of the child table.  So, get the column number of the
+		 * child table as parent and child column order could be different.
+		 */
+		if (context->pubviaroot)
+		{
+			char	   *colname = get_attname(context->parentid, attnum, false);
+
+			attnum = get_attnum(context->relid, colname);
+		}
+
+		if (!bms_is_member(attnum - FirstLowInvalidHeapAttributeNumber,
+						   context->bms_replident))
+			return true;
+	}
+
+	/* Recurse; stop early (return true) as soon as a bad column is found. */
+	return expression_tree_walker(node, contain_invalid_rfcolumn_walker,
+								  (void *) context);
+}
+
+/*
+ * Check if all columns referenced in the filter expression are part of the
+ * REPLICA IDENTITY index or not.
+ *
+ * Returns true if any invalid column is found.
+ *
+ * Returns false immediately when the relation uses REPLICA IDENTITY FULL
+ * (all columns allowed), when there is no pg_publication_rel entry for the
+ * relation/publication pair, or when the stored row filter is NULL.
+ */
+bool
+pub_rf_contains_invalid_column(Oid pubid, Relation relation, List *ancestors,
+							   bool pubviaroot)
+{
+	HeapTuple	rftuple;
+	Oid			relid = RelationGetRelid(relation);
+	Oid			publish_as_relid = RelationGetRelid(relation);
+	bool		result = false;
+	Datum		rfdatum;
+	bool		rfisnull;
+
+	/*
+	 * FULL means all columns are in the REPLICA IDENTITY, so all columns are
+	 * allowed in the row filter and we can skip the validation.
+	 */
+	if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
+		return false;
+
+	/*
+	 * For a partition, if pubviaroot is true, find the topmost ancestor that
+	 * is published via this publication as we need to use its row filter
+	 * expression to filter the partition's changes.
+	 *
+	 * Note that even though the row filter used is for an ancestor, the
+	 * REPLICA IDENTITY used will be for the actual child table.
+	 */
+	if (pubviaroot && relation->rd_rel->relispartition)
+	{
+		publish_as_relid
+			= GetTopMostAncestorInPublication(pubid, ancestors, NULL);
+
+		if (!OidIsValid(publish_as_relid))
+			publish_as_relid = relid;
+	}
+
+	rftuple = SearchSysCache2(PUBLICATIONRELMAP,
+							  ObjectIdGetDatum(publish_as_relid),
+							  ObjectIdGetDatum(pubid));
+
+	if (!HeapTupleIsValid(rftuple))
+		return false;
+
+	rfdatum = SysCacheGetAttr(PUBLICATIONRELMAP, rftuple,
+							  Anum_pg_publication_rel_prqual,
+							  &rfisnull);
+
+	if (!rfisnull)
+	{
+		rf_context	context = {0};
+		Node	   *rfnode;
+		Bitmapset  *bms = NULL;
+
+		context.pubviaroot = pubviaroot;
+		context.parentid = publish_as_relid;
+		context.relid = relid;
+
+		/* Remember columns that are part of the REPLICA IDENTITY */
+		bms = RelationGetIndexAttrBitmap(relation,
+										 INDEX_ATTR_BITMAP_IDENTITY_KEY);
+
+		context.bms_replident = bms;
+		/* Deserialize the stored filter expression and walk it. */
+		rfnode = stringToNode(TextDatumGetCString(rfdatum));
+		result = contain_invalid_rfcolumn_walker(rfnode, &context);
+	}
+
+	ReleaseSysCache(rftuple);
+
+	return result;
+}
+
+/*
+ * Check if all columns referenced in the REPLICA IDENTITY are covered by
+ * the column list.
+ *
+ * Returns true if any replica identity column is not covered by column list.
+ *
+ * Also returns true when a column list exists together with REPLICA
+ * IDENTITY FULL (no column list is allowed in that case).  Returns false
+ * when there is no pg_publication_rel entry or no column list stored.
+ */
+bool
+pub_collist_contains_invalid_column(Oid pubid, Relation relation, List *ancestors,
+									bool pubviaroot)
+{
+	HeapTuple	tuple;
+	Oid			relid = RelationGetRelid(relation);
+	Oid			publish_as_relid = RelationGetRelid(relation);
+	bool		result = false;
+	Datum		datum;
+	bool		isnull;
+
+	/*
+	 * For a partition, if pubviaroot is true, find the topmost ancestor that
+	 * is published via this publication as we need to use its column list for
+	 * the changes.
+	 *
+	 * Note that even though the column list used is for an ancestor, the
+	 * REPLICA IDENTITY used will be for the actual child table.
+	 */
+	if (pubviaroot && relation->rd_rel->relispartition)
+	{
+		publish_as_relid = GetTopMostAncestorInPublication(pubid, ancestors, NULL);
+
+		if (!OidIsValid(publish_as_relid))
+			publish_as_relid = relid;
+	}
+
+	tuple = SearchSysCache2(PUBLICATIONRELMAP,
+							ObjectIdGetDatum(publish_as_relid),
+							ObjectIdGetDatum(pubid));
+
+	if (!HeapTupleIsValid(tuple))
+		return false;
+
+	datum = SysCacheGetAttr(PUBLICATIONRELMAP, tuple,
+							Anum_pg_publication_rel_prattrs,
+							&isnull);
+
+	if (!isnull)
+	{
+		int			x;
+		Bitmapset  *idattrs;
+		Bitmapset  *columns = NULL;
+
+		/* With REPLICA IDENTITY FULL, no column list is allowed. */
+		if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
+			result = true;
+
+		/* Transform the column list datum to a bitmapset. */
+		columns = pub_collist_to_bitmapset(NULL, datum, NULL);
+
+		/* Remember columns that are part of the REPLICA IDENTITY */
+		idattrs = RelationGetIndexAttrBitmap(relation,
+											 INDEX_ATTR_BITMAP_IDENTITY_KEY);
+
+		/*
+		 * Attnums in the bitmap returned by RelationGetIndexAttrBitmap are
+		 * offset (to handle system columns the usual way), while column list
+		 * does not use offset, so we can't do bms_is_subset().  Instead, we
+		 * have to loop over the idattrs and check all of them are in the
+		 * list.
+		 */
+		x = -1;
+		while ((x = bms_next_member(idattrs, x)) >= 0)
+		{
+			AttrNumber	attnum = (x + FirstLowInvalidHeapAttributeNumber);
+
+			/*
+			 * If pubviaroot is true, we are validating the column list of the
+			 * parent table, but the bitmap contains the replica identity
+			 * information of the child table.  The parent/child attnums may
+			 * not match, so translate them to the parent - get the attname
+			 * from the child, and look it up in the parent.
+			 */
+			if (pubviaroot)
+			{
+				/* attribute name in the child table */
+				char	   *colname = get_attname(relid, attnum, false);
+
+				/*
+				 * Determine the attnum for the attribute name in parent (we
+				 * are using the column list defined on the parent).
+				 */
+				attnum = get_attnum(publish_as_relid, colname);
+			}
+
+			/* replica identity column, not covered by the column list */
+			if (!bms_is_member(attnum, columns))
+			{
+				result = true;
+				break;
+			}
+		}
+
+		bms_free(idattrs);
+		bms_free(columns);
+	}
+
+	ReleaseSysCache(tuple);
+
+	return result;
+}
+
+/*
+ * check_functions_in_node callback: reject a function if it is either
+ * user-defined (OID in the normal-object range) or not immutable.
+ */
+static bool
+contain_mutable_or_user_functions_checker(Oid func_id, void *context)
+{
+	if (func_id >= FirstNormalObjectId)
+		return true;
+	return func_volatile(func_id) != PROVOLATILE_IMMUTABLE;
+}
+
+/*
+ * The row filter walker checks if the row filter expression is a "simple
+ * expression".
+ *
+ * It allows only simple or compound expressions such as:
+ * - (Var Op Const)
+ * - (Var Op Var)
+ * - (Var Op Const) AND/OR (Var Op Const)
+ * - etc
+ * (where Var is a column of the table this filter belongs to)
+ *
+ * The simple expression has the following restrictions:
+ * - User-defined operators are not allowed;
+ * - User-defined functions are not allowed;
+ * - User-defined types are not allowed;
+ * - User-defined collations are not allowed;
+ * - Non-immutable built-in functions are not allowed;
+ * - System columns are not allowed.
+ *
+ * NOTES
+ *
+ * We don't allow user-defined functions/operators/types/collations because
+ * (a) if a user drops a user-defined object used in a row filter expression or
+ * if there is any other error while using it, the logical decoding
+ * infrastructure won't be able to recover from such an error even if the
+ * object is recreated again because a historic snapshot is used to evaluate
+ * the row filter;
+ * (b) a user-defined function can be used to access tables that could have
+ * unpleasant results because a historic snapshot is used. That's why only
+ * immutable built-in functions are allowed in row filter expressions.
+ *
+ * We don't allow system columns because currently, we don't have that
+ * information in the tuple passed to downstream. Also, as we don't replicate
+ * those to subscribers, there doesn't seem to be a need for a filter on those
+ * columns.
+ *
+ * We can allow other node types after more analysis and testing.
+ *
+ * On any violation the walker ereports immediately (using pstate for the
+ * error cursor position); "user-defined" is detected as OID >=
+ * FirstNormalObjectId throughout.
+ */
+static bool
+check_simple_rowfilter_expr_walker(Node *node, ParseState *pstate)
+{
+	char	   *errdetail_msg = NULL;
+
+	if (node == NULL)
+		return false;
+
+	switch (nodeTag(node))
+	{
+		case T_Var:
+			/* System columns are not allowed. */
+			if (((Var *) node)->varattno < InvalidAttrNumber)
+				errdetail_msg = _("System columns are not allowed.");
+			break;
+		case T_OpExpr:
+		case T_DistinctExpr:
+		case T_NullIfExpr:
+			/* OK, except user-defined operators are not allowed. */
+			if (((OpExpr *) node)->opno >= FirstNormalObjectId)
+				errdetail_msg = _("User-defined operators are not allowed.");
+			break;
+		case T_ScalarArrayOpExpr:
+			/* OK, except user-defined operators are not allowed. */
+			if (((ScalarArrayOpExpr *) node)->opno >= FirstNormalObjectId)
+				errdetail_msg = _("User-defined operators are not allowed.");
+
+			/*
+			 * We don't need to check the hashfuncid and negfuncid of
+			 * ScalarArrayOpExpr as those functions are only built for a
+			 * subquery.
+			 */
+			break;
+		case T_RowCompareExpr:
+			{
+				ListCell   *opid;
+
+				/* OK, except user-defined operators are not allowed. */
+				foreach(opid, ((RowCompareExpr *) node)->opnos)
+				{
+					if (lfirst_oid(opid) >= FirstNormalObjectId)
+					{
+						errdetail_msg = _("User-defined operators are not allowed.");
+						break;
+					}
+				}
+			}
+			break;
+		case T_Const:
+		case T_FuncExpr:
+		case T_BoolExpr:
+		case T_RelabelType:
+		case T_CollateExpr:
+		case T_CaseExpr:
+		case T_CaseTestExpr:
+		case T_ArrayExpr:
+		case T_RowExpr:
+		case T_CoalesceExpr:
+		case T_MinMaxExpr:
+		case T_XmlExpr:
+		case T_NullTest:
+		case T_BooleanTest:
+		case T_List:
+			/* OK, supported */
+			break;
+		default:
+			errdetail_msg = _("Only columns, constants, built-in operators, built-in data types, built-in collations, and immutable built-in functions are allowed.");
+			break;
+	}
+
+	/*
+	 * For all the supported nodes, if we haven't already found a problem,
+	 * check the types, functions, and collations used in it.  We check List
+	 * by walking through each element.
+	 */
+	if (!errdetail_msg && !IsA(node, List))
+	{
+		if (exprType(node) >= FirstNormalObjectId)
+			errdetail_msg = _("User-defined types are not allowed.");
+		else if (check_functions_in_node(node, contain_mutable_or_user_functions_checker,
+										 (void *) pstate))
+			errdetail_msg = _("User-defined or built-in mutable functions are not allowed.");
+		else if (exprCollation(node) >= FirstNormalObjectId ||
+				 exprInputCollation(node) >= FirstNormalObjectId)
+			errdetail_msg = _("User-defined collations are not allowed.");
+	}
+
+	/*
+	 * If we found a problem in this node, throw error now. Otherwise keep
+	 * going.
+	 */
+	if (errdetail_msg)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("invalid publication WHERE expression"),
+				 errdetail_internal("%s", errdetail_msg),
+				 parser_errposition(pstate, exprLocation(node))));
+
+	return expression_tree_walker(node, check_simple_rowfilter_expr_walker,
+								  (void *) pstate);
+}
+
+/*
+ * Check if the row filter expression is a "simple expression".
+ *
+ * See check_simple_rowfilter_expr_walker for details.  Any disallowed
+ * construct causes an error to be raised inside the walker; a normal
+ * return means the expression passed validation.
+ */
+static bool
+check_simple_rowfilter_expr(Node *node, ParseState *pstate)
+{
+	return check_simple_rowfilter_expr_walker(node, pstate);
+}
+
+/*
+ * Transform the publication WHERE expression for all the relations in the list,
+ * ensuring it is coerced to boolean and necessary collation information is
+ * added if required, and add a new nsitem/RTE for the associated relation to
+ * the ParseState's namespace list.
+ *
+ * Also check the publication row filter expression and throw an error if
+ * anything not permitted or unexpected is encountered.
+ *
+ * "tables" is a list of PublicationRelInfo; each entry's whereClause is
+ * replaced in place by its transformed version (entries without a WHERE
+ * clause are skipped).  queryString is used only for error cursors.
+ */
+static void
+TransformPubWhereClauses(List *tables, const char *queryString,
+						 bool pubviaroot)
+{
+	ListCell   *lc;
+
+	foreach(lc, tables)
+	{
+		ParseNamespaceItem *nsitem;
+		Node	   *whereclause = NULL;
+		ParseState *pstate;
+		PublicationRelInfo *pri = (PublicationRelInfo *) lfirst(lc);
+
+		if (pri->whereClause == NULL)
+			continue;
+
+		/*
+		 * If the publication doesn't publish changes via the root partitioned
+		 * table, the partition's row filter will be used.  So disallow using
+		 * WHERE clause on partitioned table in this case.
+		 */
+		if (!pubviaroot &&
+			pri->relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("cannot use publication WHERE clause for relation \"%s\"",
+							RelationGetRelationName(pri->relation)),
+					 errdetail("WHERE clause cannot be used for a partitioned table when %s is false.",
+							   "publish_via_partition_root")));
+
+		/*
+		 * A fresh pstate is required so that we only have "this" table in its
+		 * rangetable
+		 */
+		pstate = make_parsestate(NULL);
+		pstate->p_sourcetext = queryString;
+		nsitem = addRangeTableEntryForRelation(pstate, pri->relation,
+											   AccessShareLock, NULL,
+											   false, false);
+		addNSItemToQuery(pstate, nsitem, false, true, true);
+
+		/* Note: the original expression is copied, not modified in place. */
+		whereclause = transformWhereClause(pstate,
+										   copyObject(pri->whereClause),
+										   EXPR_KIND_WHERE,
+										   "PUBLICATION WHERE");
+
+		/* Fix up collation information */
+		assign_expr_collations(pstate, whereclause);
+
+		/*
+		 * We allow only simple expressions in row filters.  See
+		 * check_simple_rowfilter_expr_walker.
+		 */
+		check_simple_rowfilter_expr(whereclause, pstate);
+
+		free_parsestate(pstate);
+
+		pri->whereClause = whereclause;
+	}
+}
+
+
+/*
+ * Given a list of tables that are going to be added to a publication,
+ * verify that they fulfill the necessary preconditions, namely: no tables
+ * have a column list if any schema is published; and partitioned tables do
+ * not have column lists if publish_via_partition_root is not set.
+ *
+ * 'publish_schema' indicates that the publication contains any TABLES IN
+ * SCHEMA elements (newly added in this command, or preexisting).
+ * 'pubviaroot' is the value of publish_via_partition_root.
+ *
+ * Raises an error on the first violation; tables without a column list are
+ * skipped.
+ */
+static void
+CheckPubRelationColumnList(char *pubname, List *tables,
+						   bool publish_schema, bool pubviaroot)
+{
+	ListCell   *lc;
+
+	foreach(lc, tables)
+	{
+		PublicationRelInfo *pri = (PublicationRelInfo *) lfirst(lc);
+
+		if (pri->columns == NIL)
+			continue;
+
+		/*
+		 * Disallow specifying column list if any schema is in the
+		 * publication.
+		 *
+		 * XXX We could instead just forbid the case when the publication
+		 * tries to publish the table with a column list and a schema for that
+		 * table.  However, if we do that then we need a restriction during
+		 * ALTER TABLE ... SET SCHEMA to prevent such a case which doesn't
+		 * seem to be a good idea.
+		 */
+		if (publish_schema)
+			ereport(ERROR,
+					errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					errmsg("cannot use column list for relation \"%s.%s\" in publication \"%s\"",
+						   get_namespace_name(RelationGetNamespace(pri->relation)),
+						   RelationGetRelationName(pri->relation), pubname),
+					errdetail("Column lists cannot be specified in publications containing FOR TABLES IN SCHEMA elements."));
+
+		/*
+		 * If the publication doesn't publish changes via the root partitioned
+		 * table, the partition's column list will be used.  So disallow using
+		 * a column list on the partitioned table in this case.
+		 */
+		if (!pubviaroot &&
+			pri->relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("cannot use column list for relation \"%s.%s\" in publication \"%s\"",
+							get_namespace_name(RelationGetNamespace(pri->relation)),
+							RelationGetRelationName(pri->relation), pubname),
+					 errdetail("Column lists cannot be specified for partitioned tables when %s is false.",
+							   "publish_via_partition_root")));
+	}
+}
+
+/*
+ * Create new publication.
+ *
+ * Requires CREATE privilege on the database; FOR ALL TABLES and FOR TABLES
+ * IN SCHEMA additionally require superuser.  Inserts the pg_publication
+ * tuple, then associates the listed tables/schemas (transforming any row
+ * filters and validating column lists first).  Returns the ObjectAddress
+ * of the new publication.
+ */
+ObjectAddress
+CreatePublication(ParseState *pstate, CreatePublicationStmt *stmt)
+{
+	Relation	rel;
+	ObjectAddress myself;
+	Oid			puboid;
+	bool		nulls[Natts_pg_publication];
+	Datum		values[Natts_pg_publication];
+	HeapTuple	tup;
+	bool		publish_given;
+	PublicationActions pubactions;
+	bool		publish_via_partition_root_given;
+	bool		publish_via_partition_root;
+	AclResult	aclresult;
+	List	   *relations = NIL;
+	List	   *schemaidlist = NIL;
+
+	/* must have CREATE privilege on database */
+	aclresult = pg_database_aclcheck(MyDatabaseId, GetUserId(), ACL_CREATE);
+	if (aclresult != ACLCHECK_OK)
+		aclcheck_error(aclresult, OBJECT_DATABASE,
+					   get_database_name(MyDatabaseId));
+
+	/* FOR ALL TABLES requires superuser */
+	if (stmt->for_all_tables && !superuser())
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 errmsg("must be superuser to create FOR ALL TABLES publication")));
+
+	rel = table_open(PublicationRelationId, RowExclusiveLock);
+
+	/* Check if name is used */
+	puboid = GetSysCacheOid1(PUBLICATIONNAME, Anum_pg_publication_oid,
+							 CStringGetDatum(stmt->pubname));
+	if (OidIsValid(puboid))
+		ereport(ERROR,
+				(errcode(ERRCODE_DUPLICATE_OBJECT),
+				 errmsg("publication \"%s\" already exists",
+						stmt->pubname)));
+
+	/* Form a tuple. */
+	memset(values, 0, sizeof(values));
+	memset(nulls, false, sizeof(nulls));
+
+	values[Anum_pg_publication_pubname - 1] =
+		DirectFunctionCall1(namein, CStringGetDatum(stmt->pubname));
+	values[Anum_pg_publication_pubowner - 1] = ObjectIdGetDatum(GetUserId());
+
+	parse_publication_options(pstate,
+							  stmt->options,
+							  &publish_given, &pubactions,
+							  &publish_via_partition_root_given,
+							  &publish_via_partition_root);
+
+	/* Assign a fresh OID for the new publication row. */
+	puboid = GetNewOidWithIndex(rel, PublicationObjectIndexId,
+								Anum_pg_publication_oid);
+	values[Anum_pg_publication_oid - 1] = ObjectIdGetDatum(puboid);
+	values[Anum_pg_publication_puballtables - 1] =
+		BoolGetDatum(stmt->for_all_tables);
+	values[Anum_pg_publication_pubinsert - 1] =
+		BoolGetDatum(pubactions.pubinsert);
+	values[Anum_pg_publication_pubupdate - 1] =
+		BoolGetDatum(pubactions.pubupdate);
+	values[Anum_pg_publication_pubdelete - 1] =
+		BoolGetDatum(pubactions.pubdelete);
+	values[Anum_pg_publication_pubtruncate - 1] =
+		BoolGetDatum(pubactions.pubtruncate);
+	values[Anum_pg_publication_pubviaroot - 1] =
+		BoolGetDatum(publish_via_partition_root);
+
+	tup = heap_form_tuple(RelationGetDescr(rel), values, nulls);
+
+	/* Insert tuple into catalog. */
+	CatalogTupleInsert(rel, tup);
+	heap_freetuple(tup);
+
+	recordDependencyOnOwner(PublicationRelationId, puboid, GetUserId());
+
+	ObjectAddressSet(myself, PublicationRelationId, puboid);
+
+	/* Make the changes visible (needed before adding member objects). */
+	CommandCounterIncrement();
+
+	/* Associate objects with the publication. */
+	if (stmt->for_all_tables)
+	{
+		/* Invalidate relcache so that publication info is rebuilt. */
+		CacheInvalidateRelcacheAll();
+	}
+	else
+	{
+		ObjectsInPublicationToOids(stmt->pubobjects, pstate, &relations,
+								   &schemaidlist);
+
+		/* FOR TABLES IN SCHEMA requires superuser */
+		if (schemaidlist != NIL && !superuser())
+			ereport(ERROR,
+					errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+					errmsg("must be superuser to create FOR TABLES IN SCHEMA publication"));
+
+		if (list_length(relations) > 0)
+		{
+			List	   *rels;
+
+			rels = OpenTableList(relations);
+			TransformPubWhereClauses(rels, pstate->p_sourcetext,
+									 publish_via_partition_root);
+
+			CheckPubRelationColumnList(stmt->pubname, rels,
+									   schemaidlist != NIL,
+									   publish_via_partition_root);
+
+			PublicationAddTables(puboid, rels, true, NULL);
+			CloseTableList(rels);
+		}
+
+		if (list_length(schemaidlist) > 0)
+		{
+			/*
+			 * Schema lock is held until the publication is created to prevent
+			 * concurrent schema deletion.
+			 */
+			LockSchemaList(schemaidlist);
+			PublicationAddSchemas(puboid, schemaidlist, true, NULL);
+		}
+	}
+
+	table_close(rel, RowExclusiveLock);
+
+	InvokeObjectPostCreateHook(PublicationRelationId, puboid, 0);
+
+	if (wal_level != WAL_LEVEL_LOGICAL)
+		ereport(WARNING,
+				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				 errmsg("wal_level is insufficient to publish logical changes"),
+				 errhint("Set wal_level to \"logical\" before creating subscriptions.")));
+
+	return myself;
+}
+
+/*
+ * Change options of a publication.
+ *
+ * Handles the "publish" action list and "publish_via_partition_root";
+ * object membership changes are handled elsewhere (AlterPublicationTables /
+ * AlterPublicationSchemas).  Caller supplies the open pg_publication
+ * relation (rel, RowExclusiveLock) and the publication's catalog tuple.
+ */
+static void
+AlterPublicationOptions(ParseState *pstate, AlterPublicationStmt *stmt,
+ Relation rel, HeapTuple tup)
+{
+ bool nulls[Natts_pg_publication];
+ bool replaces[Natts_pg_publication];
+ Datum values[Natts_pg_publication];
+ bool publish_given;
+ PublicationActions pubactions;
+ bool publish_via_partition_root_given;
+ bool publish_via_partition_root;
+ ObjectAddress obj;
+ Form_pg_publication pubform;
+ List *root_relids = NIL;
+ ListCell *lc;
+
+ parse_publication_options(pstate,
+ stmt->options,
+ &publish_given, &pubactions,
+ &publish_via_partition_root_given,
+ &publish_via_partition_root);
+
+ pubform = (Form_pg_publication) GETSTRUCT(tup);
+
+ /*
+ * If the publication doesn't publish changes via the root partitioned
+ * table, the partition's row filter and column list will be used. So
+ * disallow using WHERE clause and column lists on partitioned table in
+ * this case.
+ */
+ if (!pubform->puballtables && publish_via_partition_root_given &&
+ !publish_via_partition_root)
+ {
+ /*
+ * Lock the publication so nobody else can do anything with it. This
+ * prevents concurrent alter to add partitioned table(s) with WHERE
+ * clause(s) and/or column lists which we don't allow when not
+ * publishing via root.
+ */
+ LockDatabaseObject(PublicationRelationId, pubform->oid, 0,
+ AccessShareLock);
+
+ root_relids = GetPublicationRelations(pubform->oid,
+ PUBLICATION_PART_ROOT);
+
+ foreach(lc, root_relids)
+ {
+ Oid relid = lfirst_oid(lc);
+ HeapTuple rftuple;
+ char relkind;
+ char *relname;
+ bool has_rowfilter;
+ bool has_collist;
+
+ /*
+ * Beware: we don't have lock on the relations, so cope silently
+ * with the cache lookups returning NULL.
+ */
+
+ rftuple = SearchSysCache2(PUBLICATIONRELMAP,
+ ObjectIdGetDatum(relid),
+ ObjectIdGetDatum(pubform->oid));
+ if (!HeapTupleIsValid(rftuple))
+ continue;
+ has_rowfilter = !heap_attisnull(rftuple, Anum_pg_publication_rel_prqual, NULL);
+ has_collist = !heap_attisnull(rftuple, Anum_pg_publication_rel_prattrs, NULL);
+ if (!has_rowfilter && !has_collist)
+ {
+ ReleaseSysCache(rftuple);
+ continue;
+ }
+
+ /* Only partitioned tables are a problem here; skip the rest. */
+ relkind = get_rel_relkind(relid);
+ if (relkind != RELKIND_PARTITIONED_TABLE)
+ {
+ ReleaseSysCache(rftuple);
+ continue;
+ }
+ relname = get_rel_name(relid);
+ if (relname == NULL) /* table concurrently dropped */
+ {
+ ReleaseSysCache(rftuple);
+ continue;
+ }
+
+ if (has_rowfilter)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cannot set parameter \"%s\" to false for publication \"%s\"",
+ "publish_via_partition_root",
+ stmt->pubname),
+ errdetail("The publication contains a WHERE clause for partitioned table \"%s\", which is not allowed when \"%s\" is false.",
+ relname, "publish_via_partition_root")));
+ /* No row filter, so it must be the column list that's present. */
+ Assert(has_collist);
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cannot set parameter \"%s\" to false for publication \"%s\"",
+ "publish_via_partition_root",
+ stmt->pubname),
+ errdetail("The publication contains a column list for partitioned table \"%s\", which is not allowed when \"%s\" is false.",
+ relname, "publish_via_partition_root")));
+ }
+ }
+
+ /* Everything ok, form a new tuple. */
+ memset(values, 0, sizeof(values));
+ memset(nulls, false, sizeof(nulls));
+ memset(replaces, false, sizeof(replaces));
+
+ if (publish_given)
+ {
+ values[Anum_pg_publication_pubinsert - 1] = BoolGetDatum(pubactions.pubinsert);
+ replaces[Anum_pg_publication_pubinsert - 1] = true;
+
+ values[Anum_pg_publication_pubupdate - 1] = BoolGetDatum(pubactions.pubupdate);
+ replaces[Anum_pg_publication_pubupdate - 1] = true;
+
+ values[Anum_pg_publication_pubdelete - 1] = BoolGetDatum(pubactions.pubdelete);
+ replaces[Anum_pg_publication_pubdelete - 1] = true;
+
+ values[Anum_pg_publication_pubtruncate - 1] = BoolGetDatum(pubactions.pubtruncate);
+ replaces[Anum_pg_publication_pubtruncate - 1] = true;
+ }
+
+ if (publish_via_partition_root_given)
+ {
+ values[Anum_pg_publication_pubviaroot - 1] = BoolGetDatum(publish_via_partition_root);
+ replaces[Anum_pg_publication_pubviaroot - 1] = true;
+ }
+
+ tup = heap_modify_tuple(tup, RelationGetDescr(rel), values, nulls,
+ replaces);
+
+ /* Update the catalog. */
+ CatalogTupleUpdate(rel, &tup->t_self, tup);
+
+ CommandCounterIncrement();
+
+ /* Re-fetch the struct pointer from the modified tuple. */
+ pubform = (Form_pg_publication) GETSTRUCT(tup);
+
+ /* Invalidate the relcache. */
+ if (pubform->puballtables)
+ {
+ CacheInvalidateRelcacheAll();
+ }
+ else
+ {
+ List *relids = NIL;
+ List *schemarelids = NIL;
+
+ /*
+ * For any partitioned tables contained in the publication, we must
+ * invalidate all partitions contained in the respective partition
+ * trees, not just those explicitly mentioned in the publication.
+ */
+ if (root_relids == NIL)
+ relids = GetPublicationRelations(pubform->oid,
+ PUBLICATION_PART_ALL);
+ else
+ {
+ /*
+ * We already got tables explicitly mentioned in the publication.
+ * Now get all partitions for the partitioned table in the list.
+ */
+ foreach(lc, root_relids)
+ relids = GetPubPartitionOptionRelations(relids,
+ PUBLICATION_PART_ALL,
+ lfirst_oid(lc));
+ }
+
+ schemarelids = GetAllSchemaPublicationRelations(pubform->oid,
+ PUBLICATION_PART_ALL);
+ relids = list_concat_unique_oid(relids, schemarelids);
+
+ InvalidatePublicationRels(relids);
+ }
+
+ ObjectAddressSet(obj, PublicationRelationId, pubform->oid);
+ EventTriggerCollectSimpleCommand(obj, InvalidObjectAddress,
+ (Node *) stmt);
+
+ InvokeObjectPostAlterHook(PublicationRelationId, pubform->oid, 0);
+}
+
+/*
+ * Send relcache invalidations for every relation in relids.
+ *
+ * Once the list grows past MAX_RELCACHE_INVAL_MSGS individual messages,
+ * resetting the whole relcache is cheaper than queueing them one by one.
+ */
+void
+InvalidatePublicationRels(List *relids)
+{
+ ListCell *cell;
+
+ /* Too many relations?  Just reset the whole relcache instead. */
+ if (list_length(relids) >= MAX_RELCACHE_INVAL_MSGS)
+ {
+ CacheInvalidateRelcacheAll();
+ return;
+ }
+
+ foreach(cell, relids)
+ CacheInvalidateRelcacheByRelid(lfirst_oid(cell));
+}
+
+/*
+ * Add or remove table to/from publication.
+ *
+ * Implements ALTER PUBLICATION ... ADD/DROP/SET TABLE.  For SET, existing
+ * member relations whose WHERE clause and column list exactly match a
+ * newly-specified entry are kept; all other existing members are dropped
+ * before the new list is (re-)added.  publish_schema tells us whether the
+ * same command also specifies schemas (column lists are then restricted;
+ * see CheckPubRelationColumnList).
+ */
+static void
+AlterPublicationTables(AlterPublicationStmt *stmt, HeapTuple tup,
+ List *tables, const char *queryString,
+ bool publish_schema)
+{
+ List *rels = NIL;
+ Form_pg_publication pubform = (Form_pg_publication) GETSTRUCT(tup);
+ Oid pubid = pubform->oid;
+
+ /*
+ * Nothing to do if no objects, except in SET: for that it is quite
+ * possible that user has not specified any tables in which case we need
+ * to remove all the existing tables.
+ */
+ if (!tables && stmt->action != AP_SetObjects)
+ return;
+
+ rels = OpenTableList(tables);
+
+ if (stmt->action == AP_AddObjects)
+ {
+ TransformPubWhereClauses(rels, queryString, pubform->pubviaroot);
+
+ /* Also restrict column lists if the publication already has schemas. */
+ publish_schema |= is_schema_publication(pubid);
+
+ CheckPubRelationColumnList(stmt->pubname, rels, publish_schema,
+ pubform->pubviaroot);
+
+ PublicationAddTables(pubid, rels, false, stmt);
+ }
+ else if (stmt->action == AP_DropObjects)
+ PublicationDropTables(pubid, rels, false);
+ else /* AP_SetObjects */
+ {
+ List *oldrelids = GetPublicationRelations(pubid,
+ PUBLICATION_PART_ROOT);
+ List *delrels = NIL;
+ ListCell *oldlc;
+
+ TransformPubWhereClauses(rels, queryString, pubform->pubviaroot);
+
+ CheckPubRelationColumnList(stmt->pubname, rels, publish_schema,
+ pubform->pubviaroot);
+
+ /*
+ * To recreate the relation list for the publication, look for
+ * existing relations that do not need to be dropped.
+ */
+ foreach(oldlc, oldrelids)
+ {
+ Oid oldrelid = lfirst_oid(oldlc);
+ ListCell *newlc;
+ PublicationRelInfo *oldrel;
+ bool found = false;
+ HeapTuple rftuple;
+ Node *oldrelwhereclause = NULL;
+ Bitmapset *oldcolumns = NULL;
+
+ /* look up the cache for the old relmap */
+ rftuple = SearchSysCache2(PUBLICATIONRELMAP,
+ ObjectIdGetDatum(oldrelid),
+ ObjectIdGetDatum(pubid));
+
+ /*
+ * See if the existing relation currently has a WHERE clause or a
+ * column list. We need to compare those too.
+ */
+ if (HeapTupleIsValid(rftuple))
+ {
+ bool isnull = true;
+ Datum whereClauseDatum;
+ Datum columnListDatum;
+
+ /* Load the WHERE clause for this table. */
+ whereClauseDatum = SysCacheGetAttr(PUBLICATIONRELMAP, rftuple,
+ Anum_pg_publication_rel_prqual,
+ &isnull);
+ if (!isnull)
+ oldrelwhereclause = stringToNode(TextDatumGetCString(whereClauseDatum));
+
+ /* Transform the int2vector column list to a bitmap. */
+ columnListDatum = SysCacheGetAttr(PUBLICATIONRELMAP, rftuple,
+ Anum_pg_publication_rel_prattrs,
+ &isnull);
+
+ if (!isnull)
+ oldcolumns = pub_collist_to_bitmapset(NULL, columnListDatum, NULL);
+
+ ReleaseSysCache(rftuple);
+ }
+
+ foreach(newlc, rels)
+ {
+ PublicationRelInfo *newpubrel;
+ Oid newrelid;
+ Bitmapset *newcolumns = NULL;
+
+ newpubrel = (PublicationRelInfo *) lfirst(newlc);
+ newrelid = RelationGetRelid(newpubrel->relation);
+
+ /*
+ * If the new publication has column list, transform it to a
+ * bitmap too.
+ */
+ if (newpubrel->columns)
+ {
+ ListCell *lc;
+
+ foreach(lc, newpubrel->columns)
+ {
+ char *colname = strVal(lfirst(lc));
+ AttrNumber attnum = get_attnum(newrelid, colname);
+
+ newcolumns = bms_add_member(newcolumns, attnum);
+ }
+ }
+
+ /*
+ * Check if any of the new set of relations matches with the
+ * existing relations in the publication. Additionally, if the
+ * relation has an associated WHERE clause, check the WHERE
+ * expressions also match. Same for the column list. Drop the
+ * rest.
+ */
+ if (RelationGetRelid(newpubrel->relation) == oldrelid)
+ {
+ if (equal(oldrelwhereclause, newpubrel->whereClause) &&
+ bms_equal(oldcolumns, newcolumns))
+ {
+ found = true;
+ break;
+ }
+ }
+ }
+
+ /*
+ * Add the non-matched relations to a list so that they can be
+ * dropped.
+ */
+ if (!found)
+ {
+ oldrel = palloc(sizeof(PublicationRelInfo));
+ oldrel->whereClause = NULL;
+ oldrel->columns = NIL;
+ oldrel->relation = table_open(oldrelid,
+ ShareUpdateExclusiveLock);
+ delrels = lappend(delrels, oldrel);
+ }
+ }
+
+ /* And drop them. */
+ PublicationDropTables(pubid, delrels, true);
+
+ /*
+ * Don't bother calculating the difference for adding, we'll catch and
+ * skip existing ones when doing catalog update.
+ */
+ PublicationAddTables(pubid, rels, true, stmt);
+
+ CloseTableList(delrels);
+ }
+
+ CloseTableList(rels);
+}
+
+/*
+ * Alter the publication schemas.
+ *
+ * Add or remove schemas to/from publication.
+ *
+ * Implements ALTER PUBLICATION ... ADD/DROP/SET TABLES IN SCHEMA.
+ * Schemas are locked (AccessShareLock) before any catalog change so they
+ * cannot be dropped concurrently.
+ */
+static void
+AlterPublicationSchemas(AlterPublicationStmt *stmt,
+ HeapTuple tup, List *schemaidlist)
+{
+ Form_pg_publication pubform = (Form_pg_publication) GETSTRUCT(tup);
+
+ /*
+ * Nothing to do if no objects, except in SET: for that it is quite
+ * possible that user has not specified any schemas in which case we need
+ * to remove all the existing schemas.
+ */
+ if (!schemaidlist && stmt->action != AP_SetObjects)
+ return;
+
+ /*
+ * Schema lock is held until the publication is altered to prevent
+ * concurrent schema deletion.
+ */
+ LockSchemaList(schemaidlist);
+ if (stmt->action == AP_AddObjects)
+ {
+ ListCell *lc;
+ List *reloids;
+
+ reloids = GetPublicationRelations(pubform->oid, PUBLICATION_PART_ROOT);
+
+ foreach(lc, reloids)
+ {
+ HeapTuple coltuple;
+
+ coltuple = SearchSysCache2(PUBLICATIONRELMAP,
+ ObjectIdGetDatum(lfirst_oid(lc)),
+ ObjectIdGetDatum(pubform->oid));
+
+ if (!HeapTupleIsValid(coltuple))
+ continue;
+
+ /*
+ * Disallow adding schema if column list is already part of the
+ * publication. See CheckPubRelationColumnList.
+ */
+ if (!heap_attisnull(coltuple, Anum_pg_publication_rel_prattrs, NULL))
+ ereport(ERROR,
+ errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cannot add schema to publication \"%s\"",
+ stmt->pubname),
+ errdetail("Schemas cannot be added if any tables that specify a column list are already part of the publication."));
+
+ ReleaseSysCache(coltuple);
+ }
+
+ PublicationAddSchemas(pubform->oid, schemaidlist, false, stmt);
+ }
+ else if (stmt->action == AP_DropObjects)
+ PublicationDropSchemas(pubform->oid, schemaidlist, false);
+ else /* AP_SetObjects */
+ {
+ List *oldschemaids = GetPublicationSchemas(pubform->oid);
+ List *delschemas = NIL;
+
+ /* Identify which schemas should be dropped */
+ delschemas = list_difference_oid(oldschemaids, schemaidlist);
+
+ /*
+ * Schema lock is held until the publication is altered to prevent
+ * concurrent schema deletion.
+ */
+ LockSchemaList(delschemas);
+
+ /* And drop them */
+ PublicationDropSchemas(pubform->oid, delschemas, true);
+
+ /*
+ * Don't bother calculating the difference for adding, we'll catch and
+ * skip existing ones when doing catalog update.
+ */
+ PublicationAddSchemas(pubform->oid, schemaidlist, true, stmt);
+ }
+}
+
+/*
+ * Verify that the given tables and schemas may be manipulated in this
+ * publication; raise an error otherwise.
+ *
+ * The check order is deliberate and user-visible: privilege first, then
+ * the two FOR ALL TABLES restrictions.
+ */
+static void
+CheckAlterPublication(AlterPublicationStmt *stmt, HeapTuple tup,
+ List *tables, List *schemaidlist)
+{
+ Form_pg_publication form = (Form_pg_publication) GETSTRUCT(tup);
+ bool adds_objects = (stmt->action == AP_AddObjects ||
+ stmt->action == AP_SetObjects);
+
+ /* Adding or setting schemas requires superuser privileges. */
+ if (adds_objects && schemaidlist != NIL && !superuser())
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("must be superuser to add or set schemas")));
+
+ /* A FOR ALL TABLES publication cannot have schema-level members... */
+ if (schemaidlist != NIL && form->puballtables)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("publication \"%s\" is defined as FOR ALL TABLES",
+ NameStr(form->pubname)),
+ errdetail("Schemas cannot be added to or dropped from FOR ALL TABLES publications.")));
+
+ /* ...nor individually listed tables. */
+ if (tables != NIL && form->puballtables)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("publication \"%s\" is defined as FOR ALL TABLES",
+ NameStr(form->pubname)),
+ errdetail("Tables cannot be added to or dropped from FOR ALL TABLES publications.")));
+}
+
+/*
+ * Alter the existing publication.
+ *
+ * This is dispatcher function for AlterPublicationOptions,
+ * AlterPublicationSchemas and AlterPublicationTables.
+ */
+void
+AlterPublication(ParseState *pstate, AlterPublicationStmt *stmt)
+{
+ Relation rel;
+ HeapTuple tup;
+ Form_pg_publication pubform;
+
+ rel = table_open(PublicationRelationId, RowExclusiveLock);
+
+ tup = SearchSysCacheCopy1(PUBLICATIONNAME,
+ CStringGetDatum(stmt->pubname));
+
+ if (!HeapTupleIsValid(tup))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("publication \"%s\" does not exist",
+ stmt->pubname)));
+
+ pubform = (Form_pg_publication) GETSTRUCT(tup);
+
+ /* must be owner */
+ if (!pg_publication_ownercheck(pubform->oid, GetUserId()))
+ aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_PUBLICATION,
+ stmt->pubname);
+
+ if (stmt->options)
+ AlterPublicationOptions(pstate, stmt, rel, tup);
+ else
+ {
+ List *relations = NIL;
+ List *schemaidlist = NIL;
+ Oid pubid = pubform->oid;
+
+ ObjectsInPublicationToOids(stmt->pubobjects, pstate, &relations,
+ &schemaidlist);
+
+ CheckAlterPublication(stmt, tup, relations, schemaidlist);
+
+ /* pubform points into tup; pubid was saved above, so this is safe. */
+ heap_freetuple(tup);
+
+ /* Lock the publication so nobody else can do anything with it. */
+ LockDatabaseObject(PublicationRelationId, pubid, 0,
+ AccessExclusiveLock);
+
+ /*
+ * It is possible that by the time we acquire the lock on publication,
+ * concurrent DDL has removed it. We can test this by checking the
+ * existence of publication. We get the tuple again to avoid the risk
+ * of any publication option getting changed.
+ */
+ tup = SearchSysCacheCopy1(PUBLICATIONOID, ObjectIdGetDatum(pubid));
+ if (!HeapTupleIsValid(tup))
+ ereport(ERROR,
+ errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("publication \"%s\" does not exist",
+ stmt->pubname));
+
+ AlterPublicationTables(stmt, tup, relations, pstate->p_sourcetext,
+ schemaidlist != NIL);
+ AlterPublicationSchemas(stmt, tup, schemaidlist);
+ }
+
+ /* Cleanup. */
+ heap_freetuple(tup);
+ table_close(rel, RowExclusiveLock);
+}
+
+/*
+ * Remove relation from publication by mapping OID.
+ *
+ * proid is the OID of the pg_publication_rel row (not the relation OID);
+ * the affected relcache entries are invalidated before the row is deleted.
+ */
+void
+RemovePublicationRelById(Oid proid)
+{
+ Relation rel;
+ HeapTuple tup;
+ Form_pg_publication_rel pubrel;
+ List *relids = NIL;
+
+ rel = table_open(PublicationRelRelationId, RowExclusiveLock);
+
+ tup = SearchSysCache1(PUBLICATIONREL, ObjectIdGetDatum(proid));
+
+ if (!HeapTupleIsValid(tup))
+ elog(ERROR, "cache lookup failed for publication table %u",
+ proid);
+
+ pubrel = (Form_pg_publication_rel) GETSTRUCT(tup);
+
+ /*
+ * Invalidate relcache so that publication info is rebuilt.
+ *
+ * For the partitioned tables, we must invalidate all partitions contained
+ * in the respective partition hierarchies, not just the one explicitly
+ * mentioned in the publication. This is required because we implicitly
+ * publish the child tables when the parent table is published.
+ */
+ relids = GetPubPartitionOptionRelations(relids, PUBLICATION_PART_ALL,
+ pubrel->prrelid);
+
+ InvalidatePublicationRels(relids);
+
+ CatalogTupleDelete(rel, &tup->t_self);
+
+ ReleaseSysCache(tup);
+
+ table_close(rel, RowExclusiveLock);
+}
+
+/*
+ * Remove the publication by mapping OID.
+ *
+ * Deletes the pg_publication row; for FOR ALL TABLES publications the
+ * whole relcache is invalidated since any table may be affected.
+ */
+void
+RemovePublicationById(Oid pubid)
+{
+ Relation rel;
+ HeapTuple tup;
+ Form_pg_publication pubform;
+
+ rel = table_open(PublicationRelationId, RowExclusiveLock);
+
+ tup = SearchSysCache1(PUBLICATIONOID, ObjectIdGetDatum(pubid));
+ if (!HeapTupleIsValid(tup))
+ elog(ERROR, "cache lookup failed for publication %u", pubid);
+
+ pubform = (Form_pg_publication) GETSTRUCT(tup);
+
+ /* Invalidate relcache so that publication info is rebuilt. */
+ if (pubform->puballtables)
+ CacheInvalidateRelcacheAll();
+
+ CatalogTupleDelete(rel, &tup->t_self);
+
+ ReleaseSysCache(tup);
+
+ table_close(rel, RowExclusiveLock);
+}
+
+/*
+ * Remove schema from publication by mapping OID.
+ *
+ * psoid is the OID of the pg_publication_namespace row; all relations
+ * published via that schema (including partitions) are invalidated first.
+ */
+void
+RemovePublicationSchemaById(Oid psoid)
+{
+ Relation rel;
+ HeapTuple tup;
+ List *schemaRels = NIL;
+ Form_pg_publication_namespace pubsch;
+
+ rel = table_open(PublicationNamespaceRelationId, RowExclusiveLock);
+
+ tup = SearchSysCache1(PUBLICATIONNAMESPACE, ObjectIdGetDatum(psoid));
+
+ if (!HeapTupleIsValid(tup))
+ elog(ERROR, "cache lookup failed for publication schema %u", psoid);
+
+ pubsch = (Form_pg_publication_namespace) GETSTRUCT(tup);
+
+ /*
+ * Invalidate relcache so that publication info is rebuilt. See
+ * RemovePublicationRelById for why we need to consider all the
+ * partitions.
+ */
+ schemaRels = GetSchemaPublicationRelations(pubsch->pnnspid,
+ PUBLICATION_PART_ALL);
+ InvalidatePublicationRels(schemaRels);
+
+ CatalogTupleDelete(rel, &tup->t_self);
+
+ ReleaseSysCache(tup);
+
+ table_close(rel, RowExclusiveLock);
+}
+
+/*
+ * Open relations specified by a PublicationTable list.
+ * The returned tables are locked in ShareUpdateExclusiveLock mode in order to
+ * add them to a publication.
+ *
+ * Duplicate entries are silently skipped unless a row filter or column list
+ * makes the duplication ambiguous, in which case we error out.  When a
+ * parent table is specified with inheritance, its children are added too
+ * and inherit the parent's WHERE clause and column list.
+ */
+static List *
+OpenTableList(List *tables)
+{
+ List *relids = NIL;
+ List *rels = NIL;
+ ListCell *lc;
+ List *relids_with_rf = NIL;
+ List *relids_with_collist = NIL;
+
+ /*
+ * Open, share-lock, and check all the explicitly-specified relations
+ */
+ foreach(lc, tables)
+ {
+ PublicationTable *t = lfirst_node(PublicationTable, lc);
+ bool recurse = t->relation->inh;
+ Relation rel;
+ Oid myrelid;
+ PublicationRelInfo *pub_rel;
+
+ /* Allow query cancel in case this takes a long time */
+ CHECK_FOR_INTERRUPTS();
+
+ rel = table_openrv(t->relation, ShareUpdateExclusiveLock);
+ myrelid = RelationGetRelid(rel);
+
+ /*
+ * Filter out duplicates if user specifies "foo, foo".
+ *
+ * Note that this algorithm is known to not be very efficient (O(N^2))
+ * but given that it only works on list of tables given to us by user
+ * it's deemed acceptable.
+ */
+ if (list_member_oid(relids, myrelid))
+ {
+ /* Disallow duplicate tables if there are any with row filters. */
+ if (t->whereClause || list_member_oid(relids_with_rf, myrelid))
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg("conflicting or redundant WHERE clauses for table \"%s\"",
+ RelationGetRelationName(rel))));
+
+ /* Disallow duplicate tables if there are any with column lists. */
+ if (t->columns || list_member_oid(relids_with_collist, myrelid))
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg("conflicting or redundant column lists for table \"%s\"",
+ RelationGetRelationName(rel))));
+
+ table_close(rel, ShareUpdateExclusiveLock);
+ continue;
+ }
+
+ pub_rel = palloc(sizeof(PublicationRelInfo));
+ pub_rel->relation = rel;
+ pub_rel->whereClause = t->whereClause;
+ pub_rel->columns = t->columns;
+ rels = lappend(rels, pub_rel);
+ relids = lappend_oid(relids, myrelid);
+
+ if (t->whereClause)
+ relids_with_rf = lappend_oid(relids_with_rf, myrelid);
+
+ if (t->columns)
+ relids_with_collist = lappend_oid(relids_with_collist, myrelid);
+
+ /*
+ * Add children of this rel, if requested, so that they too are added
+ * to the publication. A partitioned table can't have any inheritance
+ * children other than its partitions, which need not be explicitly
+ * added to the publication.
+ */
+ if (recurse && rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+ {
+ List *children;
+ ListCell *child;
+
+ children = find_all_inheritors(myrelid, ShareUpdateExclusiveLock,
+ NULL);
+
+ foreach(child, children)
+ {
+ Oid childrelid = lfirst_oid(child);
+
+ /* Allow query cancel in case this takes a long time */
+ CHECK_FOR_INTERRUPTS();
+
+ /*
+ * Skip duplicates if user specified both parent and child
+ * tables.
+ */
+ if (list_member_oid(relids, childrelid))
+ {
+ /*
+ * We don't allow to specify row filter for both parent
+ * and child table at the same time as it is not very
+ * clear which one should be given preference.
+ */
+ if (childrelid != myrelid &&
+ (t->whereClause || list_member_oid(relids_with_rf, childrelid)))
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg("conflicting or redundant WHERE clauses for table \"%s\"",
+ RelationGetRelationName(rel))));
+
+ /*
+ * We don't allow to specify column list for both parent
+ * and child table at the same time as it is not very
+ * clear which one should be given preference.
+ */
+ if (childrelid != myrelid &&
+ (t->columns || list_member_oid(relids_with_collist, childrelid)))
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg("conflicting or redundant column lists for table \"%s\"",
+ RelationGetRelationName(rel))));
+
+ continue;
+ }
+
+ /* find_all_inheritors already got lock */
+ rel = table_open(childrelid, NoLock);
+ pub_rel = palloc(sizeof(PublicationRelInfo));
+ pub_rel->relation = rel;
+ /* child inherits WHERE clause from parent */
+ pub_rel->whereClause = t->whereClause;
+
+ /* child inherits column list from parent */
+ pub_rel->columns = t->columns;
+ rels = lappend(rels, pub_rel);
+ relids = lappend_oid(relids, childrelid);
+
+ if (t->whereClause)
+ relids_with_rf = lappend_oid(relids_with_rf, childrelid);
+
+ if (t->columns)
+ relids_with_collist = lappend_oid(relids_with_collist, childrelid);
+ }
+ }
+ }
+
+ /* Free the tracking lists; only rels is returned to the caller. */
+ list_free(relids);
+ list_free(relids_with_rf);
+ list_free(relids_with_collist);
+
+ return rels;
+}
+
+/*
+ * Close every relation in a list of PublicationRelInfo entries (keeping
+ * the locks), then release the list itself and its entries.
+ */
+static void
+CloseTableList(List *rels)
+{
+ ListCell *cell;
+
+ foreach(cell, rels)
+ {
+ PublicationRelInfo *pubrel = (PublicationRelInfo *) lfirst(cell);
+
+ /* NoLock: we keep whatever lock was taken when the rel was opened */
+ table_close(pubrel->relation, NoLock);
+ }
+
+ list_free_deep(rels);
+}
+
+/*
+ * Lock the schemas specified in the schema list in AccessShareLock mode in
+ * order to prevent concurrent schema deletion.
+ *
+ * Schemas are locked in list order; errors out if a schema has been
+ * dropped by the time its lock is acquired.
+ */
+static void
+LockSchemaList(List *schemalist)
+{
+ ListCell *lc;
+
+ foreach(lc, schemalist)
+ {
+ Oid schemaid = lfirst_oid(lc);
+
+ /* Allow query cancel in case this takes a long time */
+ CHECK_FOR_INTERRUPTS();
+ LockDatabaseObject(NamespaceRelationId, schemaid, 0, AccessShareLock);
+
+ /*
+ * It is possible that by the time we acquire the lock on schema,
+ * concurrent DDL has removed it. We can test this by checking the
+ * existence of schema.
+ */
+ if (!SearchSysCacheExists1(NAMESPACEOID, ObjectIdGetDatum(schemaid)))
+ ereport(ERROR,
+ errcode(ERRCODE_UNDEFINED_SCHEMA),
+ errmsg("schema with OID %u does not exist", schemaid));
+ }
+}
+
+/*
+ * Add listed tables to the publication.
+ *
+ * stmt is NULL when called from CreatePublication; in that case no event
+ * trigger command is collected and no post-create hook is invoked here.
+ * With if_not_exists, already-published tables are skipped (handled by
+ * publication_add_relation).
+ */
+static void
+PublicationAddTables(Oid pubid, List *rels, bool if_not_exists,
+ AlterPublicationStmt *stmt)
+{
+ ListCell *lc;
+
+ Assert(!stmt || !stmt->for_all_tables);
+
+ foreach(lc, rels)
+ {
+ PublicationRelInfo *pub_rel = (PublicationRelInfo *) lfirst(lc);
+ Relation rel = pub_rel->relation;
+ ObjectAddress obj;
+
+ /* Must be owner of the table or superuser. */
+ if (!pg_class_ownercheck(RelationGetRelid(rel), GetUserId()))
+ aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(rel->rd_rel->relkind),
+ RelationGetRelationName(rel));
+
+ obj = publication_add_relation(pubid, pub_rel, if_not_exists);
+ if (stmt)
+ {
+ EventTriggerCollectSimpleCommand(obj, InvalidObjectAddress,
+ (Node *) stmt);
+
+ InvokeObjectPostCreateHook(PublicationRelRelationId,
+ obj.objectId, 0);
+ }
+ }
+}
+
+/*
+ * Remove listed tables from the publication.
+ *
+ * With missing_ok, tables that are not members of the publication are
+ * silently skipped.  Note the error precedence: a column list errors out
+ * before the membership lookup, a WHERE clause only after it.
+ */
+static void
+PublicationDropTables(Oid pubid, List *rels, bool missing_ok)
+{
+ ObjectAddress obj;
+ ListCell *lc;
+ Oid prid;
+
+ foreach(lc, rels)
+ {
+ PublicationRelInfo *pubrel = (PublicationRelInfo *) lfirst(lc);
+ Relation rel = pubrel->relation;
+ Oid relid = RelationGetRelid(rel);
+
+ if (pubrel->columns)
+ ereport(ERROR,
+ errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("column list must not be specified in ALTER PUBLICATION ... DROP"));
+
+ prid = GetSysCacheOid2(PUBLICATIONRELMAP, Anum_pg_publication_rel_oid,
+ ObjectIdGetDatum(relid),
+ ObjectIdGetDatum(pubid));
+ if (!OidIsValid(prid))
+ {
+ if (missing_ok)
+ continue;
+
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("relation \"%s\" is not part of the publication",
+ RelationGetRelationName(rel))));
+ }
+
+ if (pubrel->whereClause)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("cannot use a WHERE clause when removing a table from a publication")));
+
+ ObjectAddressSet(obj, PublicationRelRelationId, prid);
+ performDeletion(&obj, DROP_CASCADE, 0);
+ }
+}
+
+/*
+ * Add listed schemas to the publication.
+ *
+ * stmt is NULL when called from CreatePublication; in that case no event
+ * trigger command is collected and no post-create hook is invoked here.
+ */
+static void
+PublicationAddSchemas(Oid pubid, List *schemas, bool if_not_exists,
+ AlterPublicationStmt *stmt)
+{
+ ListCell *lc;
+
+ Assert(!stmt || !stmt->for_all_tables);
+
+ foreach(lc, schemas)
+ {
+ Oid schemaid = lfirst_oid(lc);
+ ObjectAddress obj;
+
+ obj = publication_add_schema(pubid, schemaid, if_not_exists);
+ if (stmt)
+ {
+ EventTriggerCollectSimpleCommand(obj, InvalidObjectAddress,
+ (Node *) stmt);
+
+ InvokeObjectPostCreateHook(PublicationNamespaceRelationId,
+ obj.objectId, 0);
+ }
+ }
+}
+
+/*
+ * Remove listed schemas from the publication.
+ *
+ * With missing_ok, schemas that are not members of the publication are
+ * silently skipped.
+ */
+static void
+PublicationDropSchemas(Oid pubid, List *schemas, bool missing_ok)
+{
+ ObjectAddress obj;
+ ListCell *lc;
+ Oid psid;
+
+ foreach(lc, schemas)
+ {
+ Oid schemaid = lfirst_oid(lc);
+
+ psid = GetSysCacheOid2(PUBLICATIONNAMESPACEMAP,
+ Anum_pg_publication_namespace_oid,
+ ObjectIdGetDatum(schemaid),
+ ObjectIdGetDatum(pubid));
+ if (!OidIsValid(psid))
+ {
+ if (missing_ok)
+ continue;
+
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("tables from schema \"%s\" are not part of the publication",
+ get_namespace_name(schemaid))));
+ }
+
+ ObjectAddressSet(obj, PublicationNamespaceRelationId, psid);
+ performDeletion(&obj, DROP_CASCADE, 0);
+ }
+}
+
+/*
+ * Internal workhorse for changing a publication owner
+ *
+ * Caller holds RowExclusiveLock on pg_publication (rel) and supplies a
+ * modifiable copy of the publication tuple.  No-op if the owner is
+ * unchanged; superusers bypass all privilege checks.
+ */
+static void
+AlterPublicationOwner_internal(Relation rel, HeapTuple tup, Oid newOwnerId)
+{
+ Form_pg_publication form;
+
+ form = (Form_pg_publication) GETSTRUCT(tup);
+
+ if (form->pubowner == newOwnerId)
+ return;
+
+ if (!superuser())
+ {
+ AclResult aclresult;
+
+ /* Must be owner */
+ if (!pg_publication_ownercheck(form->oid, GetUserId()))
+ aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_PUBLICATION,
+ NameStr(form->pubname));
+
+ /* Must be able to become new owner */
+ check_is_member_of_role(GetUserId(), newOwnerId);
+
+ /* New owner must have CREATE privilege on database */
+ aclresult = pg_database_aclcheck(MyDatabaseId, newOwnerId, ACL_CREATE);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error(aclresult, OBJECT_DATABASE,
+ get_database_name(MyDatabaseId));
+
+ if (form->puballtables && !superuser_arg(newOwnerId))
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("permission denied to change owner of publication \"%s\"",
+ NameStr(form->pubname)),
+ errhint("The owner of a FOR ALL TABLES publication must be a superuser.")));
+
+ if (!superuser_arg(newOwnerId) && is_schema_publication(form->oid))
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("permission denied to change owner of publication \"%s\"",
+ NameStr(form->pubname)),
+ errhint("The owner of a FOR TABLES IN SCHEMA publication must be a superuser.")));
+ }
+
+ form->pubowner = newOwnerId;
+ CatalogTupleUpdate(rel, &tup->t_self, tup);
+
+ /* Update owner dependency reference */
+ changeDependencyOnOwner(PublicationRelationId,
+ form->oid,
+ newOwnerId);
+
+ InvokeObjectPostAlterHook(PublicationRelationId,
+ form->oid, 0);
+}
+
+/*
+ * Change publication owner -- by name
+ *
+ * Looks up the publication by name and delegates the actual ownership
+ * change to AlterPublicationOwner_internal.  Returns the publication's
+ * object address.
+ */
+ObjectAddress
+AlterPublicationOwner(const char *name, Oid newOwnerId)
+{
+ Oid pubid;
+ HeapTuple tup;
+ Relation rel;
+ ObjectAddress address;
+ Form_pg_publication pubform;
+
+ rel = table_open(PublicationRelationId, RowExclusiveLock);
+
+ tup = SearchSysCacheCopy1(PUBLICATIONNAME, CStringGetDatum(name));
+ if (!HeapTupleIsValid(tup))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("publication \"%s\" does not exist", name)));
+
+ pubform = (Form_pg_publication) GETSTRUCT(tup);
+ pubid = pubform->oid;
+
+ AlterPublicationOwner_internal(rel, tup, newOwnerId);
+
+ ObjectAddressSet(address, PublicationRelationId, pubid);
+
+ heap_freetuple(tup);
+ table_close(rel, RowExclusiveLock);
+
+ return address;
+}
+
+/*
+ * Change publication owner -- by OID
+ *
+ * NOTE(review): the parameter is named "subid" but holds a publication
+ * OID (it is passed to the PUBLICATIONOID syscache); consider renaming
+ * to pubid in a follow-up.
+ */
+void
+AlterPublicationOwner_oid(Oid subid, Oid newOwnerId)
+{
+ HeapTuple tup;
+ Relation rel;
+
+ rel = table_open(PublicationRelationId, RowExclusiveLock);
+
+ tup = SearchSysCacheCopy1(PUBLICATIONOID, ObjectIdGetDatum(subid));
+
+ if (!HeapTupleIsValid(tup))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("publication with OID %u does not exist", subid)));
+
+ AlterPublicationOwner_internal(rel, tup, newOwnerId);
+
+ heap_freetuple(tup);
+
+ table_close(rel, RowExclusiveLock);
+}
diff --git a/src/backend/commands/schemacmds.c b/src/backend/commands/schemacmds.c
new file mode 100644
index 0000000..1a9132c
--- /dev/null
+++ b/src/backend/commands/schemacmds.c
@@ -0,0 +1,441 @@
+/*-------------------------------------------------------------------------
+ *
+ * schemacmds.c
+ * schema creation/manipulation commands
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/commands/schemacmds.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/htup_details.h"
+#include "access/table.h"
+#include "access/xact.h"
+#include "catalog/catalog.h"
+#include "catalog/dependency.h"
+#include "catalog/indexing.h"
+#include "catalog/namespace.h"
+#include "catalog/objectaccess.h"
+#include "catalog/pg_authid.h"
+#include "catalog/pg_namespace.h"
+#include "commands/dbcommands.h"
+#include "commands/event_trigger.h"
+#include "commands/schemacmds.h"
+#include "miscadmin.h"
+#include "parser/parse_utilcmd.h"
+#include "parser/scansup.h"
+#include "tcop/utility.h"
+#include "utils/acl.h"
+#include "utils/builtins.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
+
+static void AlterSchemaOwner_internal(HeapTuple tup, Relation rel, Oid newOwnerId);
+
+/*
+ * CREATE SCHEMA
+ *
+ * Note: caller should pass in location information for the whole
+ * CREATE SCHEMA statement, which in turn we pass down as the location
+ * of the component commands. This comports with our general plan of
+ * reporting location/len for the whole command even when executing
+ * a subquery.
+ */
+Oid
+CreateSchemaCommand(CreateSchemaStmt *stmt, const char *queryString,
+ int stmt_location, int stmt_len)
+{
+ const char *schemaName = stmt->schemaname;
+ Oid namespaceId;
+ List *parsetree_list;
+ ListCell *parsetree_item;
+ Oid owner_uid; /* role that will own the new schema */
+ Oid saved_uid; /* role we were running as on entry */
+ int save_sec_context;
+ int save_nestlevel;
+ char *nsp = namespace_search_path; /* current search_path GUC string */
+ AclResult aclresult;
+ ObjectAddress address;
+ StringInfoData pathbuf;
+
+ GetUserIdAndSecContext(&saved_uid, &save_sec_context);
+
+ /*
+ * Who is supposed to own the new schema?
+ */
+ if (stmt->authrole)
+ owner_uid = get_rolespec_oid(stmt->authrole, false);
+ else
+ owner_uid = saved_uid;
+
+ /* fill schema name with the user name if not specified */
+ if (!schemaName)
+ {
+ HeapTuple tuple;
+
+ tuple = SearchSysCache1(AUTHOID, ObjectIdGetDatum(owner_uid));
+ if (!HeapTupleIsValid(tuple))
+ elog(ERROR, "cache lookup failed for role %u", owner_uid);
+ schemaName =
+ pstrdup(NameStr(((Form_pg_authid) GETSTRUCT(tuple))->rolname));
+ ReleaseSysCache(tuple);
+ }
+
+ /*
+ * To create a schema, must have schema-create privilege on the current
+ * database and must be able to become the target role (this does not
+ * imply that the target role itself must have create-schema privilege).
+ * The latter provision guards against "giveaway" attacks. Note that a
+ * superuser will always have both of these privileges a fortiori.
+ */
+ aclresult = pg_database_aclcheck(MyDatabaseId, saved_uid, ACL_CREATE);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error(aclresult, OBJECT_DATABASE,
+ get_database_name(MyDatabaseId));
+
+ check_is_member_of_role(saved_uid, owner_uid);
+
+ /* Additional check to protect reserved schema names */
+ if (!allowSystemTableMods && IsReservedName(schemaName))
+ ereport(ERROR,
+ (errcode(ERRCODE_RESERVED_NAME),
+ errmsg("unacceptable schema name \"%s\"", schemaName),
+ errdetail("The prefix \"pg_\" is reserved for system schemas.")));
+
+ /*
+ * If if_not_exists was given and the schema already exists, bail out.
+ * (Note: we needn't check this when not if_not_exists, because
+ * NamespaceCreate will complain anyway.) We could do this before making
+ * the permissions checks, but since CREATE TABLE IF NOT EXISTS makes its
+ * creation-permission check first, we do likewise.
+ */
+ if (stmt->if_not_exists)
+ {
+ namespaceId = get_namespace_oid(schemaName, true);
+ if (OidIsValid(namespaceId))
+ {
+ /*
+ * If we are in an extension script, insist that the pre-existing
+ * object be a member of the extension, to avoid security risks.
+ */
+ ObjectAddressSet(address, NamespaceRelationId, namespaceId);
+ checkMembershipInCurrentExtension(&address);
+
+ /* OK to skip */
+ ereport(NOTICE,
+ (errcode(ERRCODE_DUPLICATE_SCHEMA),
+ errmsg("schema \"%s\" already exists, skipping",
+ schemaName)));
+ return InvalidOid;
+ }
+ }
+
+ /*
+ * If the requested authorization is different from the current user,
+ * temporarily set the current user so that the object(s) will be created
+ * with the correct ownership.
+ *
+ * (The setting will be restored at the end of this routine, or in case of
+ * error, transaction abort will clean things up.)
+ */
+ if (saved_uid != owner_uid)
+ SetUserIdAndSecContext(owner_uid,
+ save_sec_context | SECURITY_LOCAL_USERID_CHANGE);
+
+ /* Create the schema's namespace */
+ namespaceId = NamespaceCreate(schemaName, owner_uid, false);
+
+ /* Advance cmd counter to make the namespace visible */
+ CommandCounterIncrement();
+
+ /*
+ * Prepend the new schema to the current search path.
+ *
+ * We use the equivalent of a function SET option to allow the setting to
+ * persist for exactly the duration of the schema creation. guc.c also
+ * takes care of undoing the setting on error.
+ */
+ save_nestlevel = NewGUCNestLevel();
+
+ initStringInfo(&pathbuf);
+ appendStringInfoString(&pathbuf, quote_identifier(schemaName));
+
+ /* skip leading whitespace in the existing search_path value */
+ while (scanner_isspace(*nsp))
+ nsp++;
+
+ if (*nsp != '\0')
+ appendStringInfo(&pathbuf, ", %s", nsp);
+
+ (void) set_config_option("search_path", pathbuf.data,
+ PGC_USERSET, PGC_S_SESSION,
+ GUC_ACTION_SAVE, true, 0, false);
+
+ /*
+ * Report the new schema to possibly interested event triggers. Note we
+ * must do this here and not in ProcessUtilitySlow because otherwise the
+ * objects created below are reported before the schema, which would be
+ * wrong.
+ */
+ ObjectAddressSet(address, NamespaceRelationId, namespaceId);
+ EventTriggerCollectSimpleCommand(address, InvalidObjectAddress,
+ (Node *) stmt);
+
+ /*
+ * Examine the list of commands embedded in the CREATE SCHEMA command, and
+ * reorganize them into a sequentially executable order with no forward
+ * references. Note that the result is still a list of raw parsetrees ---
+ * we cannot, in general, run parse analysis on one statement until we
+ * have actually executed the prior ones.
+ */
+ parsetree_list = transformCreateSchemaStmtElements(stmt->schemaElts,
+ schemaName);
+
+ /*
+ * Execute each command contained in the CREATE SCHEMA. Since the grammar
+ * allows only utility commands in CREATE SCHEMA, there is no need to pass
+ * them through parse_analyze_*() or the rewriter; we can just hand them
+ * straight to ProcessUtility.
+ */
+ foreach(parsetree_item, parsetree_list)
+ {
+ /* NOTE: this "stmt" intentionally shadows the CreateSchemaStmt parameter */
+ Node *stmt = (Node *) lfirst(parsetree_item);
+ PlannedStmt *wrapper;
+
+ /* need to make a wrapper PlannedStmt */
+ wrapper = makeNode(PlannedStmt);
+ wrapper->commandType = CMD_UTILITY;
+ wrapper->canSetTag = false;
+ wrapper->utilityStmt = stmt;
+ wrapper->stmt_location = stmt_location;
+ wrapper->stmt_len = stmt_len;
+
+ /* do this step */
+ ProcessUtility(wrapper,
+ queryString,
+ false,
+ PROCESS_UTILITY_SUBCOMMAND,
+ NULL,
+ NULL,
+ None_Receiver,
+ NULL);
+
+ /* make sure later steps can see the object created here */
+ CommandCounterIncrement();
+ }
+
+ /*
+ * Restore the GUC variable search_path we set above.
+ */
+ AtEOXact_GUC(true, save_nestlevel);
+
+ /* Reset current user and security context */
+ SetUserIdAndSecContext(saved_uid, save_sec_context);
+
+ return namespaceId;
+}
+
+
+/*
+ * Rename schema
+ *
+ * The caller must own the schema and have CREATE privilege on the database;
+ * the new name must be free and not use the reserved "pg_" prefix.  Returns
+ * the ObjectAddress of the renamed schema.
+ */
+ObjectAddress
+RenameSchema(const char *oldname, const char *newname)
+{
+ Oid nspOid;
+ HeapTuple tup;
+ Relation rel;
+ AclResult aclresult;
+ ObjectAddress address;
+ Form_pg_namespace nspform;
+
+ rel = table_open(NamespaceRelationId, RowExclusiveLock);
+
+ tup = SearchSysCacheCopy1(NAMESPACENAME, CStringGetDatum(oldname));
+ if (!HeapTupleIsValid(tup))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_SCHEMA),
+ errmsg("schema \"%s\" does not exist", oldname)));
+
+ nspform = (Form_pg_namespace) GETSTRUCT(tup);
+ nspOid = nspform->oid;
+
+ /* make sure the new name doesn't exist */
+ if (OidIsValid(get_namespace_oid(newname, true)))
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_SCHEMA),
+ errmsg("schema \"%s\" already exists", newname)));
+
+ /* must be owner */
+ if (!pg_namespace_ownercheck(nspOid, GetUserId()))
+ aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_SCHEMA,
+ oldname);
+
+ /* must have CREATE privilege on database */
+ aclresult = pg_database_aclcheck(MyDatabaseId, GetUserId(), ACL_CREATE);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error(aclresult, OBJECT_DATABASE,
+ get_database_name(MyDatabaseId));
+
+ if (!allowSystemTableMods && IsReservedName(newname))
+ ereport(ERROR,
+ (errcode(ERRCODE_RESERVED_NAME),
+ errmsg("unacceptable schema name \"%s\"", newname),
+ errdetail("The prefix \"pg_\" is reserved for system schemas.")));
+
+ /* rename */
+ namestrcpy(&nspform->nspname, newname);
+ CatalogTupleUpdate(rel, &tup->t_self, tup);
+
+ InvokeObjectPostAlterHook(NamespaceRelationId, nspOid, 0);
+
+ ObjectAddressSet(address, NamespaceRelationId, nspOid);
+
+ /* NoLock: retain the RowExclusiveLock on pg_namespace until commit */
+ table_close(rel, NoLock);
+ heap_freetuple(tup);
+
+ return address;
+}
+
+/*
+ * Change schema owner -- by OID
+ *
+ * Variant of AlterSchemaOwner keyed by the schema's OID; permission checks
+ * and the catalog update are done by AlterSchemaOwner_internal.
+ */
+void
+AlterSchemaOwner_oid(Oid oid, Oid newOwnerId)
+{
+ HeapTuple tup;
+ Relation rel;
+
+ rel = table_open(NamespaceRelationId, RowExclusiveLock);
+
+ tup = SearchSysCache1(NAMESPACEOID, ObjectIdGetDatum(oid));
+ if (!HeapTupleIsValid(tup))
+ elog(ERROR, "cache lookup failed for schema %u", oid);
+
+ AlterSchemaOwner_internal(tup, rel, newOwnerId);
+
+ ReleaseSysCache(tup);
+
+ table_close(rel, RowExclusiveLock);
+}
+
+
+/*
+ * Change schema owner
+ *
+ * Looks the schema up by name and hands off to AlterSchemaOwner_internal
+ * for permission checks and the pg_namespace update.  Returns the
+ * ObjectAddress of the altered schema.
+ */
+ObjectAddress
+AlterSchemaOwner(const char *name, Oid newOwnerId)
+{
+ Oid nspOid;
+ HeapTuple tup;
+ Relation rel;
+ ObjectAddress address;
+ Form_pg_namespace nspform;
+
+ rel = table_open(NamespaceRelationId, RowExclusiveLock);
+
+ tup = SearchSysCache1(NAMESPACENAME, CStringGetDatum(name));
+ if (!HeapTupleIsValid(tup))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_SCHEMA),
+ errmsg("schema \"%s\" does not exist", name)));
+
+ nspform = (Form_pg_namespace) GETSTRUCT(tup);
+ nspOid = nspform->oid;
+
+ AlterSchemaOwner_internal(tup, rel, newOwnerId);
+
+ ObjectAddressSet(address, NamespaceRelationId, nspOid);
+
+ ReleaseSysCache(tup);
+
+ table_close(rel, RowExclusiveLock);
+
+ return address;
+}
+
+/*
+ * AlterSchemaOwner_internal
+ *
+ * Guts of schema ownership change: permission checks, pg_namespace update
+ * (including rewriting the ACL for the new owner), and the owner-dependency
+ * update.  "tup" must be a valid pg_namespace syscache tuple and "rel" the
+ * already-opened pg_namespace relation.
+ */
+static void
+AlterSchemaOwner_internal(HeapTuple tup, Relation rel, Oid newOwnerId)
+{
+ Form_pg_namespace nspForm;
+
+ Assert(tup->t_tableOid == NamespaceRelationId);
+ Assert(RelationGetRelid(rel) == NamespaceRelationId);
+
+ nspForm = (Form_pg_namespace) GETSTRUCT(tup);
+
+ /*
+ * If the new owner is the same as the existing owner, consider the
+ * command to have succeeded. This is for dump restoration purposes.
+ */
+ if (nspForm->nspowner != newOwnerId)
+ {
+ Datum repl_val[Natts_pg_namespace];
+ bool repl_null[Natts_pg_namespace];
+ bool repl_repl[Natts_pg_namespace];
+ Acl *newAcl;
+ Datum aclDatum;
+ bool isNull;
+ HeapTuple newtuple;
+ AclResult aclresult;
+
+ /* Otherwise, must be owner of the existing object */
+ if (!pg_namespace_ownercheck(nspForm->oid, GetUserId()))
+ aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_SCHEMA,
+ NameStr(nspForm->nspname));
+
+ /* Must be able to become new owner */
+ check_is_member_of_role(GetUserId(), newOwnerId);
+
+ /*
+ * must have create-schema rights
+ *
+ * NOTE: This is different from other alter-owner checks in that the
+ * current user is checked for create privileges instead of the
+ * destination owner. This is consistent with the CREATE case for
+ * schemas. Because superusers will always have this right, we need
+ * no special case for them.
+ */
+ aclresult = pg_database_aclcheck(MyDatabaseId, GetUserId(),
+ ACL_CREATE);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error(aclresult, OBJECT_DATABASE,
+ get_database_name(MyDatabaseId));
+
+ memset(repl_null, false, sizeof(repl_null));
+ memset(repl_repl, false, sizeof(repl_repl));
+
+ repl_repl[Anum_pg_namespace_nspowner - 1] = true;
+ repl_val[Anum_pg_namespace_nspowner - 1] = ObjectIdGetDatum(newOwnerId);
+
+ /*
+ * Determine the modified ACL for the new owner. This is only
+ * necessary when the ACL is non-null.
+ */
+ aclDatum = SysCacheGetAttr(NAMESPACENAME, tup,
+ Anum_pg_namespace_nspacl,
+ &isNull);
+ if (!isNull)
+ {
+ /* transfer the old owner's grants/privileges to the new owner */
+ newAcl = aclnewowner(DatumGetAclP(aclDatum),
+ nspForm->nspowner, newOwnerId);
+ repl_repl[Anum_pg_namespace_nspacl - 1] = true;
+ repl_val[Anum_pg_namespace_nspacl - 1] = PointerGetDatum(newAcl);
+ }
+
+ newtuple = heap_modify_tuple(tup, RelationGetDescr(rel), repl_val, repl_null, repl_repl);
+
+ CatalogTupleUpdate(rel, &newtuple->t_self, newtuple);
+
+ heap_freetuple(newtuple);
+
+ /* Update owner dependency reference */
+ changeDependencyOnOwner(NamespaceRelationId, nspForm->oid,
+ newOwnerId);
+ }
+
+ /* run the hook even in the no-op (same owner) case */
+ InvokeObjectPostAlterHook(NamespaceRelationId,
+ nspForm->oid, 0);
+}
diff --git a/src/backend/commands/seclabel.c b/src/backend/commands/seclabel.c
new file mode 100644
index 0000000..7ae19b9
--- /dev/null
+++ b/src/backend/commands/seclabel.c
@@ -0,0 +1,581 @@
+/* -------------------------------------------------------------------------
+ *
+ * seclabel.c
+ * routines to support security label feature.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * -------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/htup_details.h"
+#include "access/relation.h"
+#include "access/table.h"
+#include "catalog/catalog.h"
+#include "catalog/indexing.h"
+#include "catalog/pg_seclabel.h"
+#include "catalog/pg_shseclabel.h"
+#include "commands/seclabel.h"
+#include "miscadmin.h"
+#include "utils/builtins.h"
+#include "utils/fmgroids.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+
+typedef struct
+{
+ const char *provider_name;
+ check_object_relabel_type hook;
+} LabelProvider;
+
+static List *label_provider_list = NIL;
+
+/*
+ * SecLabelSupportsObjectType
+ *
+ * Returns true if SECURITY LABEL can be applied to the given object type.
+ * The switch deliberately enumerates every ObjectType with no default, so
+ * the compiler flags newly added object types that haven't been classified.
+ */
+static bool
+SecLabelSupportsObjectType(ObjectType objtype)
+{
+ switch (objtype)
+ {
+ case OBJECT_AGGREGATE:
+ case OBJECT_COLUMN:
+ case OBJECT_DATABASE:
+ case OBJECT_DOMAIN:
+ case OBJECT_EVENT_TRIGGER:
+ case OBJECT_FOREIGN_TABLE:
+ case OBJECT_FUNCTION:
+ case OBJECT_LANGUAGE:
+ case OBJECT_LARGEOBJECT:
+ case OBJECT_MATVIEW:
+ case OBJECT_PROCEDURE:
+ case OBJECT_PUBLICATION:
+ case OBJECT_ROLE:
+ case OBJECT_ROUTINE:
+ case OBJECT_SCHEMA:
+ case OBJECT_SEQUENCE:
+ case OBJECT_SUBSCRIPTION:
+ case OBJECT_TABLE:
+ case OBJECT_TABLESPACE:
+ case OBJECT_TYPE:
+ case OBJECT_VIEW:
+ return true;
+
+ case OBJECT_ACCESS_METHOD:
+ case OBJECT_AMOP:
+ case OBJECT_AMPROC:
+ case OBJECT_ATTRIBUTE:
+ case OBJECT_CAST:
+ case OBJECT_COLLATION:
+ case OBJECT_CONVERSION:
+ case OBJECT_DEFAULT:
+ case OBJECT_DEFACL:
+ case OBJECT_DOMCONSTRAINT:
+ case OBJECT_EXTENSION:
+ case OBJECT_FDW:
+ case OBJECT_FOREIGN_SERVER:
+ case OBJECT_INDEX:
+ case OBJECT_OPCLASS:
+ case OBJECT_OPERATOR:
+ case OBJECT_OPFAMILY:
+ case OBJECT_PARAMETER_ACL:
+ case OBJECT_POLICY:
+ case OBJECT_PUBLICATION_NAMESPACE:
+ case OBJECT_PUBLICATION_REL:
+ case OBJECT_RULE:
+ case OBJECT_STATISTIC_EXT:
+ case OBJECT_TABCONSTRAINT:
+ case OBJECT_TRANSFORM:
+ case OBJECT_TRIGGER:
+ case OBJECT_TSCONFIGURATION:
+ case OBJECT_TSDICTIONARY:
+ case OBJECT_TSPARSER:
+ case OBJECT_TSTEMPLATE:
+ case OBJECT_USER_MAPPING:
+ return false;
+
+ /*
+ * There's intentionally no default: case here; we want the
+ * compiler to warn if a new ObjectType hasn't been handled above.
+ */
+ }
+
+ /* Shouldn't get here, but if we do, say "no support" */
+ return false;
+}
+
+/*
+ * ExecSecLabelStmt --
+ *
+ * Apply a security label to a database object.
+ *
+ * Resolves the label provider (explicit by name, or implicit when exactly
+ * one is loaded), validates the target object type, checks ownership, lets
+ * the provider veto the label, then stores it via SetSecurityLabel.
+ *
+ * Returns the ObjectAddress of the object to which the policy was applied.
+ */
+ObjectAddress
+ExecSecLabelStmt(SecLabelStmt *stmt)
+{
+ LabelProvider *provider = NULL;
+ ObjectAddress address;
+ Relation relation;
+ ListCell *lc;
+
+ /*
+ * Find the named label provider, or if none specified, check whether
+ * there's exactly one, and if so use it.
+ */
+ if (stmt->provider == NULL)
+ {
+ if (label_provider_list == NIL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("no security label providers have been loaded")));
+ if (list_length(label_provider_list) != 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("must specify provider when multiple security label providers have been loaded")));
+ provider = (LabelProvider *) linitial(label_provider_list);
+ }
+ else
+ {
+ foreach(lc, label_provider_list)
+ {
+ LabelProvider *lp = lfirst(lc);
+
+ if (strcmp(stmt->provider, lp->provider_name) == 0)
+ {
+ provider = lp;
+ break;
+ }
+ }
+ if (provider == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("security label provider \"%s\" is not loaded",
+ stmt->provider)));
+ }
+
+ if (!SecLabelSupportsObjectType(stmt->objtype))
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("security labels are not supported for this type of object")));
+
+ /*
+ * Translate the parser representation which identifies this object into
+ * an ObjectAddress. get_object_address() will throw an error if the
+ * object does not exist, and will also acquire a lock on the target to
+ * guard against concurrent modifications.
+ */
+ address = get_object_address(stmt->objtype, stmt->object,
+ &relation, ShareUpdateExclusiveLock, false);
+
+ /* Require ownership of the target object. */
+ check_object_ownership(GetUserId(), stmt->objtype, address,
+ stmt->object, relation);
+
+ /* Perform other integrity checks as needed. */
+ switch (stmt->objtype)
+ {
+ case OBJECT_COLUMN:
+
+ /*
+ * Allow security labels only on columns of tables, views,
+ * materialized views, composite types, and foreign tables (which
+ * are the only relkinds for which pg_dump will dump labels).
+ */
+ if (relation->rd_rel->relkind != RELKIND_RELATION &&
+ relation->rd_rel->relkind != RELKIND_VIEW &&
+ relation->rd_rel->relkind != RELKIND_MATVIEW &&
+ relation->rd_rel->relkind != RELKIND_COMPOSITE_TYPE &&
+ relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE &&
+ relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot set security label on relation \"%s\"",
+ RelationGetRelationName(relation)),
+ errdetail_relkind_not_supported(relation->rd_rel->relkind)));
+ break;
+ default:
+ break;
+ }
+
+ /* Provider gets control here, may throw ERROR to veto new label. */
+ provider->hook(&address, stmt->label);
+
+ /* Apply new label. */
+ SetSecurityLabel(&address, provider->provider_name, stmt->label);
+
+ /*
+ * If get_object_address() opened the relation for us, we close it to keep
+ * the reference count correct - but we retain any locks acquired by
+ * get_object_address() until commit time, to guard against concurrent
+ * activity.
+ */
+ if (relation != NULL)
+ relation_close(relation, NoLock);
+
+ return address;
+}
+
+/*
+ * GetSharedSecurityLabel returns the security label for a shared object for
+ * a given provider, or NULL if there is no such label.
+ *
+ * The result is palloc'd (TextDatumGetCString) in the current context.
+ */
+static char *
+GetSharedSecurityLabel(const ObjectAddress *object, const char *provider)
+{
+ Relation pg_shseclabel;
+ ScanKeyData keys[3];
+ SysScanDesc scan;
+ HeapTuple tuple;
+ Datum datum;
+ bool isnull;
+ char *seclabel = NULL;
+
+ ScanKeyInit(&keys[0],
+ Anum_pg_shseclabel_objoid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(object->objectId));
+ ScanKeyInit(&keys[1],
+ Anum_pg_shseclabel_classoid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(object->classId));
+ ScanKeyInit(&keys[2],
+ Anum_pg_shseclabel_provider,
+ BTEqualStrategyNumber, F_TEXTEQ,
+ CStringGetTextDatum(provider));
+
+ pg_shseclabel = table_open(SharedSecLabelRelationId, AccessShareLock);
+
+ /*
+ * Use the index only once the critical shared relcache entries have been
+ * built; this can be reached early in backend startup, before that point.
+ */
+ scan = systable_beginscan(pg_shseclabel, SharedSecLabelObjectIndexId,
+ criticalSharedRelcachesBuilt, NULL, 3, keys);
+
+ tuple = systable_getnext(scan);
+ if (HeapTupleIsValid(tuple))
+ {
+ datum = heap_getattr(tuple, Anum_pg_shseclabel_label,
+ RelationGetDescr(pg_shseclabel), &isnull);
+ if (!isnull)
+ seclabel = TextDatumGetCString(datum);
+ }
+ systable_endscan(scan);
+
+ table_close(pg_shseclabel, AccessShareLock);
+
+ return seclabel;
+}
+
+/*
+ * GetSecurityLabel returns the security label for a shared or database object
+ * for a given provider, or NULL if there is no such label.
+ *
+ * Shared objects are dispatched to GetSharedSecurityLabel; everything else
+ * is looked up in pg_seclabel.  The result is palloc'd.
+ */
+char *
+GetSecurityLabel(const ObjectAddress *object, const char *provider)
+{
+ Relation pg_seclabel;
+ ScanKeyData keys[4];
+ SysScanDesc scan;
+ HeapTuple tuple;
+ Datum datum;
+ bool isnull;
+ char *seclabel = NULL;
+
+ /* Shared objects have their own security label catalog. */
+ if (IsSharedRelation(object->classId))
+ return GetSharedSecurityLabel(object, provider);
+
+ /* Must be an unshared object, so examine pg_seclabel. */
+ ScanKeyInit(&keys[0],
+ Anum_pg_seclabel_objoid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(object->objectId));
+ ScanKeyInit(&keys[1],
+ Anum_pg_seclabel_classoid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(object->classId));
+ ScanKeyInit(&keys[2],
+ Anum_pg_seclabel_objsubid,
+ BTEqualStrategyNumber, F_INT4EQ,
+ Int32GetDatum(object->objectSubId));
+ ScanKeyInit(&keys[3],
+ Anum_pg_seclabel_provider,
+ BTEqualStrategyNumber, F_TEXTEQ,
+ CStringGetTextDatum(provider));
+
+ pg_seclabel = table_open(SecLabelRelationId, AccessShareLock);
+
+ scan = systable_beginscan(pg_seclabel, SecLabelObjectIndexId, true,
+ NULL, 4, keys);
+
+ tuple = systable_getnext(scan);
+ if (HeapTupleIsValid(tuple))
+ {
+ datum = heap_getattr(tuple, Anum_pg_seclabel_label,
+ RelationGetDescr(pg_seclabel), &isnull);
+ if (!isnull)
+ seclabel = TextDatumGetCString(datum);
+ }
+ systable_endscan(scan);
+
+ table_close(pg_seclabel, AccessShareLock);
+
+ return seclabel;
+}
+
+/*
+ * SetSharedSecurityLabel is a helper function of SetSecurityLabel to
+ * handle shared database objects.
+ *
+ * label == NULL deletes any existing label; otherwise the existing tuple is
+ * updated in place, or a new one inserted if none exists.
+ */
+static void
+SetSharedSecurityLabel(const ObjectAddress *object,
+ const char *provider, const char *label)
+{
+ Relation pg_shseclabel;
+ ScanKeyData keys[4]; /* only 3 used; sized to match SetSecurityLabel */
+ SysScanDesc scan;
+ HeapTuple oldtup;
+ HeapTuple newtup = NULL;
+ Datum values[Natts_pg_shseclabel];
+ bool nulls[Natts_pg_shseclabel];
+ bool replaces[Natts_pg_shseclabel];
+
+ /* Prepare to form or update a tuple, if necessary. */
+ memset(nulls, false, sizeof(nulls));
+ memset(replaces, false, sizeof(replaces));
+ values[Anum_pg_shseclabel_objoid - 1] = ObjectIdGetDatum(object->objectId);
+ values[Anum_pg_shseclabel_classoid - 1] = ObjectIdGetDatum(object->classId);
+ values[Anum_pg_shseclabel_provider - 1] = CStringGetTextDatum(provider);
+ if (label != NULL)
+ values[Anum_pg_shseclabel_label - 1] = CStringGetTextDatum(label);
+
+ /* Use the index to search for a matching old tuple */
+ ScanKeyInit(&keys[0],
+ Anum_pg_shseclabel_objoid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(object->objectId));
+ ScanKeyInit(&keys[1],
+ Anum_pg_shseclabel_classoid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(object->classId));
+ ScanKeyInit(&keys[2],
+ Anum_pg_shseclabel_provider,
+ BTEqualStrategyNumber, F_TEXTEQ,
+ CStringGetTextDatum(provider));
+
+ pg_shseclabel = table_open(SharedSecLabelRelationId, RowExclusiveLock);
+
+ scan = systable_beginscan(pg_shseclabel, SharedSecLabelObjectIndexId, true,
+ NULL, 3, keys);
+
+ oldtup = systable_getnext(scan);
+ if (HeapTupleIsValid(oldtup))
+ {
+ if (label == NULL)
+ CatalogTupleDelete(pg_shseclabel, &oldtup->t_self);
+ else
+ {
+ replaces[Anum_pg_shseclabel_label - 1] = true;
+ newtup = heap_modify_tuple(oldtup, RelationGetDescr(pg_shseclabel),
+ values, nulls, replaces);
+ CatalogTupleUpdate(pg_shseclabel, &oldtup->t_self, newtup);
+ }
+ }
+ systable_endscan(scan);
+
+ /* If we didn't find an old tuple, insert a new one */
+ if (newtup == NULL && label != NULL)
+ {
+ newtup = heap_form_tuple(RelationGetDescr(pg_shseclabel),
+ values, nulls);
+ CatalogTupleInsert(pg_shseclabel, newtup);
+ }
+
+ if (newtup != NULL)
+ heap_freetuple(newtup);
+
+ table_close(pg_shseclabel, RowExclusiveLock);
+}
+
+/*
+ * SetSecurityLabel attempts to set the security label for the specified
+ * provider on the specified object to the given value. NULL means that any
+ * existing label should be deleted.
+ *
+ * Shared objects are routed to SetSharedSecurityLabel; otherwise we
+ * update/insert/delete the matching pg_seclabel tuple.
+ */
+void
+SetSecurityLabel(const ObjectAddress *object,
+ const char *provider, const char *label)
+{
+ Relation pg_seclabel;
+ ScanKeyData keys[4];
+ SysScanDesc scan;
+ HeapTuple oldtup;
+ HeapTuple newtup = NULL;
+ Datum values[Natts_pg_seclabel];
+ bool nulls[Natts_pg_seclabel];
+ bool replaces[Natts_pg_seclabel];
+
+ /* Shared objects have their own security label catalog. */
+ if (IsSharedRelation(object->classId))
+ {
+ SetSharedSecurityLabel(object, provider, label);
+ return;
+ }
+
+ /* Prepare to form or update a tuple, if necessary. */
+ memset(nulls, false, sizeof(nulls));
+ memset(replaces, false, sizeof(replaces));
+ values[Anum_pg_seclabel_objoid - 1] = ObjectIdGetDatum(object->objectId);
+ values[Anum_pg_seclabel_classoid - 1] = ObjectIdGetDatum(object->classId);
+ values[Anum_pg_seclabel_objsubid - 1] = Int32GetDatum(object->objectSubId);
+ values[Anum_pg_seclabel_provider - 1] = CStringGetTextDatum(provider);
+ if (label != NULL)
+ values[Anum_pg_seclabel_label - 1] = CStringGetTextDatum(label);
+
+ /* Use the index to search for a matching old tuple */
+ ScanKeyInit(&keys[0],
+ Anum_pg_seclabel_objoid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(object->objectId));
+ ScanKeyInit(&keys[1],
+ Anum_pg_seclabel_classoid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(object->classId));
+ ScanKeyInit(&keys[2],
+ Anum_pg_seclabel_objsubid,
+ BTEqualStrategyNumber, F_INT4EQ,
+ Int32GetDatum(object->objectSubId));
+ ScanKeyInit(&keys[3],
+ Anum_pg_seclabel_provider,
+ BTEqualStrategyNumber, F_TEXTEQ,
+ CStringGetTextDatum(provider));
+
+ pg_seclabel = table_open(SecLabelRelationId, RowExclusiveLock);
+
+ scan = systable_beginscan(pg_seclabel, SecLabelObjectIndexId, true,
+ NULL, 4, keys);
+
+ oldtup = systable_getnext(scan);
+ if (HeapTupleIsValid(oldtup))
+ {
+ if (label == NULL)
+ CatalogTupleDelete(pg_seclabel, &oldtup->t_self);
+ else
+ {
+ replaces[Anum_pg_seclabel_label - 1] = true;
+ newtup = heap_modify_tuple(oldtup, RelationGetDescr(pg_seclabel),
+ values, nulls, replaces);
+ CatalogTupleUpdate(pg_seclabel, &oldtup->t_self, newtup);
+ }
+ }
+ systable_endscan(scan);
+
+ /* If we didn't find an old tuple, insert a new one */
+ if (newtup == NULL && label != NULL)
+ {
+ newtup = heap_form_tuple(RelationGetDescr(pg_seclabel),
+ values, nulls);
+ CatalogTupleInsert(pg_seclabel, newtup);
+ }
+
+ /* Update indexes, if necessary */
+ if (newtup != NULL)
+ heap_freetuple(newtup);
+
+ table_close(pg_seclabel, RowExclusiveLock);
+}
+
+/*
+ * DeleteSharedSecurityLabel is a helper function of DeleteSecurityLabel
+ * to handle shared database objects.
+ *
+ * Removes all pg_shseclabel entries (for every provider) for the given
+ * shared object.
+ */
+void
+DeleteSharedSecurityLabel(Oid objectId, Oid classId)
+{
+ Relation pg_shseclabel;
+ ScanKeyData skey[2];
+ SysScanDesc scan;
+ HeapTuple oldtup;
+
+ ScanKeyInit(&skey[0],
+ Anum_pg_shseclabel_objoid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(objectId));
+ ScanKeyInit(&skey[1],
+ Anum_pg_shseclabel_classoid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(classId));
+
+ pg_shseclabel = table_open(SharedSecLabelRelationId, RowExclusiveLock);
+
+ scan = systable_beginscan(pg_shseclabel, SharedSecLabelObjectIndexId, true,
+ NULL, 2, skey);
+ while (HeapTupleIsValid(oldtup = systable_getnext(scan)))
+ CatalogTupleDelete(pg_shseclabel, &oldtup->t_self);
+ systable_endscan(scan);
+
+ table_close(pg_shseclabel, RowExclusiveLock);
+}
+
+/*
+ * DeleteSecurityLabel removes all security labels for an object (and any
+ * sub-objects, if applicable).
+ *
+ * With objectSubId == 0 all labels for the object and its sub-objects are
+ * removed; a nonzero objectSubId restricts deletion to that sub-object.
+ */
+void
+DeleteSecurityLabel(const ObjectAddress *object)
+{
+ Relation pg_seclabel;
+ ScanKeyData skey[3];
+ SysScanDesc scan;
+ HeapTuple oldtup;
+ int nkeys;
+
+ /* Shared objects have their own security label catalog. */
+ if (IsSharedRelation(object->classId))
+ {
+ Assert(object->objectSubId == 0);
+ DeleteSharedSecurityLabel(object->objectId, object->classId);
+ return;
+ }
+
+ ScanKeyInit(&skey[0],
+ Anum_pg_seclabel_objoid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(object->objectId));
+ ScanKeyInit(&skey[1],
+ Anum_pg_seclabel_classoid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(object->classId));
+ if (object->objectSubId != 0)
+ {
+ ScanKeyInit(&skey[2],
+ Anum_pg_seclabel_objsubid,
+ BTEqualStrategyNumber, F_INT4EQ,
+ Int32GetDatum(object->objectSubId));
+ nkeys = 3;
+ }
+ else
+ nkeys = 2;
+
+ pg_seclabel = table_open(SecLabelRelationId, RowExclusiveLock);
+
+ scan = systable_beginscan(pg_seclabel, SecLabelObjectIndexId, true,
+ NULL, nkeys, skey);
+ while (HeapTupleIsValid(oldtup = systable_getnext(scan)))
+ CatalogTupleDelete(pg_seclabel, &oldtup->t_self);
+ systable_endscan(scan);
+
+ table_close(pg_seclabel, RowExclusiveLock);
+}
+
+/*
+ * register_label_provider
+ *
+ * Register a security label provider under the given name.  The provider
+ * entry (and a copy of the name) is allocated in TopMemoryContext so it
+ * survives for the life of the backend; providers are consulted by
+ * ExecSecLabelStmt via label_provider_list.
+ */
+void
+register_label_provider(const char *provider_name, check_object_relabel_type hook)
+{
+ LabelProvider *provider;
+ MemoryContext oldcxt;
+
+ oldcxt = MemoryContextSwitchTo(TopMemoryContext);
+ provider = palloc(sizeof(LabelProvider));
+ provider->provider_name = pstrdup(provider_name);
+ provider->hook = hook;
+ label_provider_list = lappend(label_provider_list, provider);
+ MemoryContextSwitchTo(oldcxt);
+}
diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c
new file mode 100644
index 0000000..acaf660
--- /dev/null
+++ b/src/backend/commands/sequence.c
@@ -0,0 +1,1917 @@
+/*-------------------------------------------------------------------------
+ *
+ * sequence.c
+ * PostgreSQL sequences support code.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/commands/sequence.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/bufmask.h"
+#include "access/htup_details.h"
+#include "access/multixact.h"
+#include "access/relation.h"
+#include "access/table.h"
+#include "access/transam.h"
+#include "access/xact.h"
+#include "access/xlog.h"
+#include "access/xloginsert.h"
+#include "access/xlogutils.h"
+#include "catalog/dependency.h"
+#include "catalog/indexing.h"
+#include "catalog/namespace.h"
+#include "catalog/objectaccess.h"
+#include "catalog/pg_sequence.h"
+#include "catalog/pg_type.h"
+#include "catalog/storage_xlog.h"
+#include "commands/defrem.h"
+#include "commands/sequence.h"
+#include "commands/tablecmds.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "nodes/makefuncs.h"
+#include "parser/parse_type.h"
+#include "storage/lmgr.h"
+#include "storage/proc.h"
+#include "storage/smgr.h"
+#include "utils/acl.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/resowner.h"
+#include "utils/syscache.h"
+#include "utils/varlena.h"
+
+
+/*
+ * We don't want to log each fetching of a value from a sequence,
+ * so we pre-log a few fetches in advance. In the event of
+ * crash we can lose (skip over) as many values as we pre-logged.
+ */
+#define SEQ_LOG_VALS 32
+
+/*
+ * The "special area" of a sequence's buffer page looks like this.
+ *
+ * SEQ_MAGIC is written into the page's special space when the page is
+ * initialized (see fill_seq_fork_with_data), so a sequence page can be
+ * recognized as such.
+ */
+#define SEQ_MAGIC 0x1717
+
+typedef struct sequence_magic
+{
+ uint32 magic; /* always SEQ_MAGIC for a valid sequence page */
+} sequence_magic;
+
+/*
+ * We store a SeqTable item for every sequence we have touched in the current
+ * session. This is needed to hold onto nextval/currval state. (We can't
+ * rely on the relcache, since it's only, well, a cache, and may decide to
+ * discard entries.)
+ */
+typedef struct SeqTableData
+{
+ Oid relid; /* pg_class OID of this sequence (hash key) */
+ Oid filenode; /* last seen relfilenode of this sequence */
+ LocalTransactionId lxid; /* xact in which we last did a seq op */
+ bool last_valid; /* do we have a valid "last" value? */
+ int64 last; /* value last returned by nextval */
+ int64 cached; /* last value already cached for nextval */
+ /* if last != cached, we have not used up all the cached values */
+ int64 increment; /* copy of sequence's increment field */
+ /* note that increment is zero until we first do nextval_internal() */
+} SeqTableData;
+
+/* conventional handle type for seqhashtab entries, used throughout */
+typedef SeqTableData *SeqTable;
+
+static HTAB *seqhashtab = NULL; /* hash table for SeqTable items */
+
+/*
+ * last_used_seq is updated by nextval() to point to the last used
+ * sequence.
+ */
+static SeqTableData *last_used_seq = NULL;
+
+static void fill_seq_with_data(Relation rel, HeapTuple tuple);
+static void fill_seq_fork_with_data(Relation rel, HeapTuple tuple, ForkNumber forkNum);
+static Relation lock_and_open_sequence(SeqTable seq);
+static void create_seq_hashtable(void);
+static void init_sequence(Oid relid, SeqTable *p_elm, Relation *p_rel);
+static Form_pg_sequence_data read_seq_tuple(Relation rel,
+ Buffer *buf, HeapTuple seqdatatuple);
+static void init_params(ParseState *pstate, List *options, bool for_identity,
+ bool isInit,
+ Form_pg_sequence seqform,
+ Form_pg_sequence_data seqdataform,
+ bool *need_seq_rewrite,
+ List **owned_by);
+static void do_setval(Oid relid, int64 next, bool iscalled);
+static void process_owned_by(Relation seqrel, List *owned_by, bool for_identity);
+
+
+/*
+ * DefineSequence
+ *		Creates a new sequence relation
+ *
+ * Creates the sequence's one-row storage relation, initializes its data
+ * tuple, and inserts the parameter row into pg_sequence.  Returns the
+ * ObjectAddress of the new sequence, or InvalidObjectAddress when
+ * IF NOT EXISTS was given and a relation of that name already existed.
+ */
+ObjectAddress
+DefineSequence(ParseState *pstate, CreateSeqStmt *seq)
+{
+ FormData_pg_sequence seqform;
+ FormData_pg_sequence_data seqdataform;
+ bool need_seq_rewrite;
+ List *owned_by;
+ CreateStmt *stmt = makeNode(CreateStmt);
+ Oid seqoid;
+ ObjectAddress address;
+ Relation rel;
+ HeapTuple tuple;
+ TupleDesc tupDesc;
+ Datum value[SEQ_COL_LASTCOL];
+ bool null[SEQ_COL_LASTCOL];
+ Datum pgs_values[Natts_pg_sequence];
+ bool pgs_nulls[Natts_pg_sequence];
+ int i;
+
+ /*
+ * If if_not_exists was given and a relation with the same name already
+ * exists, bail out. (Note: we needn't check this when not if_not_exists,
+ * because DefineRelation will complain anyway.)
+ */
+ if (seq->if_not_exists)
+ {
+ RangeVarGetAndCheckCreationNamespace(seq->sequence, NoLock, &seqoid);
+ if (OidIsValid(seqoid))
+ {
+ /*
+ * If we are in an extension script, insist that the pre-existing
+ * object be a member of the extension, to avoid security risks.
+ */
+ ObjectAddressSet(address, RelationRelationId, seqoid);
+ checkMembershipInCurrentExtension(&address);
+
+ /* OK to skip */
+ ereport(NOTICE,
+ (errcode(ERRCODE_DUPLICATE_TABLE),
+ errmsg("relation \"%s\" already exists, skipping",
+ seq->sequence->relname)));
+ return InvalidObjectAddress;
+ }
+ }
+
+ /* Check and set all option values */
+ init_params(pstate, seq->options, seq->for_identity, true,
+ &seqform, &seqdataform,
+ &need_seq_rewrite, &owned_by);
+
+ /*
+ * Create relation (and fill value[] and null[] for the tuple)
+ */
+ stmt->tableElts = NIL;
+ for (i = SEQ_COL_FIRSTCOL; i <= SEQ_COL_LASTCOL; i++)
+ {
+ ColumnDef *coldef = makeNode(ColumnDef);
+
+ coldef->inhcount = 0;
+ coldef->is_local = true;
+ coldef->is_not_null = true;
+ coldef->is_from_type = false;
+ coldef->storage = 0;
+ coldef->raw_default = NULL;
+ coldef->cooked_default = NULL;
+ coldef->collClause = NULL;
+ coldef->collOid = InvalidOid;
+ coldef->constraints = NIL;
+ coldef->location = -1;
+
+ null[i - 1] = false;
+
+ /* the three columns of a sequence's data page tuple */
+ switch (i)
+ {
+ case SEQ_COL_LASTVAL:
+ coldef->typeName = makeTypeNameFromOid(INT8OID, -1);
+ coldef->colname = "last_value";
+ value[i - 1] = Int64GetDatumFast(seqdataform.last_value);
+ break;
+ case SEQ_COL_LOG:
+ coldef->typeName = makeTypeNameFromOid(INT8OID, -1);
+ coldef->colname = "log_cnt";
+ value[i - 1] = Int64GetDatum((int64) 0);
+ break;
+ case SEQ_COL_CALLED:
+ coldef->typeName = makeTypeNameFromOid(BOOLOID, -1);
+ coldef->colname = "is_called";
+ value[i - 1] = BoolGetDatum(false);
+ break;
+ }
+ stmt->tableElts = lappend(stmt->tableElts, coldef);
+ }
+
+ stmt->relation = seq->sequence;
+ stmt->inhRelations = NIL;
+ stmt->constraints = NIL;
+ stmt->options = NIL;
+ stmt->oncommit = ONCOMMIT_NOOP;
+ stmt->tablespacename = NULL;
+ stmt->if_not_exists = seq->if_not_exists;
+
+ address = DefineRelation(stmt, RELKIND_SEQUENCE, seq->ownerId, NULL, NULL);
+ seqoid = address.objectId;
+ Assert(seqoid != InvalidOid);
+
+ rel = table_open(seqoid, AccessExclusiveLock);
+ tupDesc = RelationGetDescr(rel);
+
+ /* now initialize the sequence's data */
+ tuple = heap_form_tuple(tupDesc, value, null);
+ fill_seq_with_data(rel, tuple);
+
+ /* process OWNED BY if given */
+ if (owned_by)
+ process_owned_by(rel, owned_by, seq->for_identity);
+
+ /* close, but keep the AccessExclusiveLock until end of transaction */
+ table_close(rel, NoLock);
+
+ /* fill in pg_sequence */
+ rel = table_open(SequenceRelationId, RowExclusiveLock);
+ tupDesc = RelationGetDescr(rel);
+
+ memset(pgs_nulls, 0, sizeof(pgs_nulls));
+
+ pgs_values[Anum_pg_sequence_seqrelid - 1] = ObjectIdGetDatum(seqoid);
+ pgs_values[Anum_pg_sequence_seqtypid - 1] = ObjectIdGetDatum(seqform.seqtypid);
+ pgs_values[Anum_pg_sequence_seqstart - 1] = Int64GetDatumFast(seqform.seqstart);
+ pgs_values[Anum_pg_sequence_seqincrement - 1] = Int64GetDatumFast(seqform.seqincrement);
+ pgs_values[Anum_pg_sequence_seqmax - 1] = Int64GetDatumFast(seqform.seqmax);
+ pgs_values[Anum_pg_sequence_seqmin - 1] = Int64GetDatumFast(seqform.seqmin);
+ pgs_values[Anum_pg_sequence_seqcache - 1] = Int64GetDatumFast(seqform.seqcache);
+ pgs_values[Anum_pg_sequence_seqcycle - 1] = BoolGetDatum(seqform.seqcycle);
+
+ tuple = heap_form_tuple(tupDesc, pgs_values, pgs_nulls);
+ CatalogTupleInsert(rel, tuple);
+
+ heap_freetuple(tuple);
+ table_close(rel, RowExclusiveLock);
+
+ return address;
+}
+
+/*
+ * Reset a sequence to its initial value.
+ *
+ * After this, the next nextval() will return the sequence's start value
+ * (last_value is set to seqstart with is_called = false).
+ *
+ * The change is made transactionally, so that on failure of the current
+ * transaction, the sequence will be restored to its previous state.
+ * We do that by creating a whole new relfilenode for the sequence; so this
+ * works much like the rewriting forms of ALTER TABLE.
+ *
+ * Caller is assumed to have acquired AccessExclusiveLock on the sequence,
+ * which must not be released until end of transaction. Caller is also
+ * responsible for permissions checking.
+ */
+void
+ResetSequence(Oid seq_relid)
+{
+ Relation seq_rel;
+ SeqTable elm;
+ Form_pg_sequence_data seq;
+ Buffer buf;
+ HeapTupleData seqdatatuple;
+ HeapTuple tuple;
+ HeapTuple pgstuple;
+ Form_pg_sequence pgsform;
+ int64 startv;
+
+ /*
+ * Read the old sequence. This does a bit more work than really
+ * necessary, but it's simple, and we do want to double-check that it's
+ * indeed a sequence.
+ */
+ init_sequence(seq_relid, &elm, &seq_rel);
+ (void) read_seq_tuple(seq_rel, &buf, &seqdatatuple);
+
+ /* fetch the start value from pg_sequence */
+ pgstuple = SearchSysCache1(SEQRELID, ObjectIdGetDatum(seq_relid));
+ if (!HeapTupleIsValid(pgstuple))
+ elog(ERROR, "cache lookup failed for sequence %u", seq_relid);
+ pgsform = (Form_pg_sequence) GETSTRUCT(pgstuple);
+ startv = pgsform->seqstart;
+ ReleaseSysCache(pgstuple);
+
+ /*
+ * Copy the existing sequence tuple.
+ */
+ tuple = heap_copytuple(&seqdatatuple);
+
+ /* Now we're done with the old page */
+ UnlockReleaseBuffer(buf);
+
+ /*
+ * Modify the copied tuple to execute the restart (compare the RESTART
+ * action in AlterSequence)
+ */
+ seq = (Form_pg_sequence_data) GETSTRUCT(tuple);
+ seq->last_value = startv;
+ seq->is_called = false;
+ seq->log_cnt = 0;
+
+ /*
+ * Create a new storage file for the sequence.
+ */
+ RelationSetNewRelfilenode(seq_rel, seq_rel->rd_rel->relpersistence);
+
+ /*
+ * Ensure sequence's relfrozenxid is at 0, since it won't contain any
+ * unfrozen XIDs. Same with relminmxid, since a sequence will never
+ * contain multixacts.
+ */
+ Assert(seq_rel->rd_rel->relfrozenxid == InvalidTransactionId);
+ Assert(seq_rel->rd_rel->relminmxid == InvalidMultiXactId);
+
+ /*
+ * Insert the modified tuple into the new storage file.
+ */
+ fill_seq_with_data(seq_rel, tuple);
+
+ /* Clear local cache so that we don't think we have cached numbers */
+ /* Note that we do not change the currval() state */
+ elm->cached = elm->last;
+
+ relation_close(seq_rel, NoLock);
+}
+
+/*
+ * Initialize a sequence's relation with the specified tuple as content
+ *
+ * This handles unlogged sequences by writing to both the main and the init
+ * fork as necessary.
+ */
+static void
+fill_seq_with_data(Relation rel, HeapTuple tuple)
+{
+ fill_seq_fork_with_data(rel, tuple, MAIN_FORKNUM);
+
+ if (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED)
+ {
+ SMgrRelation srel;
+
+ /* create the init fork and WAL-log its creation */
+ srel = smgropen(rel->rd_node, InvalidBackendId);
+ smgrcreate(srel, INIT_FORKNUM, false);
+ log_smgrcreate(&rel->rd_node, INIT_FORKNUM);
+ fill_seq_fork_with_data(rel, tuple, INIT_FORKNUM);
+ FlushRelationBuffers(rel);
+ smgrclose(srel);
+ }
+}
+
+/*
+ * Initialize a sequence's relation fork with the specified tuple as content
+ *
+ * The fork's first (and only) page gets the sequence_magic special area and
+ * a single frozen tuple.  The page is WAL-logged when the relation needs
+ * WAL, and always for the init fork.
+ */
+static void
+fill_seq_fork_with_data(Relation rel, HeapTuple tuple, ForkNumber forkNum)
+{
+ Buffer buf;
+ Page page;
+ sequence_magic *sm;
+ OffsetNumber offnum;
+
+ /* Initialize first page of relation with special magic number */
+
+ buf = ReadBufferExtended(rel, forkNum, P_NEW, RBM_NORMAL, NULL);
+ Assert(BufferGetBlockNumber(buf) == 0);
+
+ page = BufferGetPage(buf);
+
+ PageInit(page, BufferGetPageSize(buf), sizeof(sequence_magic));
+ sm = (sequence_magic *) PageGetSpecialPointer(page);
+ sm->magic = SEQ_MAGIC;
+
+ /* Now insert sequence tuple */
+
+ LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+
+ /*
+ * Since VACUUM does not process sequences, we have to force the tuple to
+ * have xmin = FrozenTransactionId now. Otherwise it would become
+ * invisible to SELECTs after 2G transactions. It is okay to do this
+ * because if the current transaction aborts, no other xact will ever
+ * examine the sequence tuple anyway.
+ */
+ HeapTupleHeaderSetXmin(tuple->t_data, FrozenTransactionId);
+ HeapTupleHeaderSetXminFrozen(tuple->t_data);
+ HeapTupleHeaderSetCmin(tuple->t_data, FirstCommandId);
+ HeapTupleHeaderSetXmax(tuple->t_data, InvalidTransactionId);
+ tuple->t_data->t_infomask |= HEAP_XMAX_INVALID;
+ ItemPointerSet(&tuple->t_data->t_ctid, 0, FirstOffsetNumber);
+
+ /* check the comment above nextval_internal()'s equivalent call. */
+ if (RelationNeedsWAL(rel))
+ GetTopTransactionId();
+
+ START_CRIT_SECTION();
+
+ /* must dirty the buffer before XLogInsert(); see nextval_internal() */
+ MarkBufferDirty(buf);
+
+ offnum = PageAddItem(page, (Item) tuple->t_data, tuple->t_len,
+ InvalidOffsetNumber, false, false);
+ if (offnum != FirstOffsetNumber)
+ elog(ERROR, "failed to add sequence tuple to page");
+
+ /* XLOG stuff: the init fork is logged even if the relation isn't */
+ if (RelationNeedsWAL(rel) || forkNum == INIT_FORKNUM)
+ {
+ xl_seq_rec xlrec;
+ XLogRecPtr recptr;
+
+ XLogBeginInsert();
+ /* REGBUF_WILL_INIT: page is reinitialized from the record at replay */
+ XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);
+
+ xlrec.node = rel->rd_node;
+
+ XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec));
+ XLogRegisterData((char *) tuple->t_data, tuple->t_len);
+
+ recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);
+
+ PageSetLSN(page, recptr);
+ }
+
+ END_CRIT_SECTION();
+
+ UnlockReleaseBuffer(buf);
+}
+
+/*
+ * AlterSequence
+ *
+ * Modify the definition of a sequence relation
+ *
+ * Returns the address of the sequence, or InvalidObjectAddress when
+ * missing_ok was given and the sequence does not exist.  If any option
+ * requires changing the sequence data itself, the sequence gets a new
+ * relfilenode so the change is transactional.
+ */
+ObjectAddress
+AlterSequence(ParseState *pstate, AlterSeqStmt *stmt)
+{
+ Oid relid;
+ SeqTable elm;
+ Relation seqrel;
+ Buffer buf;
+ HeapTupleData datatuple;
+ Form_pg_sequence seqform;
+ Form_pg_sequence_data newdataform;
+ bool need_seq_rewrite;
+ List *owned_by;
+ ObjectAddress address;
+ Relation rel;
+ HeapTuple seqtuple;
+ HeapTuple newdatatuple;
+
+ /* Open and lock sequence, and check for ownership along the way. */
+ relid = RangeVarGetRelidExtended(stmt->sequence,
+ ShareRowExclusiveLock,
+ stmt->missing_ok ? RVR_MISSING_OK : 0,
+ RangeVarCallbackOwnsRelation,
+ NULL);
+ if (relid == InvalidOid)
+ {
+ ereport(NOTICE,
+ (errmsg("relation \"%s\" does not exist, skipping",
+ stmt->sequence->relname)));
+ return InvalidObjectAddress;
+ }
+
+ init_sequence(relid, &elm, &seqrel);
+
+ /* fetch the pg_sequence tuple for in-place modification by init_params */
+ rel = table_open(SequenceRelationId, RowExclusiveLock);
+ seqtuple = SearchSysCacheCopy1(SEQRELID,
+ ObjectIdGetDatum(relid));
+ if (!HeapTupleIsValid(seqtuple))
+ elog(ERROR, "cache lookup failed for sequence %u",
+ relid);
+
+ seqform = (Form_pg_sequence) GETSTRUCT(seqtuple);
+
+ /* lock page's buffer and read tuple into new sequence structure */
+ (void) read_seq_tuple(seqrel, &buf, &datatuple);
+
+ /* copy the existing sequence data tuple, so it can be modified locally */
+ newdatatuple = heap_copytuple(&datatuple);
+ newdataform = (Form_pg_sequence_data) GETSTRUCT(newdatatuple);
+
+ UnlockReleaseBuffer(buf);
+
+ /* Check and set new values */
+ init_params(pstate, stmt->options, stmt->for_identity, false,
+ seqform, newdataform,
+ &need_seq_rewrite, &owned_by);
+
+ /* Clear local cache so that we don't think we have cached numbers */
+ /* Note that we do not change the currval() state */
+ elm->cached = elm->last;
+
+ /* If needed, rewrite the sequence relation itself */
+ if (need_seq_rewrite)
+ {
+ /* check the comment above nextval_internal()'s equivalent call. */
+ if (RelationNeedsWAL(seqrel))
+ GetTopTransactionId();
+
+ /*
+ * Create a new storage file for the sequence, making the state
+ * changes transactional.
+ */
+ RelationSetNewRelfilenode(seqrel, seqrel->rd_rel->relpersistence);
+
+ /*
+ * Ensure sequence's relfrozenxid is at 0, since it won't contain any
+ * unfrozen XIDs. Same with relminmxid, since a sequence will never
+ * contain multixacts.
+ */
+ Assert(seqrel->rd_rel->relfrozenxid == InvalidTransactionId);
+ Assert(seqrel->rd_rel->relminmxid == InvalidMultiXactId);
+
+ /*
+ * Insert the modified tuple into the new storage file.
+ */
+ fill_seq_with_data(seqrel, newdatatuple);
+ }
+
+ /* process OWNED BY if given */
+ if (owned_by)
+ process_owned_by(seqrel, owned_by, stmt->for_identity);
+
+ /* update the pg_sequence tuple (we could skip this in some cases...) */
+ CatalogTupleUpdate(rel, &seqtuple->t_self, seqtuple);
+
+ InvokeObjectPostAlterHook(RelationRelationId, relid, 0);
+
+ ObjectAddressSet(address, RelationRelationId, relid);
+
+ table_close(rel, RowExclusiveLock);
+ relation_close(seqrel, NoLock);
+
+ return address;
+}
+
+/*
+ * SequenceChangePersistence
+ *		Rewrite a sequence's storage with a new persistence setting.
+ *
+ * Assigns the sequence a new relfilenode created with newrelpersistence
+ * and copies the current sequence state into it, so the change is
+ * transactional.
+ */
+void
+SequenceChangePersistence(Oid relid, char newrelpersistence)
+{
+ SeqTable elm;
+ Relation seqrel;
+ Buffer buf;
+ HeapTupleData seqdatatuple;
+
+ init_sequence(relid, &elm, &seqrel);
+
+ /* check the comment above nextval_internal()'s equivalent call. */
+ if (RelationNeedsWAL(seqrel))
+ GetTopTransactionId();
+
+ (void) read_seq_tuple(seqrel, &buf, &seqdatatuple);
+ RelationSetNewRelfilenode(seqrel, newrelpersistence);
+ fill_seq_with_data(seqrel, &seqdatatuple);
+ UnlockReleaseBuffer(buf);
+
+ relation_close(seqrel, NoLock);
+}
+
+/*
+ * DeleteSequenceTuple
+ *		Remove the pg_sequence catalog row for the given sequence.
+ *
+ * Errors out if no pg_sequence row exists for relid.
+ */
+void
+DeleteSequenceTuple(Oid relid)
+{
+ Relation rel;
+ HeapTuple tuple;
+
+ rel = table_open(SequenceRelationId, RowExclusiveLock);
+
+ tuple = SearchSysCache1(SEQRELID, ObjectIdGetDatum(relid));
+ if (!HeapTupleIsValid(tuple))
+ elog(ERROR, "cache lookup failed for sequence %u", relid);
+
+ CatalogTupleDelete(rel, &tuple->t_self);
+
+ ReleaseSysCache(tuple);
+ table_close(rel, RowExclusiveLock);
+}
+
+/*
+ * Note: nextval with a text argument is no longer exported as a pg_proc
+ * entry, but we keep it around to ease porting of C code that may have
+ * called the function directly.
+ *
+ * The text argument is parsed as a (possibly schema-qualified) sequence
+ * name, resolved to an OID, and handed to nextval_internal.
+ */
+Datum
+nextval(PG_FUNCTION_ARGS)
+{
+ text *seqin = PG_GETARG_TEXT_PP(0);
+ RangeVar *sequence;
+ Oid relid;
+
+ sequence = makeRangeVarFromNameList(textToQualifiedNameList(seqin));
+
+ /*
+ * XXX: This is not safe in the presence of concurrent DDL, but acquiring
+ * a lock here is more expensive than letting nextval_internal do it,
+ * since the latter maintains a cache that keeps us from hitting the lock
+ * manager more than once per transaction. It's not clear whether the
+ * performance penalty is material in practice, but for now, we do it this
+ * way.
+ */
+ relid = RangeVarGetRelid(sequence, NoLock, false);
+
+ PG_RETURN_INT64(nextval_internal(relid, true));
+}
+
+/*
+ * nextval_oid
+ *		SQL-callable nextval(regclass): advance the sequence identified by
+ *		OID and return its next value.
+ */
+Datum
+nextval_oid(PG_FUNCTION_ARGS)
+{
+ Oid seqoid = PG_GETARG_OID(0);
+
+ PG_RETURN_INT64(nextval_internal(seqoid, true));
+}
+
+/*
+ * nextval_internal
+ *		Guts of nextval(): advance a sequence and return its next value.
+ *
+ * If check_permissions is true, require USAGE or UPDATE privilege on the
+ * sequence.  When the session still holds cached-but-unissued values for
+ * this sequence, one of those is returned without touching the sequence's
+ * page.  Otherwise up to "cache" values are fetched from the page, with
+ * SEQ_LOG_VALS extra values WAL-logged in advance so that most subsequent
+ * calls need not write WAL.
+ */
+int64
+nextval_internal(Oid relid, bool check_permissions)
+{
+ SeqTable elm;
+ Relation seqrel;
+ Buffer buf;
+ Page page;
+ HeapTuple pgstuple;
+ Form_pg_sequence pgsform;
+ HeapTupleData seqdatatuple;
+ Form_pg_sequence_data seq;
+ int64 incby,
+ maxv,
+ minv,
+ cache,
+ log,
+ fetch,
+ last;
+ int64 result,
+ next,
+ rescnt = 0;
+ bool cycle;
+ bool logit = false;
+
+ /* open and lock sequence */
+ init_sequence(relid, &elm, &seqrel);
+
+ if (check_permissions &&
+ pg_class_aclcheck(elm->relid, GetUserId(),
+ ACL_USAGE | ACL_UPDATE) != ACLCHECK_OK)
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("permission denied for sequence %s",
+ RelationGetRelationName(seqrel))));
+
+ /* read-only transactions may only modify temp sequences */
+ if (!seqrel->rd_islocaltemp)
+ PreventCommandIfReadOnly("nextval()");
+
+ /*
+ * Forbid this during parallel operation because, to make it work, the
+ * cooperating backends would need to share the backend-local cached
+ * sequence information. Currently, we don't support that.
+ */
+ PreventCommandIfParallelMode("nextval()");
+
+ if (elm->last != elm->cached) /* some numbers were cached */
+ {
+ Assert(elm->last_valid);
+ Assert(elm->increment != 0);
+ elm->last += elm->increment;
+ relation_close(seqrel, NoLock);
+ last_used_seq = elm;
+ return elm->last;
+ }
+
+ /* fetch the sequence's parameters from pg_sequence */
+ pgstuple = SearchSysCache1(SEQRELID, ObjectIdGetDatum(relid));
+ if (!HeapTupleIsValid(pgstuple))
+ elog(ERROR, "cache lookup failed for sequence %u", relid);
+ pgsform = (Form_pg_sequence) GETSTRUCT(pgstuple);
+ incby = pgsform->seqincrement;
+ maxv = pgsform->seqmax;
+ minv = pgsform->seqmin;
+ cache = pgsform->seqcache;
+ cycle = pgsform->seqcycle;
+ ReleaseSysCache(pgstuple);
+
+ /* lock page' buffer and read tuple */
+ seq = read_seq_tuple(seqrel, &buf, &seqdatatuple);
+ page = BufferGetPage(buf);
+
+ elm->increment = incby;
+ last = next = result = seq->last_value;
+ fetch = cache;
+ log = seq->log_cnt;
+
+ if (!seq->is_called)
+ {
+ rescnt++; /* return last_value if not is_called */
+ fetch--;
+ }
+
+ /*
+ * Decide whether we should emit a WAL log record. If so, force up the
+ * fetch count to grab SEQ_LOG_VALS more values than we actually need to
+ * cache. (These will then be usable without logging.)
+ *
+ * If this is the first nextval after a checkpoint, we must force a new
+ * WAL record to be written anyway, else replay starting from the
+ * checkpoint would fail to advance the sequence past the logged values.
+ * In this case we may as well fetch extra values.
+ */
+ if (log < fetch || !seq->is_called)
+ {
+ /* forced log to satisfy local demand for values */
+ fetch = log = fetch + SEQ_LOG_VALS;
+ logit = true;
+ }
+ else
+ {
+ XLogRecPtr redoptr = GetRedoRecPtr();
+
+ if (PageGetLSN(page) <= redoptr)
+ {
+ /* last update of seq was before checkpoint */
+ fetch = log = fetch + SEQ_LOG_VALS;
+ logit = true;
+ }
+ }
+
+ while (fetch) /* try to fetch cache [+ log ] numbers */
+ {
+ /*
+ * Check MAXVALUE for ascending sequences and MINVALUE for descending
+ * sequences
+ */
+ if (incby > 0)
+ {
+ /* ascending sequence */
+ if ((maxv >= 0 && next > maxv - incby) ||
+ (maxv < 0 && next + incby > maxv))
+ {
+ if (rescnt > 0)
+ break; /* stop fetching */
+ if (!cycle)
+ ereport(ERROR,
+ (errcode(ERRCODE_SEQUENCE_GENERATOR_LIMIT_EXCEEDED),
+ errmsg("nextval: reached maximum value of sequence \"%s\" (%lld)",
+ RelationGetRelationName(seqrel),
+ (long long) maxv)));
+ next = minv;
+ }
+ else
+ next += incby;
+ }
+ else
+ {
+ /* descending sequence */
+ if ((minv < 0 && next < minv - incby) ||
+ (minv >= 0 && next + incby < minv))
+ {
+ if (rescnt > 0)
+ break; /* stop fetching */
+ if (!cycle)
+ ereport(ERROR,
+ (errcode(ERRCODE_SEQUENCE_GENERATOR_LIMIT_EXCEEDED),
+ errmsg("nextval: reached minimum value of sequence \"%s\" (%lld)",
+ RelationGetRelationName(seqrel),
+ (long long) minv)));
+ next = maxv;
+ }
+ else
+ next += incby;
+ }
+ fetch--;
+ if (rescnt < cache)
+ {
+ log--;
+ rescnt++;
+ last = next;
+ if (rescnt == 1) /* if it's first result - */
+ result = next; /* it's what to return */
+ }
+ }
+
+ log -= fetch; /* adjust for any unfetched numbers */
+ Assert(log >= 0);
+
+ /* save info in local cache */
+ elm->last = result; /* last returned number */
+ elm->cached = last; /* last fetched number */
+ elm->last_valid = true;
+
+ last_used_seq = elm;
+
+ /*
+ * If something needs to be WAL logged, acquire an xid, so this
+ * transaction's commit will trigger a WAL flush and wait for syncrep.
+ * It's sufficient to ensure the toplevel transaction has an xid, no need
+ * to assign xids subxacts, that'll already trigger an appropriate wait.
+ * (Have to do that here, so we're outside the critical section)
+ */
+ if (logit && RelationNeedsWAL(seqrel))
+ GetTopTransactionId();
+
+ /* ready to change the on-disk (or really, in-buffer) tuple */
+ START_CRIT_SECTION();
+
+ /*
+ * We must mark the buffer dirty before doing XLogInsert(); see notes in
+ * SyncOneBuffer(). However, we don't apply the desired changes just yet.
+ * This looks like a violation of the buffer update protocol, but it is in
+ * fact safe because we hold exclusive lock on the buffer. Any other
+ * process, including a checkpoint, that tries to examine the buffer
+ * contents will block until we release the lock, and then will see the
+ * final state that we install below.
+ */
+ MarkBufferDirty(buf);
+
+ /* XLOG stuff */
+ if (logit && RelationNeedsWAL(seqrel))
+ {
+ xl_seq_rec xlrec;
+ XLogRecPtr recptr;
+
+ /*
+ * We don't log the current state of the tuple, but rather the state
+ * as it would appear after "log" more fetches. This lets us skip
+ * that many future WAL records, at the cost that we lose those
+ * sequence values if we crash.
+ */
+ XLogBeginInsert();
+ XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);
+
+ /* set values that will be saved in xlog */
+ seq->last_value = next;
+ seq->is_called = true;
+ seq->log_cnt = 0;
+
+ xlrec.node = seqrel->rd_node;
+
+ XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec));
+ XLogRegisterData((char *) seqdatatuple.t_data, seqdatatuple.t_len);
+
+ recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);
+
+ PageSetLSN(page, recptr);
+ }
+
+ /* Now update sequence tuple to the intended final state */
+ seq->last_value = last; /* last fetched number */
+ seq->is_called = true;
+ seq->log_cnt = log; /* how much is logged */
+
+ END_CRIT_SECTION();
+
+ UnlockReleaseBuffer(buf);
+
+ relation_close(seqrel, NoLock);
+
+ return result;
+}
+
+/*
+ * currval_oid
+ *		Return the value most recently obtained by nextval() for this
+ *		sequence in the current session.
+ *
+ * Errors out if nextval() has not yet been called for the sequence in
+ * this session, or if the caller lacks SELECT or USAGE privilege.
+ */
+Datum
+currval_oid(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ int64 result;
+ SeqTable elm;
+ Relation seqrel;
+
+ /* open and lock sequence */
+ init_sequence(relid, &elm, &seqrel);
+
+ if (pg_class_aclcheck(elm->relid, GetUserId(),
+ ACL_SELECT | ACL_USAGE) != ACLCHECK_OK)
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("permission denied for sequence %s",
+ RelationGetRelationName(seqrel))));
+
+ if (!elm->last_valid)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("currval of sequence \"%s\" is not yet defined in this session",
+ RelationGetRelationName(seqrel))));
+
+ result = elm->last;
+
+ relation_close(seqrel, NoLock);
+
+ PG_RETURN_INT64(result);
+}
+
+/*
+ * lastval
+ *		Return the value most recently returned by nextval() in the current
+ *		session, for whichever sequence that was (tracked in last_used_seq).
+ */
+Datum
+lastval(PG_FUNCTION_ARGS)
+{
+ Relation seqrel;
+ int64 result;
+
+ if (last_used_seq == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("lastval is not yet defined in this session")));
+
+ /* Someone may have dropped the sequence since the last nextval() */
+ if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(last_used_seq->relid)))
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("lastval is not yet defined in this session")));
+
+ seqrel = lock_and_open_sequence(last_used_seq);
+
+ /* nextval() must have already been called for this sequence */
+ Assert(last_used_seq->last_valid);
+
+ if (pg_class_aclcheck(last_used_seq->relid, GetUserId(),
+ ACL_SELECT | ACL_USAGE) != ACLCHECK_OK)
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("permission denied for sequence %s",
+ RelationGetRelationName(seqrel))));
+
+ result = last_used_seq->last;
+ relation_close(seqrel, NoLock);
+
+ PG_RETURN_INT64(result);
+}
+
+/*
+ * Main internal procedure that handles 2 & 3 arg forms of SETVAL.
+ *
+ * "next" must lie within the sequence's [minvalue, maxvalue] range, else
+ * an error is raised.  The currval() state is updated only when iscalled
+ * is true; cached-but-unissued values are discarded in any case.
+ *
+ * Note that the 3 arg version (which sets the is_called flag) is
+ * only for use in pg_dump, and setting the is_called flag may not
+ * work if multiple users are attached to the database and referencing
+ * the sequence (unlikely if pg_dump is restoring it).
+ *
+ * It is necessary to have the 3 arg version so that pg_dump can
+ * restore the state of a sequence exactly during data-only restores -
+ * it is the only way to clear the is_called flag in an existing
+ * sequence.
+ */
+static void
+do_setval(Oid relid, int64 next, bool iscalled)
+{
+ SeqTable elm;
+ Relation seqrel;
+ Buffer buf;
+ HeapTupleData seqdatatuple;
+ Form_pg_sequence_data seq;
+ HeapTuple pgstuple;
+ Form_pg_sequence pgsform;
+ int64 maxv,
+ minv;
+
+ /* open and lock sequence */
+ init_sequence(relid, &elm, &seqrel);
+
+ if (pg_class_aclcheck(elm->relid, GetUserId(), ACL_UPDATE) != ACLCHECK_OK)
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("permission denied for sequence %s",
+ RelationGetRelationName(seqrel))));
+
+ /* fetch the bounds from pg_sequence */
+ pgstuple = SearchSysCache1(SEQRELID, ObjectIdGetDatum(relid));
+ if (!HeapTupleIsValid(pgstuple))
+ elog(ERROR, "cache lookup failed for sequence %u", relid);
+ pgsform = (Form_pg_sequence) GETSTRUCT(pgstuple);
+ maxv = pgsform->seqmax;
+ minv = pgsform->seqmin;
+ ReleaseSysCache(pgstuple);
+
+ /* read-only transactions may only modify temp sequences */
+ if (!seqrel->rd_islocaltemp)
+ PreventCommandIfReadOnly("setval()");
+
+ /*
+ * Forbid this during parallel operation because, to make it work, the
+ * cooperating backends would need to share the backend-local cached
+ * sequence information. Currently, we don't support that.
+ */
+ PreventCommandIfParallelMode("setval()");
+
+ /* lock page' buffer and read tuple */
+ seq = read_seq_tuple(seqrel, &buf, &seqdatatuple);
+
+ if ((next < minv) || (next > maxv))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("setval: value %lld is out of bounds for sequence \"%s\" (%lld..%lld)",
+ (long long) next, RelationGetRelationName(seqrel),
+ (long long) minv, (long long) maxv)));
+
+ /* Set the currval() state only if iscalled = true */
+ if (iscalled)
+ {
+ elm->last = next; /* last returned number */
+ elm->last_valid = true;
+ }
+
+ /* In any case, forget any future cached numbers */
+ elm->cached = elm->last;
+
+ /* check the comment above nextval_internal()'s equivalent call. */
+ if (RelationNeedsWAL(seqrel))
+ GetTopTransactionId();
+
+ /* ready to change the on-disk (or really, in-buffer) tuple */
+ START_CRIT_SECTION();
+
+ seq->last_value = next; /* last fetched number */
+ seq->is_called = iscalled;
+ seq->log_cnt = 0;
+
+ MarkBufferDirty(buf);
+
+ /* XLOG stuff */
+ if (RelationNeedsWAL(seqrel))
+ {
+ xl_seq_rec xlrec;
+ XLogRecPtr recptr;
+ Page page = BufferGetPage(buf);
+
+ XLogBeginInsert();
+ XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);
+
+ xlrec.node = seqrel->rd_node;
+ XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec));
+ XLogRegisterData((char *) seqdatatuple.t_data, seqdatatuple.t_len);
+
+ recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);
+
+ PageSetLSN(page, recptr);
+ }
+
+ END_CRIT_SECTION();
+
+ UnlockReleaseBuffer(buf);
+
+ relation_close(seqrel, NoLock);
+}
+
+/*
+ * setval_oid
+ *		SQL-callable two-argument setval(): set the sequence's value and
+ *		mark it as called.  See do_setval for the full semantics.
+ */
+Datum
+setval_oid(PG_FUNCTION_ARGS)
+{
+ Oid seqoid = PG_GETARG_OID(0);
+ int64 value = PG_GETARG_INT64(1);
+
+ do_setval(seqoid, value, true);
+
+ PG_RETURN_INT64(value);
+}
+
+/*
+ * setval3_oid
+ *		SQL-callable three-argument setval(): set the sequence's value and
+ *		its is_called flag.  See do_setval for the full semantics.
+ */
+Datum
+setval3_oid(PG_FUNCTION_ARGS)
+{
+ Oid seqoid = PG_GETARG_OID(0);
+ int64 value = PG_GETARG_INT64(1);
+ bool called = PG_GETARG_BOOL(2);
+
+ do_setval(seqoid, value, called);
+
+ PG_RETURN_INT64(value);
+}
+
+
+/*
+ * Open the sequence and acquire lock if needed
+ *
+ * If we haven't touched the sequence already in this transaction,
+ * we need to acquire a lock. We arrange for the lock to
+ * be owned by the top transaction, so that we don't need to do it
+ * more than once per xact.
+ */
+static Relation
+lock_and_open_sequence(SeqTable seq)
+{
+ LocalTransactionId thislxid = MyProc->lxid;
+
+ /* Get the lock if not already held in this xact */
+ if (seq->lxid != thislxid)
+ {
+ ResourceOwner currentOwner;
+
+ currentOwner = CurrentResourceOwner;
+ CurrentResourceOwner = TopTransactionResourceOwner;
+
+ LockRelationOid(seq->relid, RowExclusiveLock);
+
+ CurrentResourceOwner = currentOwner;
+
+ /* Flag that we have a lock in the current xact */
+ seq->lxid = thislxid;
+ }
+
+ /* We now know we have the lock, and can safely open the rel */
+ return relation_open(seq->relid, NoLock);
+}
+
+/*
+ * Create the backend-local hash table that tracks per-sequence
+ * nextval/currval state (seqhashtab), keyed by sequence OID.
+ */
+static void
+create_seq_hashtable(void)
+{
+ HASHCTL hash_ctl;
+
+ hash_ctl.keysize = sizeof(Oid);
+ hash_ctl.entrysize = sizeof(SeqTableData);
+
+ seqhashtab = hash_create("Sequence values", 16, &hash_ctl,
+ HASH_ELEM | HASH_BLOBS);
+}
+
+/*
+ * Given a relation OID, open and lock the sequence. p_elm and p_rel are
+ * output parameters.
+ *
+ * Errors out if the relation is not a sequence.  The caller should close
+ * *p_rel with relation_close(..., NoLock) so the lock is held until end
+ * of transaction (see lock_and_open_sequence).
+ */
+static void
+init_sequence(Oid relid, SeqTable *p_elm, Relation *p_rel)
+{
+ SeqTable elm;
+ Relation seqrel;
+ bool found;
+
+ /* Find or create a hash table entry for this sequence */
+ if (seqhashtab == NULL)
+ create_seq_hashtable();
+
+ elm = (SeqTable) hash_search(seqhashtab, &relid, HASH_ENTER, &found);
+
+ /*
+ * Initialize the new hash table entry if it did not exist already.
+ *
+ * NOTE: seqhashtab entries are stored for the life of a backend (unless
+ * explicitly discarded with DISCARD). If the sequence itself is deleted
+ * then the entry becomes wasted memory, but it's small enough that this
+ * should not matter.
+ */
+ if (!found)
+ {
+ /* relid already filled in */
+ elm->filenode = InvalidOid;
+ elm->lxid = InvalidLocalTransactionId;
+ elm->last_valid = false;
+ elm->last = elm->cached = 0;
+ }
+
+ /*
+ * Open the sequence relation.
+ */
+ seqrel = lock_and_open_sequence(elm);
+
+ if (seqrel->rd_rel->relkind != RELKIND_SEQUENCE)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("\"%s\" is not a sequence",
+ RelationGetRelationName(seqrel))));
+
+ /*
+ * If the sequence has been transactionally replaced since we last saw it,
+ * discard any cached-but-unissued values. We do not touch the currval()
+ * state, however.
+ */
+ if (seqrel->rd_rel->relfilenode != elm->filenode)
+ {
+ elm->filenode = seqrel->rd_rel->relfilenode;
+ elm->cached = elm->last;
+ }
+
+ /* Return results */
+ *p_elm = elm;
+ *p_rel = seqrel;
+}
+
+
/*
 * Given an opened sequence relation, lock the page buffer and find the tuple
 *
 * *buf receives the reference to the pinned-and-ex-locked buffer
 * *seqdatatuple receives the reference to the sequence tuple proper
 *		(this arg should point to a local variable of type HeapTupleData)
 *
 * Function's return value points to the data payload of the tuple
 *
 * The caller must UnlockReleaseBuffer(*buf) when done with the tuple.
 * A sequence's data always lives in block 0 of its relation.
 */
static Form_pg_sequence_data
read_seq_tuple(Relation rel, Buffer *buf, HeapTuple seqdatatuple)
{
	Page		page;
	ItemId		lp;
	sequence_magic *sm;
	Form_pg_sequence_data seq;

	/* Sequences are single-page relations; the tuple is always on block 0 */
	*buf = ReadBuffer(rel, 0);
	LockBuffer(*buf, BUFFER_LOCK_EXCLUSIVE);

	page = BufferGetPage(*buf);
	sm = (sequence_magic *) PageGetSpecialPointer(page);

	/* Sanity-check the page's special-space magic number */
	if (sm->magic != SEQ_MAGIC)
		elog(ERROR, "bad magic number in sequence \"%s\": %08X",
			 RelationGetRelationName(rel), sm->magic);

	lp = PageGetItemId(page, FirstOffsetNumber);
	Assert(ItemIdIsNormal(lp));

	/* Note we currently only bother to set these two fields of *seqdatatuple */
	seqdatatuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
	seqdatatuple->t_len = ItemIdGetLength(lp);

	/*
	 * Previous releases of Postgres neglected to prevent SELECT FOR UPDATE on
	 * a sequence, which would leave a non-frozen XID in the sequence tuple's
	 * xmax, which eventually leads to clog access failures or worse. If we
	 * see this has happened, clean up after it.  We treat this like a hint
	 * bit update, ie, don't bother to WAL-log it, since we can certainly do
	 * this again if the update gets lost.
	 */
	Assert(!(seqdatatuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI));
	if (HeapTupleHeaderGetRawXmax(seqdatatuple->t_data) != InvalidTransactionId)
	{
		HeapTupleHeaderSetXmax(seqdatatuple->t_data, InvalidTransactionId);
		seqdatatuple->t_data->t_infomask &= ~HEAP_XMAX_COMMITTED;
		seqdatatuple->t_data->t_infomask |= HEAP_XMAX_INVALID;
		MarkBufferDirtyHint(*buf, true);
	}

	seq = (Form_pg_sequence_data) GETSTRUCT(seqdatatuple);

	return seq;
}
+
/*
 * init_params: process the options list of CREATE or ALTER SEQUENCE, and
 * store the values into appropriate fields of seqform, for changes that go
 * into the pg_sequence catalog, and fields of seqdataform for changes to the
 * sequence relation itself.  Set *need_seq_rewrite to true if we changed any
 * parameters that require rewriting the sequence's relation (interesting for
 * ALTER SEQUENCE).  Also set *owned_by to any OWNED BY option, or to NIL if
 * there is none.
 *
 * If isInit is true, fill any unspecified options with default values;
 * otherwise, do not change existing options that aren't explicitly overridden.
 *
 * Note: we force a sequence rewrite whenever we change parameters that affect
 * generation of future sequence values, even if the seqdataform per se is not
 * changed.  This allows ALTER SEQUENCE to behave transactionally.  Currently,
 * the only option that doesn't cause that is OWNED BY.  It's *necessary* for
 * ALTER SEQUENCE OWNED BY to not rewrite the sequence, because that would
 * break pg_upgrade by causing unwanted changes in the sequence's relfilenode.
 *
 * The option-processing order below is significant: e.g. defaults for
 * MAXVALUE/MINVALUE depend on the already-resolved INCREMENT sign and
 * sequence data type, and START defaults depend on the resolved min/max.
 */
static void
init_params(ParseState *pstate, List *options, bool for_identity,
			bool isInit,
			Form_pg_sequence seqform,
			Form_pg_sequence_data seqdataform,
			bool *need_seq_rewrite,
			List **owned_by)
{
	DefElem    *as_type = NULL;
	DefElem    *start_value = NULL;
	DefElem    *restart_value = NULL;
	DefElem    *increment_by = NULL;
	DefElem    *max_value = NULL;
	DefElem    *min_value = NULL;
	DefElem    *cache_value = NULL;
	DefElem    *is_cycled = NULL;
	ListCell   *option;
	bool		reset_max_value = false;
	bool		reset_min_value = false;

	*need_seq_rewrite = false;
	*owned_by = NIL;

	/*
	 * First pass: collect the options into local DefElem pointers, rejecting
	 * duplicates.  Every option except OWNED BY forces a rewrite.
	 */
	foreach(option, options)
	{
		DefElem    *defel = (DefElem *) lfirst(option);

		if (strcmp(defel->defname, "as") == 0)
		{
			if (as_type)
				errorConflictingDefElem(defel, pstate);
			as_type = defel;
			*need_seq_rewrite = true;
		}
		else if (strcmp(defel->defname, "increment") == 0)
		{
			if (increment_by)
				errorConflictingDefElem(defel, pstate);
			increment_by = defel;
			*need_seq_rewrite = true;
		}
		else if (strcmp(defel->defname, "start") == 0)
		{
			if (start_value)
				errorConflictingDefElem(defel, pstate);
			start_value = defel;
			*need_seq_rewrite = true;
		}
		else if (strcmp(defel->defname, "restart") == 0)
		{
			if (restart_value)
				errorConflictingDefElem(defel, pstate);
			restart_value = defel;
			*need_seq_rewrite = true;
		}
		else if (strcmp(defel->defname, "maxvalue") == 0)
		{
			if (max_value)
				errorConflictingDefElem(defel, pstate);
			max_value = defel;
			*need_seq_rewrite = true;
		}
		else if (strcmp(defel->defname, "minvalue") == 0)
		{
			if (min_value)
				errorConflictingDefElem(defel, pstate);
			min_value = defel;
			*need_seq_rewrite = true;
		}
		else if (strcmp(defel->defname, "cache") == 0)
		{
			if (cache_value)
				errorConflictingDefElem(defel, pstate);
			cache_value = defel;
			*need_seq_rewrite = true;
		}
		else if (strcmp(defel->defname, "cycle") == 0)
		{
			if (is_cycled)
				errorConflictingDefElem(defel, pstate);
			is_cycled = defel;
			*need_seq_rewrite = true;
		}
		else if (strcmp(defel->defname, "owned_by") == 0)
		{
			if (*owned_by)
				errorConflictingDefElem(defel, pstate);
			*owned_by = defGetQualifiedName(defel);
		}
		else if (strcmp(defel->defname, "sequence_name") == 0)
		{
			/*
			 * The parser allows this, but it is only for identity columns, in
			 * which case it is filtered out in parse_utilcmd.c.  We only get
			 * here if someone puts it into a CREATE SEQUENCE.
			 */
			ereport(ERROR,
					(errcode(ERRCODE_SYNTAX_ERROR),
					 errmsg("invalid sequence option SEQUENCE NAME"),
					 parser_errposition(pstate, defel->location)));
		}
		else
			elog(ERROR, "option \"%s\" not recognized",
				 defel->defname);
	}

	/*
	 * We must reset log_cnt when isInit or when changing any parameters that
	 * would affect future nextval allocations.
	 */
	if (isInit)
		seqdataform->log_cnt = 0;

	/* AS type */
	if (as_type != NULL)
	{
		Oid			newtypid = typenameTypeId(pstate, defGetTypeName(as_type));

		if (newtypid != INT2OID &&
			newtypid != INT4OID &&
			newtypid != INT8OID)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 for_identity
					 ? errmsg("identity column type must be smallint, integer, or bigint")
					 : errmsg("sequence type must be smallint, integer, or bigint")));

		if (!isInit)
		{
			/*
			 * When changing type and the old sequence min/max values were the
			 * min/max of the old type, adjust sequence min/max values to
			 * min/max of new type.  (Otherwise, the user chose explicit
			 * min/max values, which we'll leave alone.)
			 */
			if ((seqform->seqtypid == INT2OID && seqform->seqmax == PG_INT16_MAX) ||
				(seqform->seqtypid == INT4OID && seqform->seqmax == PG_INT32_MAX) ||
				(seqform->seqtypid == INT8OID && seqform->seqmax == PG_INT64_MAX))
				reset_max_value = true;
			if ((seqform->seqtypid == INT2OID && seqform->seqmin == PG_INT16_MIN) ||
				(seqform->seqtypid == INT4OID && seqform->seqmin == PG_INT32_MIN) ||
				(seqform->seqtypid == INT8OID && seqform->seqmin == PG_INT64_MIN))
				reset_min_value = true;
		}

		seqform->seqtypid = newtypid;
	}
	else if (isInit)
	{
		/* default sequence data type is bigint */
		seqform->seqtypid = INT8OID;
	}

	/* INCREMENT BY */
	if (increment_by != NULL)
	{
		seqform->seqincrement = defGetInt64(increment_by);
		if (seqform->seqincrement == 0)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("INCREMENT must not be zero")));
		seqdataform->log_cnt = 0;
	}
	else if (isInit)
	{
		seqform->seqincrement = 1;
	}

	/* CYCLE */
	if (is_cycled != NULL)
	{
		seqform->seqcycle = boolVal(is_cycled->arg);
		Assert(BoolIsValid(seqform->seqcycle));
		seqdataform->log_cnt = 0;
	}
	else if (isInit)
	{
		seqform->seqcycle = false;
	}

	/* MAXVALUE (null arg means NO MAXVALUE) */
	if (max_value != NULL && max_value->arg)
	{
		seqform->seqmax = defGetInt64(max_value);
		seqdataform->log_cnt = 0;
	}
	else if (isInit || max_value != NULL || reset_max_value)
	{
		/* recompute the default max: type max if ascending, -1 if descending */
		if (seqform->seqincrement > 0 || reset_max_value)
		{
			/* ascending seq */
			if (seqform->seqtypid == INT2OID)
				seqform->seqmax = PG_INT16_MAX;
			else if (seqform->seqtypid == INT4OID)
				seqform->seqmax = PG_INT32_MAX;
			else
				seqform->seqmax = PG_INT64_MAX;
		}
		else
			seqform->seqmax = -1;	/* descending seq */
		seqdataform->log_cnt = 0;
	}

	/* Validate maximum value.  No need to check INT8 as seqmax is an int64 */
	if ((seqform->seqtypid == INT2OID && (seqform->seqmax < PG_INT16_MIN || seqform->seqmax > PG_INT16_MAX))
		|| (seqform->seqtypid == INT4OID && (seqform->seqmax < PG_INT32_MIN || seqform->seqmax > PG_INT32_MAX)))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("MAXVALUE (%lld) is out of range for sequence data type %s",
						(long long) seqform->seqmax,
						format_type_be(seqform->seqtypid))));

	/* MINVALUE (null arg means NO MINVALUE) */
	if (min_value != NULL && min_value->arg)
	{
		seqform->seqmin = defGetInt64(min_value);
		seqdataform->log_cnt = 0;
	}
	else if (isInit || min_value != NULL || reset_min_value)
	{
		/* recompute the default min: type min if descending, 1 if ascending */
		if (seqform->seqincrement < 0 || reset_min_value)
		{
			/* descending seq */
			if (seqform->seqtypid == INT2OID)
				seqform->seqmin = PG_INT16_MIN;
			else if (seqform->seqtypid == INT4OID)
				seqform->seqmin = PG_INT32_MIN;
			else
				seqform->seqmin = PG_INT64_MIN;
		}
		else
			seqform->seqmin = 1;	/* ascending seq */
		seqdataform->log_cnt = 0;
	}

	/* Validate minimum value.  No need to check INT8 as seqmin is an int64 */
	if ((seqform->seqtypid == INT2OID && (seqform->seqmin < PG_INT16_MIN || seqform->seqmin > PG_INT16_MAX))
		|| (seqform->seqtypid == INT4OID && (seqform->seqmin < PG_INT32_MIN || seqform->seqmin > PG_INT32_MAX)))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("MINVALUE (%lld) is out of range for sequence data type %s",
						(long long) seqform->seqmin,
						format_type_be(seqform->seqtypid))));

	/* crosscheck min/max */
	if (seqform->seqmin >= seqform->seqmax)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("MINVALUE (%lld) must be less than MAXVALUE (%lld)",
						(long long) seqform->seqmin,
						(long long) seqform->seqmax)));

	/* START WITH */
	if (start_value != NULL)
	{
		seqform->seqstart = defGetInt64(start_value);
	}
	else if (isInit)
	{
		/* default start is min for ascending, max for descending */
		if (seqform->seqincrement > 0)
			seqform->seqstart = seqform->seqmin;	/* ascending seq */
		else
			seqform->seqstart = seqform->seqmax;	/* descending seq */
	}

	/* crosscheck START */
	if (seqform->seqstart < seqform->seqmin)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("START value (%lld) cannot be less than MINVALUE (%lld)",
						(long long) seqform->seqstart,
						(long long) seqform->seqmin)));
	if (seqform->seqstart > seqform->seqmax)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("START value (%lld) cannot be greater than MAXVALUE (%lld)",
						(long long) seqform->seqstart,
						(long long) seqform->seqmax)));

	/* RESTART [WITH] */
	if (restart_value != NULL)
	{
		/* RESTART without a value means restart from the START value */
		if (restart_value->arg != NULL)
			seqdataform->last_value = defGetInt64(restart_value);
		else
			seqdataform->last_value = seqform->seqstart;
		seqdataform->is_called = false;
		seqdataform->log_cnt = 0;
	}
	else if (isInit)
	{
		seqdataform->last_value = seqform->seqstart;
		seqdataform->is_called = false;
	}

	/* crosscheck RESTART (or current value, if changing MIN/MAX) */
	if (seqdataform->last_value < seqform->seqmin)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("RESTART value (%lld) cannot be less than MINVALUE (%lld)",
						(long long) seqdataform->last_value,
						(long long) seqform->seqmin)));
	if (seqdataform->last_value > seqform->seqmax)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("RESTART value (%lld) cannot be greater than MAXVALUE (%lld)",
						(long long) seqdataform->last_value,
						(long long) seqform->seqmax)));

	/* CACHE */
	if (cache_value != NULL)
	{
		seqform->seqcache = defGetInt64(cache_value);
		if (seqform->seqcache <= 0)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("CACHE (%lld) must be greater than zero",
							(long long) seqform->seqcache)));
		seqdataform->log_cnt = 0;
	}
	else if (isInit)
	{
		seqform->seqcache = 1;
	}
}
+
/*
 * Process an OWNED BY option for CREATE/ALTER SEQUENCE
 *
 * Ownership permissions on the sequence are already checked,
 * but if we are establishing a new owned-by dependency, we must
 * enforce that the referenced table has the same owner and namespace
 * as the sequence.
 *
 * owned_by is a qualified-name list: either the single word "none"
 * (OWNED BY NONE) or table.column.  for_identity selects the dependency
 * type recorded in pg_depend (INTERNAL for identity sequences, AUTO for
 * ordinary OWNED BY).
 */
static void
process_owned_by(Relation seqrel, List *owned_by, bool for_identity)
{
	DependencyType deptype;
	int			nnames;
	Relation	tablerel;
	AttrNumber	attnum;

	deptype = for_identity ? DEPENDENCY_INTERNAL : DEPENDENCY_AUTO;

	nnames = list_length(owned_by);
	Assert(nnames > 0);
	if (nnames == 1)
	{
		/* Must be OWNED BY NONE */
		if (strcmp(strVal(linitial(owned_by)), "none") != 0)
			ereport(ERROR,
					(errcode(ERRCODE_SYNTAX_ERROR),
					 errmsg("invalid OWNED BY option"),
					 errhint("Specify OWNED BY table.column or OWNED BY NONE.")));
		/* tablerel == NULL signals "remove dependency, add none" below */
		tablerel = NULL;
		attnum = 0;
	}
	else
	{
		List	   *relname;
		char	   *attrname;
		RangeVar   *rel;

		/* Separate relname and attr name */
		relname = list_truncate(list_copy(owned_by), nnames - 1);
		attrname = strVal(llast(owned_by));

		/* Open and lock rel to ensure it won't go away meanwhile */
		rel = makeRangeVarFromNameList(relname);
		tablerel = relation_openrv(rel, AccessShareLock);

		/* Must be a regular or foreign table */
		if (!(tablerel->rd_rel->relkind == RELKIND_RELATION ||
			  tablerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE ||
			  tablerel->rd_rel->relkind == RELKIND_VIEW ||
			  tablerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE))
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("sequence cannot be owned by relation \"%s\"",
							RelationGetRelationName(tablerel)),
					 errdetail_relkind_not_supported(tablerel->rd_rel->relkind)));

		/* We insist on same owner and schema */
		if (seqrel->rd_rel->relowner != tablerel->rd_rel->relowner)
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("sequence must have same owner as table it is linked to")));
		if (RelationGetNamespace(seqrel) != RelationGetNamespace(tablerel))
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("sequence must be in same schema as table it is linked to")));

		/* Now, fetch the attribute number from the system cache */
		attnum = get_attnum(RelationGetRelid(tablerel), attrname);
		if (attnum == InvalidAttrNumber)
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_COLUMN),
					 errmsg("column \"%s\" of relation \"%s\" does not exist",
							attrname, RelationGetRelationName(tablerel))));
	}

	/*
	 * Catch user explicitly running OWNED BY on identity sequence.
	 */
	if (deptype == DEPENDENCY_AUTO)
	{
		Oid			tableId;
		int32		colId;

		if (sequenceIsOwned(RelationGetRelid(seqrel), DEPENDENCY_INTERNAL, &tableId, &colId))
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("cannot change ownership of identity sequence"),
					 errdetail("Sequence \"%s\" is linked to table \"%s\".",
							   RelationGetRelationName(seqrel),
							   get_rel_name(tableId))));
	}

	/*
	 * OK, we are ready to update pg_depend.  First remove any existing
	 * dependencies for the sequence, then optionally add a new one.
	 */
	deleteDependencyRecordsForClass(RelationRelationId, RelationGetRelid(seqrel),
									RelationRelationId, deptype);

	if (tablerel)
	{
		ObjectAddress refobject,
					depobject;

		/* the sequence depends on (table, column) with the chosen deptype */
		refobject.classId = RelationRelationId;
		refobject.objectId = RelationGetRelid(tablerel);
		refobject.objectSubId = attnum;
		depobject.classId = RelationRelationId;
		depobject.objectId = RelationGetRelid(seqrel);
		depobject.objectSubId = 0;
		recordDependencyOn(&depobject, &refobject, deptype);
	}

	/* Done, but hold lock until commit */
	if (tablerel)
		relation_close(tablerel, NoLock);
}
+
+
+/*
+ * Return sequence parameters in a list of the form created by the parser.
+ */
+List *
+sequence_options(Oid relid)
+{
+ HeapTuple pgstuple;
+ Form_pg_sequence pgsform;
+ List *options = NIL;
+
+ pgstuple = SearchSysCache1(SEQRELID, relid);
+ if (!HeapTupleIsValid(pgstuple))
+ elog(ERROR, "cache lookup failed for sequence %u", relid);
+ pgsform = (Form_pg_sequence) GETSTRUCT(pgstuple);
+
+ /* Use makeFloat() for 64-bit integers, like gram.y does. */
+ options = lappend(options,
+ makeDefElem("cache", (Node *) makeFloat(psprintf(INT64_FORMAT, pgsform->seqcache)), -1));
+ options = lappend(options,
+ makeDefElem("cycle", (Node *) makeBoolean(pgsform->seqcycle), -1));
+ options = lappend(options,
+ makeDefElem("increment", (Node *) makeFloat(psprintf(INT64_FORMAT, pgsform->seqincrement)), -1));
+ options = lappend(options,
+ makeDefElem("maxvalue", (Node *) makeFloat(psprintf(INT64_FORMAT, pgsform->seqmax)), -1));
+ options = lappend(options,
+ makeDefElem("minvalue", (Node *) makeFloat(psprintf(INT64_FORMAT, pgsform->seqmin)), -1));
+ options = lappend(options,
+ makeDefElem("start", (Node *) makeFloat(psprintf(INT64_FORMAT, pgsform->seqstart)), -1));
+
+ ReleaseSysCache(pgstuple);
+
+ return options;
+}
+
+/*
+ * Return sequence parameters (formerly for use by information schema)
+ */
+Datum
+pg_sequence_parameters(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ TupleDesc tupdesc;
+ Datum values[7];
+ bool isnull[7];
+ HeapTuple pgstuple;
+ Form_pg_sequence pgsform;
+
+ if (pg_class_aclcheck(relid, GetUserId(), ACL_SELECT | ACL_UPDATE | ACL_USAGE) != ACLCHECK_OK)
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("permission denied for sequence %s",
+ get_rel_name(relid))));
+
+ tupdesc = CreateTemplateTupleDesc(7);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 1, "start_value",
+ INT8OID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 2, "minimum_value",
+ INT8OID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 3, "maximum_value",
+ INT8OID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 4, "increment",
+ INT8OID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 5, "cycle_option",
+ BOOLOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 6, "cache_size",
+ INT8OID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 7, "data_type",
+ OIDOID, -1, 0);
+
+ BlessTupleDesc(tupdesc);
+
+ memset(isnull, 0, sizeof(isnull));
+
+ pgstuple = SearchSysCache1(SEQRELID, relid);
+ if (!HeapTupleIsValid(pgstuple))
+ elog(ERROR, "cache lookup failed for sequence %u", relid);
+ pgsform = (Form_pg_sequence) GETSTRUCT(pgstuple);
+
+ values[0] = Int64GetDatum(pgsform->seqstart);
+ values[1] = Int64GetDatum(pgsform->seqmin);
+ values[2] = Int64GetDatum(pgsform->seqmax);
+ values[3] = Int64GetDatum(pgsform->seqincrement);
+ values[4] = BoolGetDatum(pgsform->seqcycle);
+ values[5] = Int64GetDatum(pgsform->seqcache);
+ values[6] = ObjectIdGetDatum(pgsform->seqtypid);
+
+ ReleaseSysCache(pgstuple);
+
+ return HeapTupleGetDatum(heap_form_tuple(tupdesc, values, isnull));
+}
+
/*
 * Return the last value from the sequence
 *
 * Note: This has a completely different meaning than lastval().
 *
 * Returns NULL if the sequence has not been called yet (is_called false);
 * otherwise the on-disk last_value.  Requires SELECT or USAGE privilege.
 */
Datum
pg_sequence_last_value(PG_FUNCTION_ARGS)
{
	Oid			relid = PG_GETARG_OID(0);
	SeqTable	elm;
	Relation	seqrel;
	Buffer		buf;
	HeapTupleData seqtuple;
	Form_pg_sequence_data seq;
	bool		is_called;
	int64		result;

	/* open and lock sequence */
	init_sequence(relid, &elm, &seqrel);

	if (pg_class_aclcheck(relid, GetUserId(), ACL_SELECT | ACL_USAGE) != ACLCHECK_OK)
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied for sequence %s",
						RelationGetRelationName(seqrel))));

	/* buf comes back pinned and exclusive-locked */
	seq = read_seq_tuple(seqrel, &buf, &seqtuple);

	/* copy fields out before releasing the buffer lock */
	is_called = seq->is_called;
	result = seq->last_value;

	UnlockReleaseBuffer(buf);
	relation_close(seqrel, NoLock);

	if (is_called)
		PG_RETURN_INT64(result);
	else
		PG_RETURN_NULL();
}
+
+
/*
 * seq_redo - WAL redo routine for sequences.
 *
 * The only record type is XLOG_SEQ_LOG, which carries a complete new
 * sequence tuple; redo rebuilds the sequence's single page from scratch
 * and overwrites the buffer.
 */
void
seq_redo(XLogReaderState *record)
{
	XLogRecPtr	lsn = record->EndRecPtr;
	uint8		info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
	Buffer		buffer;
	Page		page;
	Page		localpage;
	char	   *item;
	Size		itemsz;
	xl_seq_rec *xlrec = (xl_seq_rec *) XLogRecGetData(record);
	sequence_magic *sm;

	if (info != XLOG_SEQ_LOG)
		elog(PANIC, "seq_redo: unknown op code %u", info);

	buffer = XLogInitBufferForRedo(record, 0);
	page = (Page) BufferGetPage(buffer);

	/*
	 * We always reinit the page.  However, since this WAL record type is also
	 * used for updating sequences, it's possible that a hot-standby backend
	 * is examining the page concurrently; so we mustn't transiently trash the
	 * buffer.  The solution is to build the correct new page contents in
	 * local workspace and then memcpy into the buffer.  Then only bytes that
	 * are supposed to change will change, even transiently. We must palloc
	 * the local page for alignment reasons.
	 */
	localpage = (Page) palloc(BufferGetPageSize(buffer));

	PageInit(localpage, BufferGetPageSize(buffer), sizeof(sequence_magic));
	sm = (sequence_magic *) PageGetSpecialPointer(localpage);
	sm->magic = SEQ_MAGIC;

	/* the new sequence tuple follows the fixed-size xl_seq_rec header */
	item = (char *) xlrec + sizeof(xl_seq_rec);
	itemsz = XLogRecGetDataLen(record) - sizeof(xl_seq_rec);

	if (PageAddItem(localpage, (Item) item, itemsz,
					FirstOffsetNumber, false, false) == InvalidOffsetNumber)
		elog(PANIC, "seq_redo: failed to add item to page");

	PageSetLSN(localpage, lsn);

	/* atomically (byte-wise) install the rebuilt page over the buffer */
	memcpy(page, localpage, BufferGetPageSize(buffer));
	MarkBufferDirty(buffer);
	UnlockReleaseBuffer(buffer);

	pfree(localpage);
}
+
+/*
+ * Flush cached sequence information.
+ */
+void
+ResetSequenceCaches(void)
+{
+ if (seqhashtab)
+ {
+ hash_destroy(seqhashtab);
+ seqhashtab = NULL;
+ }
+
+ last_used_seq = NULL;
+}
+
/*
 * Mask a Sequence page before performing consistency checks on it.
 *
 * Masks fields that may legitimately differ between primary and standby
 * (page LSN, checksum, and the unused space between pd_lower and pd_upper)
 * so that WAL consistency checking compares only meaningful content.
 * blkno is unused here but is part of the rmgr mask callback signature.
 */
void
seq_mask(char *page, BlockNumber blkno)
{
	mask_page_lsn_and_checksum(page);

	mask_unused_space(page);
}
diff --git a/src/backend/commands/statscmds.c b/src/backend/commands/statscmds.c
new file mode 100644
index 0000000..f442d85
--- /dev/null
+++ b/src/backend/commands/statscmds.c
@@ -0,0 +1,898 @@
+/*-------------------------------------------------------------------------
+ *
+ * statscmds.c
+ * Commands for creating and altering extended statistics objects
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/commands/statscmds.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/heapam.h"
+#include "access/relation.h"
+#include "access/relscan.h"
+#include "access/table.h"
+#include "catalog/catalog.h"
+#include "catalog/dependency.h"
+#include "catalog/indexing.h"
+#include "catalog/namespace.h"
+#include "catalog/objectaccess.h"
+#include "catalog/pg_namespace.h"
+#include "catalog/pg_statistic_ext.h"
+#include "catalog/pg_statistic_ext_data.h"
+#include "commands/comment.h"
+#include "commands/defrem.h"
+#include "miscadmin.h"
+#include "nodes/nodeFuncs.h"
+#include "optimizer/optimizer.h"
+#include "statistics/statistics.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/fmgroids.h"
+#include "utils/inval.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
+#include "utils/typcache.h"
+
+
+static char *ChooseExtendedStatisticName(const char *name1, const char *name2,
+ const char *label, Oid namespaceid);
+static char *ChooseExtendedStatisticNameAddition(List *exprs);
+
+
+/* qsort comparator for the attnums in CreateStatistics */
+static int
+compare_int16(const void *a, const void *b)
+{
+ int av = *(const int16 *) a;
+ int bv = *(const int16 *) b;
+
+ /* this can't overflow if int is wider than int16 */
+ return (av - bv);
+}
+
+/*
+ * CREATE STATISTICS
+ */
+ObjectAddress
+CreateStatistics(CreateStatsStmt *stmt)
+{
+ int16 attnums[STATS_MAX_DIMENSIONS];
+ int nattnums = 0;
+ int numcols;
+ char *namestr;
+ NameData stxname;
+ Oid statoid;
+ Oid namespaceId;
+ Oid stxowner = GetUserId();
+ HeapTuple htup;
+ Datum values[Natts_pg_statistic_ext];
+ bool nulls[Natts_pg_statistic_ext];
+ int2vector *stxkeys;
+ List *stxexprs = NIL;
+ Datum exprsDatum;
+ Relation statrel;
+ Relation rel = NULL;
+ Oid relid;
+ ObjectAddress parentobject,
+ myself;
+ Datum types[4]; /* one for each possible type of statistic */
+ int ntypes;
+ ArrayType *stxkind;
+ bool build_ndistinct;
+ bool build_dependencies;
+ bool build_mcv;
+ bool build_expressions;
+ bool requested_type = false;
+ int i;
+ ListCell *cell;
+ ListCell *cell2;
+
+ Assert(IsA(stmt, CreateStatsStmt));
+
+ /*
+ * Examine the FROM clause. Currently, we only allow it to be a single
+ * simple table, but later we'll probably allow multiple tables and JOIN
+ * syntax. The grammar is already prepared for that, so we have to check
+ * here that what we got is what we can support.
+ */
+ if (list_length(stmt->relations) != 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("only a single relation is allowed in CREATE STATISTICS")));
+
+ foreach(cell, stmt->relations)
+ {
+ Node *rln = (Node *) lfirst(cell);
+
+ if (!IsA(rln, RangeVar))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("only a single relation is allowed in CREATE STATISTICS")));
+
+ /*
+ * CREATE STATISTICS will influence future execution plans but does
+ * not interfere with currently executing plans. So it should be
+ * enough to take only ShareUpdateExclusiveLock on relation,
+ * conflicting with ANALYZE and other DDL that sets statistical
+ * information, but not with normal queries.
+ */
+ rel = relation_openrv((RangeVar *) rln, ShareUpdateExclusiveLock);
+
+ /* Restrict to allowed relation types */
+ if (rel->rd_rel->relkind != RELKIND_RELATION &&
+ rel->rd_rel->relkind != RELKIND_MATVIEW &&
+ rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE &&
+ rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot define statistics for relation \"%s\"",
+ RelationGetRelationName(rel)),
+ errdetail_relkind_not_supported(rel->rd_rel->relkind)));
+
+ /* You must own the relation to create stats on it */
+ if (!pg_class_ownercheck(RelationGetRelid(rel), stxowner))
+ aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(rel->rd_rel->relkind),
+ RelationGetRelationName(rel));
+
+ /* Creating statistics on system catalogs is not allowed */
+ if (!allowSystemTableMods && IsSystemRelation(rel))
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("permission denied: \"%s\" is a system catalog",
+ RelationGetRelationName(rel))));
+ }
+
+ Assert(rel);
+ relid = RelationGetRelid(rel);
+
+ /*
+ * If the node has a name, split it up and determine creation namespace.
+ * If not (a possibility not considered by the grammar, but one which can
+ * occur via the "CREATE TABLE ... (LIKE)" command), then we put the
+ * object in the same namespace as the relation, and cons up a name for
+ * it.
+ */
+ if (stmt->defnames)
+ namespaceId = QualifiedNameGetCreationNamespace(stmt->defnames,
+ &namestr);
+ else
+ {
+ namespaceId = RelationGetNamespace(rel);
+ namestr = ChooseExtendedStatisticName(RelationGetRelationName(rel),
+ ChooseExtendedStatisticNameAddition(stmt->exprs),
+ "stat",
+ namespaceId);
+ }
+ namestrcpy(&stxname, namestr);
+
+ /*
+ * Deal with the possibility that the statistics object already exists.
+ */
+ if (SearchSysCacheExists2(STATEXTNAMENSP,
+ CStringGetDatum(namestr),
+ ObjectIdGetDatum(namespaceId)))
+ {
+ if (stmt->if_not_exists)
+ {
+ /*
+ * Since stats objects aren't members of extensions (see comments
+ * below), no need for checkMembershipInCurrentExtension here.
+ */
+ ereport(NOTICE,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg("statistics object \"%s\" already exists, skipping",
+ namestr)));
+ relation_close(rel, NoLock);
+ return InvalidObjectAddress;
+ }
+
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg("statistics object \"%s\" already exists", namestr)));
+ }
+
+ /*
+ * Make sure no more than STATS_MAX_DIMENSIONS columns are used. There
+ * might be duplicates and so on, but we'll deal with those later.
+ */
+ numcols = list_length(stmt->exprs);
+ if (numcols > STATS_MAX_DIMENSIONS)
+ ereport(ERROR,
+ (errcode(ERRCODE_TOO_MANY_COLUMNS),
+ errmsg("cannot have more than %d columns in statistics",
+ STATS_MAX_DIMENSIONS)));
+
+ /*
+ * Convert the expression list to a simple array of attnums, but also keep
+ * a list of more complex expressions. While at it, enforce some
+ * constraints - we don't allow extended statistics on system attributes,
+ * and we require the data type to have a less-than operator.
+ *
+ * There are many ways to "mask" a simple attribute reference as an
+ * expression, for example "(a+0)" etc. We can't possibly detect all of
+ * them, but we handle at least the simple case with the attribute in
+ * parens. There'll always be a way around this, if the user is determined
+ * (like the "(a+0)" example), but this makes it somewhat consistent with
+ * how indexes treat attributes/expressions.
+ */
+ foreach(cell, stmt->exprs)
+ {
+ StatsElem *selem = lfirst_node(StatsElem, cell);
+
+ if (selem->name) /* column reference */
+ {
+ char *attname;
+ HeapTuple atttuple;
+ Form_pg_attribute attForm;
+ TypeCacheEntry *type;
+
+ attname = selem->name;
+
+ atttuple = SearchSysCacheAttName(relid, attname);
+ if (!HeapTupleIsValid(atttuple))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_COLUMN),
+ errmsg("column \"%s\" does not exist",
+ attname)));
+ attForm = (Form_pg_attribute) GETSTRUCT(atttuple);
+
+ /* Disallow use of system attributes in extended stats */
+ if (attForm->attnum <= 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("statistics creation on system columns is not supported")));
+
+ /* Disallow data types without a less-than operator */
+ type = lookup_type_cache(attForm->atttypid, TYPECACHE_LT_OPR);
+ if (type->lt_opr == InvalidOid)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("column \"%s\" cannot be used in statistics because its type %s has no default btree operator class",
+ attname, format_type_be(attForm->atttypid))));
+
+ attnums[nattnums] = attForm->attnum;
+ nattnums++;
+ ReleaseSysCache(atttuple);
+ }
+ else if (IsA(selem->expr, Var)) /* column reference in parens */
+ {
+ Var *var = (Var *) selem->expr;
+ TypeCacheEntry *type;
+
+ /* Disallow use of system attributes in extended stats */
+ if (var->varattno <= 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("statistics creation on system columns is not supported")));
+
+ /* Disallow data types without a less-than operator */
+ type = lookup_type_cache(var->vartype, TYPECACHE_LT_OPR);
+ if (type->lt_opr == InvalidOid)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("column \"%s\" cannot be used in statistics because its type %s has no default btree operator class",
+ get_attname(relid, var->varattno, false), format_type_be(var->vartype))));
+
+ attnums[nattnums] = var->varattno;
+ nattnums++;
+ }
+ else /* expression */
+ {
+ Node *expr = selem->expr;
+ Oid atttype;
+ TypeCacheEntry *type;
+ Bitmapset *attnums = NULL;
+ int k;
+
+ Assert(expr != NULL);
+
+ /* Disallow expressions referencing system attributes. */
+ pull_varattnos(expr, 1, &attnums);
+
+ k = -1;
+ while ((k = bms_next_member(attnums, k)) >= 0)
+ {
+ AttrNumber attnum = k + FirstLowInvalidHeapAttributeNumber;
+
+ if (attnum <= 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("statistics creation on system columns is not supported")));
+ }
+
+ /*
+ * Disallow data types without a less-than operator.
+ *
+ * We ignore this for statistics on a single expression, in which
+ * case we'll build the regular statistics only (and that code can
+ * deal with such data types).
+ */
+ if (list_length(stmt->exprs) > 1)
+ {
+ atttype = exprType(expr);
+ type = lookup_type_cache(atttype, TYPECACHE_LT_OPR);
+ if (type->lt_opr == InvalidOid)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("expression cannot be used in multivariate statistics because its type %s has no default btree operator class",
+ format_type_be(atttype))));
+ }
+
+ stxexprs = lappend(stxexprs, expr);
+ }
+ }
+
+ /*
+ * Parse the statistics kinds.
+ *
+ * First check that if this is the case with a single expression, there
+ * are no statistics kinds specified (we don't allow that for the simple
+ * CREATE STATISTICS form).
+ */
+ if ((list_length(stmt->exprs) == 1) && (list_length(stxexprs) == 1))
+ {
+ /* statistics kinds not specified */
+ if (list_length(stmt->stat_types) > 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("when building statistics on a single expression, statistics kinds may not be specified")));
+ }
+
+ /* OK, let's check that we recognize the statistics kinds. */
+ build_ndistinct = false;
+ build_dependencies = false;
+ build_mcv = false;
+ foreach(cell, stmt->stat_types)
+ {
+ char *type = strVal(lfirst(cell));
+
+ if (strcmp(type, "ndistinct") == 0)
+ {
+ build_ndistinct = true;
+ requested_type = true;
+ }
+ else if (strcmp(type, "dependencies") == 0)
+ {
+ build_dependencies = true;
+ requested_type = true;
+ }
+ else if (strcmp(type, "mcv") == 0)
+ {
+ build_mcv = true;
+ requested_type = true;
+ }
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("unrecognized statistics kind \"%s\"",
+ type)));
+ }
+
+ /*
+ * If no statistic type was specified, build them all (but only when the
+ * statistics is defined on more than one column/expression).
+ */
+ if ((!requested_type) && (numcols >= 2))
+ {
+ build_ndistinct = true;
+ build_dependencies = true;
+ build_mcv = true;
+ }
+
+ /*
+ * When there are non-trivial expressions, build the expression stats
+ * automatically. This allows calculating good estimates for stats that
+ * consider per-clause estimates (e.g. functional dependencies).
+ */
+ build_expressions = (list_length(stxexprs) > 0);
+
+ /*
+ * Check that at least two columns were specified in the statement, or
+ * that we're building statistics on a single expression.
+ */
+ if ((numcols < 2) && (list_length(stxexprs) != 1))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("extended statistics require at least 2 columns")));
+
+ /*
+ * Sort the attnums, which makes detecting duplicates somewhat easier, and
+ * it does not hurt (it does not matter for the contents, unlike for
+ * indexes, for example).
+ */
+ qsort(attnums, nattnums, sizeof(int16), compare_int16);
+
+ /*
+ * Check for duplicates in the list of columns. The attnums are sorted so
+ * just check consecutive elements.
+ */
+ for (i = 1; i < nattnums; i++)
+ {
+ if (attnums[i] == attnums[i - 1])
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_COLUMN),
+ errmsg("duplicate column name in statistics definition")));
+ }
+
+ /*
+ * Check for duplicate expressions. We do two loops, counting the
+ * occurrences of each expression. This is O(N^2) but we only allow small
+ * number of expressions and it's not executed often.
+ *
+ * XXX We don't cross-check attributes and expressions, because it does
+ * not seem worth it. In principle we could check that expressions don't
+ * contain trivial attribute references like "(a)", but the reasoning is
+ * similar to why we don't bother with extracting columns from
+ * expressions. It's either expensive or very easy to defeat for
+ * determined user, and there's no risk if we allow such statistics (the
+ * statistics is useless, but harmless).
+ */
+ foreach(cell, stxexprs)
+ {
+ Node *expr1 = (Node *) lfirst(cell);
+ int cnt = 0;
+
+ foreach(cell2, stxexprs)
+ {
+ Node *expr2 = (Node *) lfirst(cell2);
+
+ if (equal(expr1, expr2))
+ cnt += 1;
+ }
+
+ /* every expression should find at least itself */
+ Assert(cnt >= 1);
+
+ if (cnt > 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_COLUMN),
+ errmsg("duplicate expression in statistics definition")));
+ }
+
+ /* Form an int2vector representation of the sorted column list */
+ stxkeys = buildint2vector(attnums, nattnums);
+
+ /* construct the char array of enabled statistic types */
+ ntypes = 0;
+ if (build_ndistinct)
+ types[ntypes++] = CharGetDatum(STATS_EXT_NDISTINCT);
+ if (build_dependencies)
+ types[ntypes++] = CharGetDatum(STATS_EXT_DEPENDENCIES);
+ if (build_mcv)
+ types[ntypes++] = CharGetDatum(STATS_EXT_MCV);
+ if (build_expressions)
+ types[ntypes++] = CharGetDatum(STATS_EXT_EXPRESSIONS);
+ Assert(ntypes > 0 && ntypes <= lengthof(types));
+ stxkind = construct_array(types, ntypes, CHAROID, 1, true, TYPALIGN_CHAR);
+
+ /* convert the expressions (if any) to a text datum */
+ if (stxexprs != NIL)
+ {
+ char *exprsString;
+
+ exprsString = nodeToString(stxexprs);
+ exprsDatum = CStringGetTextDatum(exprsString);
+ pfree(exprsString);
+ }
+ else
+ exprsDatum = (Datum) 0;
+
+ statrel = table_open(StatisticExtRelationId, RowExclusiveLock);
+
+ /*
+ * Everything seems fine, so let's build the pg_statistic_ext tuple.
+ */
+ memset(values, 0, sizeof(values));
+ memset(nulls, false, sizeof(nulls));
+
+ statoid = GetNewOidWithIndex(statrel, StatisticExtOidIndexId,
+ Anum_pg_statistic_ext_oid);
+ values[Anum_pg_statistic_ext_oid - 1] = ObjectIdGetDatum(statoid);
+ values[Anum_pg_statistic_ext_stxrelid - 1] = ObjectIdGetDatum(relid);
+ values[Anum_pg_statistic_ext_stxname - 1] = NameGetDatum(&stxname);
+ values[Anum_pg_statistic_ext_stxnamespace - 1] = ObjectIdGetDatum(namespaceId);
+ values[Anum_pg_statistic_ext_stxstattarget - 1] = Int32GetDatum(-1);
+ values[Anum_pg_statistic_ext_stxowner - 1] = ObjectIdGetDatum(stxowner);
+ values[Anum_pg_statistic_ext_stxkeys - 1] = PointerGetDatum(stxkeys);
+ values[Anum_pg_statistic_ext_stxkind - 1] = PointerGetDatum(stxkind);
+
+ values[Anum_pg_statistic_ext_stxexprs - 1] = exprsDatum;
+ if (exprsDatum == (Datum) 0)
+ nulls[Anum_pg_statistic_ext_stxexprs - 1] = true;
+
+ /* insert it into pg_statistic_ext */
+ htup = heap_form_tuple(statrel->rd_att, values, nulls);
+ CatalogTupleInsert(statrel, htup);
+ heap_freetuple(htup);
+
+ relation_close(statrel, RowExclusiveLock);
+
+ /*
+ * We used to create the pg_statistic_ext_data tuple too, but it's not
+ * clear what value should the stxdinherit flag have (it depends on
+ * whether the rel is partitioned, contains data, etc.)
+ */
+
+ InvokeObjectPostCreateHook(StatisticExtRelationId, statoid, 0);
+
+ /*
+ * Invalidate relcache so that others see the new statistics object.
+ */
+ CacheInvalidateRelcache(rel);
+
+ relation_close(rel, NoLock);
+
+ /*
+ * Add an AUTO dependency on each column used in the stats, so that the
+ * stats object goes away if any or all of them get dropped.
+ */
+ ObjectAddressSet(myself, StatisticExtRelationId, statoid);
+
+ /* add dependencies for plain column references */
+ for (i = 0; i < nattnums; i++)
+ {
+ ObjectAddressSubSet(parentobject, RelationRelationId, relid, attnums[i]);
+ recordDependencyOn(&myself, &parentobject, DEPENDENCY_AUTO);
+ }
+
+ /*
+ * If there are no dependencies on a column, give the statistics object an
+ * auto dependency on the whole table. In most cases, this will be
+ * redundant, but it might not be if the statistics expressions contain no
+ * Vars (which might seem strange but possible). This is consistent with
+ * what we do for indexes in index_create.
+ *
+ * XXX We intentionally don't consider the expressions before adding this
+ * dependency, because recordDependencyOnSingleRelExpr may not create any
+ * dependencies for whole-row Vars.
+ */
+ if (!nattnums)
+ {
+ ObjectAddressSet(parentobject, RelationRelationId, relid);
+ recordDependencyOn(&myself, &parentobject, DEPENDENCY_AUTO);
+ }
+
+ /*
+ * Store dependencies on anything mentioned in statistics expressions,
+ * just like we do for index expressions.
+ */
+ if (stxexprs)
+ recordDependencyOnSingleRelExpr(&myself,
+ (Node *) stxexprs,
+ relid,
+ DEPENDENCY_NORMAL,
+ DEPENDENCY_AUTO, false);
+
+ /*
+ * Also add dependencies on namespace and owner. These are required
+ * because the stats object might have a different namespace and/or owner
+ * than the underlying table(s).
+ */
+ ObjectAddressSet(parentobject, NamespaceRelationId, namespaceId);
+ recordDependencyOn(&myself, &parentobject, DEPENDENCY_NORMAL);
+
+ recordDependencyOnOwner(StatisticExtRelationId, statoid, stxowner);
+
+ /*
+ * XXX probably there should be a recordDependencyOnCurrentExtension call
+ * here too, but we'd have to add support for ALTER EXTENSION ADD/DROP
+ * STATISTICS, which is more work than it seems worth.
+ */
+
+ /* Add any requested comment */
+ if (stmt->stxcomment != NULL)
+ CreateComments(statoid, StatisticExtRelationId, 0,
+ stmt->stxcomment);
+
+ /* Return stats object's address */
+ return myself;
+}
+
+/*
+ * ALTER STATISTICS
+ *
+ * Currently the only supported action is changing the statistics target
+ * (stxstattarget) of an existing extended statistics object.  Returns the
+ * address of the altered object, or InvalidObjectAddress when the object
+ * does not exist and IF EXISTS was specified.
+ */
+ObjectAddress
+AlterStatistics(AlterStatsStmt *stmt)
+{
+	Relation	rel;
+	Oid			stxoid;
+	HeapTuple	oldtup;
+	HeapTuple	newtup;
+	Datum		repl_val[Natts_pg_statistic_ext];
+	bool		repl_null[Natts_pg_statistic_ext];
+	bool		repl_repl[Natts_pg_statistic_ext];
+	ObjectAddress address;
+	int			newtarget = stmt->stxstattarget;
+
+	/* Limit statistics target to a sane range */
+	if (newtarget < -1)
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("statistics target %d is too low",
+						newtarget)));
+	}
+	else if (newtarget > 10000)
+	{
+		/* Values above the maximum are silently clamped, with a WARNING. */
+		newtarget = 10000;
+		ereport(WARNING,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("lowering statistics target to %d",
+						newtarget)));
+	}
+
+	/* lookup OID of the statistics object */
+	stxoid = get_statistics_object_oid(stmt->defnames, stmt->missing_ok);
+
+	/*
+	 * If we got here and the OID is not valid, it means the statistics object
+	 * does not exist, but the command specified IF EXISTS. So report this as
+	 * a simple NOTICE and we're done.
+	 */
+	if (!OidIsValid(stxoid))
+	{
+		char	   *schemaname;
+		char	   *statname;
+
+		Assert(stmt->missing_ok);
+
+		DeconstructQualifiedName(stmt->defnames, &schemaname, &statname);
+
+		if (schemaname)
+			ereport(NOTICE,
+					(errmsg("statistics object \"%s.%s\" does not exist, skipping",
+							schemaname, statname)));
+		else
+			ereport(NOTICE,
+					(errmsg("statistics object \"%s\" does not exist, skipping",
+							statname)));
+
+		return InvalidObjectAddress;
+	}
+
+	/* Search pg_statistic_ext */
+	rel = table_open(StatisticExtRelationId, RowExclusiveLock);
+
+	oldtup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(stxoid));
+	if (!HeapTupleIsValid(oldtup))
+		elog(ERROR, "cache lookup failed for extended statistics object %u", stxoid);
+
+	/* Must be owner of the existing statistics object */
+	if (!pg_statistics_object_ownercheck(stxoid, GetUserId()))
+		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_STATISTIC_EXT,
+					   NameListToString(stmt->defnames));
+
+	/* Build new tuple. */
+	memset(repl_val, 0, sizeof(repl_val));
+	memset(repl_null, false, sizeof(repl_null));
+	memset(repl_repl, false, sizeof(repl_repl));
+
+	/* replace the stxstattarget column */
+	repl_repl[Anum_pg_statistic_ext_stxstattarget - 1] = true;
+	repl_val[Anum_pg_statistic_ext_stxstattarget - 1] = Int32GetDatum(newtarget);
+
+	newtup = heap_modify_tuple(oldtup, RelationGetDescr(rel),
+							   repl_val, repl_null, repl_repl);
+
+	/* Update system catalog. */
+	CatalogTupleUpdate(rel, &newtup->t_self, newtup);
+
+	InvokeObjectPostAlterHook(StatisticExtRelationId, stxoid, 0);
+
+	ObjectAddressSet(address, StatisticExtRelationId, stxoid);
+
+	/*
+	 * NOTE: because we only support altering the statistics target, not the
+	 * other fields, there is no need to update dependencies.
+	 */
+
+	heap_freetuple(newtup);
+	ReleaseSysCache(oldtup);
+
+	table_close(rel, RowExclusiveLock);
+
+	return address;
+}
+
+/*
+ * Delete the pg_statistic_ext_data row for a statistics object, if any.
+ *
+ * There may or may not be a row for the given (statsOid, inh) combination,
+ * so silently do nothing when it's absent.
+ */
+void
+RemoveStatisticsDataById(Oid statsOid, bool inh)
+{
+	Relation	datarel;
+	HeapTuple	datatup;
+
+	datarel = table_open(StatisticExtDataRelationId, RowExclusiveLock);
+
+	datatup = SearchSysCache2(STATEXTDATASTXOID,
+							  ObjectIdGetDatum(statsOid),
+							  BoolGetDatum(inh));
+
+	/* The row might not exist for this inh value; only delete what's there. */
+	if (HeapTupleIsValid(datatup))
+	{
+		CatalogTupleDelete(datarel, &datatup->t_self);
+		ReleaseSysCache(datatup);
+	}
+
+	table_close(datarel, RowExclusiveLock);
+}
+
+/*
+ * Guts of statistics object deletion.
+ */
+void
+RemoveStatisticsById(Oid statsOid)
+{
+	Relation	pgstatrel;
+	HeapTuple	oldtup;
+	Form_pg_statistic_ext form;
+	Oid			ownerrelid;
+
+	/*
+	 * Remove the statistical data first.  pg_statistic_ext_data may hold
+	 * rows both with and without inheritance, so attempt to delete both.
+	 */
+	RemoveStatisticsDataById(statsOid, true);
+	RemoveStatisticsDataById(statsOid, false);
+
+	/*
+	 * Now remove the pg_statistic_ext row itself.  Also invalidate the
+	 * relcache entry of the table the statistics were attached to, so that
+	 * dependent plans get rebuilt.
+	 */
+	pgstatrel = table_open(StatisticExtRelationId, RowExclusiveLock);
+
+	oldtup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statsOid));
+
+	if (!HeapTupleIsValid(oldtup))	/* should not happen */
+		elog(ERROR, "cache lookup failed for statistics object %u", statsOid);
+
+	form = (Form_pg_statistic_ext) GETSTRUCT(oldtup);
+	ownerrelid = form->stxrelid;
+
+	CacheInvalidateRelcacheByRelid(ownerrelid);
+
+	CatalogTupleDelete(pgstatrel, &oldtup->t_self);
+
+	ReleaseSysCache(oldtup);
+
+	table_close(pgstatrel, RowExclusiveLock);
+}
+
+/*
+ * Select a name for a new statistics object that doesn't collide with any
+ * existing one in the given namespace.
+ *
+ * name1, name2, and label are combined as in makeObjectName(), except that
+ * label must not be NULL; a numeric suffix is appended to the label as
+ * needed to arrive at an unused name.
+ *
+ * Returns a palloc'd string.
+ *
+ * Note: a concurrent session could still pick the same name, since we make
+ * no attempt to reserve it.  That's unlikely to be a problem in practice,
+ * especially when holding a share update exclusive lock on the relation
+ * identified by name1.  But when choosing multiple names within a single
+ * command, create each new object and do CommandCounterIncrement before
+ * choosing the next one!
+ */
+static char *
+ChooseExtendedStatisticName(const char *name1, const char *name2,
+							const char *label, Oid namespaceid)
+{
+	char		modlabel[NAMEDATALEN];
+	char	   *candidate = NULL;
+	int			suffix = 0;
+
+	/* The first attempt uses the label as-is. */
+	strlcpy(modlabel, label, sizeof(modlabel));
+
+	for (;;)
+	{
+		Oid			conflict;
+
+		candidate = makeObjectName(name1, name2, modlabel);
+
+		conflict = GetSysCacheOid2(STATEXTNAMENSP, Anum_pg_statistic_ext_oid,
+								   PointerGetDatum(candidate),
+								   ObjectIdGetDatum(namespaceid));
+		if (!OidIsValid(conflict))
+			break;
+
+		/* Name taken; retry with the label plus the next numeric suffix. */
+		pfree(candidate);
+		snprintf(modlabel, sizeof(modlabel), "%s%d", label, ++suffix);
+	}
+
+	return candidate;
+}
+
+/*
+ * Build the "name2" part for an auto-generated statistics object name, by
+ * joining the given column names (or "expr" placeholders for expressions)
+ * with underscores.  The result is passed to ChooseExtendedStatisticName
+ * along with the parent table name and a suitable label.
+ *
+ * ChooseExtendedStatisticName will use fewer than NAMEDATALEN characters
+ * of the result, so we stop accumulating once we have at least that many.
+ *
+ * XXX see also ChooseForeignKeyConstraintNameAddition and
+ * ChooseIndexNameAddition.
+ */
+static char *
+ChooseExtendedStatisticNameAddition(List *exprs)
+{
+	char		namebuf[NAMEDATALEN * 2];
+	int			len = 0;
+	ListCell   *lc;
+
+	namebuf[0] = '\0';
+	foreach(lc, exprs)
+	{
+		StatsElem  *selem = (StatsElem *) lfirst(lc);
+		const char *colname;
+
+		/* It should be one of these, but just skip if it happens not to be */
+		if (!IsA(selem, StatsElem))
+			continue;
+
+		/*
+		 * Expressions have no column name, so use a fixed "expr" for them.
+		 * (Indexes handle this via ChooseIndexColumnNames, but adding such
+		 * a function for stats doesn't seem worthwhile; a user wanting a
+		 * better name can always specify one explicitly.)
+		 */
+		colname = selem->name ? selem->name : "expr";
+
+		/* Separate successive names with an underscore. */
+		if (len > 0)
+			namebuf[len++] = '_';
+
+		/*
+		 * At this point len <= NAMEDATALEN.  colname should already be
+		 * shorter than NAMEDATALEN, but use strlcpy for paranoia.
+		 */
+		strlcpy(namebuf + len, colname, NAMEDATALEN);
+		len += strlen(namebuf + len);
+		if (len >= NAMEDATALEN)
+			break;
+	}
+	return pstrdup(namebuf);
+}
+
+/*
+ * StatisticsGetRelation: given a statistics object's OID, get the OID of
+ * the relation it is defined on.  Uses the system cache.
+ *
+ * Returns InvalidOid if the object doesn't exist and missing_ok is true;
+ * otherwise a missing object is reported with elog(ERROR).
+ */
+Oid
+StatisticsGetRelation(Oid statId, bool missing_ok)
+{
+	HeapTuple	tp;
+	Form_pg_statistic_ext form;
+	Oid			relid;
+
+	tp = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statId));
+	if (!HeapTupleIsValid(tp))
+	{
+		if (missing_ok)
+			return InvalidOid;
+		elog(ERROR, "cache lookup failed for statistics object %u", statId);
+	}
+	form = (Form_pg_statistic_ext) GETSTRUCT(tp);
+	Assert(form->oid == statId);
+
+	relid = form->stxrelid;
+	ReleaseSysCache(tp);
+	return relid;
+}
diff --git a/src/backend/commands/subscriptioncmds.c b/src/backend/commands/subscriptioncmds.c
new file mode 100644
index 0000000..334717c
--- /dev/null
+++ b/src/backend/commands/subscriptioncmds.c
@@ -0,0 +1,1966 @@
+/*-------------------------------------------------------------------------
+ *
+ * subscriptioncmds.c
+ * subscription catalog manipulation functions
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/commands/subscriptioncmds.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/htup_details.h"
+#include "access/table.h"
+#include "access/xact.h"
+#include "catalog/catalog.h"
+#include "catalog/dependency.h"
+#include "catalog/indexing.h"
+#include "catalog/namespace.h"
+#include "catalog/objectaccess.h"
+#include "catalog/objectaddress.h"
+#include "catalog/pg_subscription.h"
+#include "catalog/pg_subscription_rel.h"
+#include "catalog/pg_type.h"
+#include "commands/defrem.h"
+#include "commands/event_trigger.h"
+#include "commands/subscriptioncmds.h"
+#include "executor/executor.h"
+#include "miscadmin.h"
+#include "nodes/makefuncs.h"
+#include "pgstat.h"
+#include "replication/logicallauncher.h"
+#include "replication/origin.h"
+#include "replication/slot.h"
+#include "replication/walreceiver.h"
+#include "replication/walsender.h"
+#include "replication/worker_internal.h"
+#include "storage/lmgr.h"
+#include "utils/acl.h"
+#include "utils/builtins.h"
+#include "utils/guc.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/pg_lsn.h"
+#include "utils/syscache.h"
+
+/*
+ * Options that can be specified by the user in CREATE/ALTER SUBSCRIPTION
+ * command.
+ *
+ * These bits are used both as 'supported_opts' masks (the set of options a
+ * given command accepts) and in SubOpts.specified_opts (the set of options
+ * the user actually supplied).
+ */
+#define SUBOPT_CONNECT				0x00000001
+#define SUBOPT_ENABLED				0x00000002
+#define SUBOPT_CREATE_SLOT			0x00000004
+#define SUBOPT_SLOT_NAME			0x00000008
+#define SUBOPT_COPY_DATA			0x00000010
+#define SUBOPT_SYNCHRONOUS_COMMIT	0x00000020
+#define SUBOPT_REFRESH				0x00000040
+#define SUBOPT_BINARY				0x00000080
+#define SUBOPT_STREAMING			0x00000100
+#define SUBOPT_TWOPHASE_COMMIT		0x00000200
+#define SUBOPT_DISABLE_ON_ERR		0x00000400
+#define SUBOPT_LSN					0x00000800
+
+/* check if the 'val' has 'bits' set */
+#define IsSet(val, bits)  (((val) & (bits)) == (bits))
+
+/*
+ * Structure to hold a bitmap representing the user-provided CREATE/ALTER
+ * SUBSCRIPTION command options and the parsed/default values of each of them.
+ */
+typedef struct SubOpts
+{
+	bits32		specified_opts; /* SUBOPT_* bits for options actually given */
+	char	   *slot_name;		/* replication slot name; NULL if "none" */
+	char	   *synchronous_commit; /* value for synchronous_commit GUC */
+	bool		connect;		/* "connect" option (defaults to true) */
+	bool		enabled;		/* "enabled" option (defaults to true) */
+	bool		create_slot;	/* "create_slot" option (defaults to true) */
+	bool		copy_data;		/* "copy_data" option (defaults to true) */
+	bool		refresh;		/* "refresh" option (defaults to true) */
+	bool		binary;			/* "binary" option (defaults to false) */
+	bool		streaming;		/* "streaming" option (defaults to false) */
+	bool		twophase;		/* "two_phase" option (defaults to false) */
+	bool		disableonerr;	/* "disable_on_error" option (default false) */
+	XLogRecPtr	lsn;			/* "lsn" option; Invalid when "none" given */
+} SubOpts;
+
+static List *fetch_table_list(WalReceiverConn *wrconn, List *publications);
+static void check_duplicates_in_publist(List *publist, Datum *datums);
+static List *merge_publications(List *oldpublist, List *newpublist, bool addpub, const char *subname);
+static void ReportSlotConnectionError(List *rstates, Oid subid, char *slotname, char *err);
+
+
+/*
+ * Common option parsing function for CREATE and ALTER SUBSCRIPTION commands.
+ *
+ * Since not all options can be specified in both commands, this function
+ * will report an error if mutually exclusive options are specified.
+ *
+ * 'supported_opts' is a bitmap of the SUBOPT_* flags the caller accepts;
+ * specifying any option outside that set raises an error.  On return,
+ * *opts contains the parsed values, with defaults filled in for options the
+ * user did not give, and opts->specified_opts records exactly which options
+ * appeared in 'stmt_options'.
+ */
+static void
+parse_subscription_options(ParseState *pstate, List *stmt_options,
+						   bits32 supported_opts, SubOpts *opts)
+{
+	ListCell   *lc;
+
+	/* Start out with cleared opts. */
+	memset(opts, 0, sizeof(SubOpts));
+
+	/* caller must expect some option */
+	Assert(supported_opts != 0);
+
+	/* If connect option is supported, these others also need to be. */
+	Assert(!IsSet(supported_opts, SUBOPT_CONNECT) ||
+		   IsSet(supported_opts, SUBOPT_ENABLED | SUBOPT_CREATE_SLOT |
+				 SUBOPT_COPY_DATA));
+
+	/* Set default values for the boolean supported options. */
+	if (IsSet(supported_opts, SUBOPT_CONNECT))
+		opts->connect = true;
+	if (IsSet(supported_opts, SUBOPT_ENABLED))
+		opts->enabled = true;
+	if (IsSet(supported_opts, SUBOPT_CREATE_SLOT))
+		opts->create_slot = true;
+	if (IsSet(supported_opts, SUBOPT_COPY_DATA))
+		opts->copy_data = true;
+	if (IsSet(supported_opts, SUBOPT_REFRESH))
+		opts->refresh = true;
+	if (IsSet(supported_opts, SUBOPT_BINARY))
+		opts->binary = false;
+	if (IsSet(supported_opts, SUBOPT_STREAMING))
+		opts->streaming = false;
+	if (IsSet(supported_opts, SUBOPT_TWOPHASE_COMMIT))
+		opts->twophase = false;
+	if (IsSet(supported_opts, SUBOPT_DISABLE_ON_ERR))
+		opts->disableonerr = false;
+
+	/*
+	 * Parse options.  Each supported option may appear at most once;
+	 * duplicates draw an error via errorConflictingDefElem.
+	 */
+	foreach(lc, stmt_options)
+	{
+		DefElem    *defel = (DefElem *) lfirst(lc);
+
+		if (IsSet(supported_opts, SUBOPT_CONNECT) &&
+			strcmp(defel->defname, "connect") == 0)
+		{
+			if (IsSet(opts->specified_opts, SUBOPT_CONNECT))
+				errorConflictingDefElem(defel, pstate);
+
+			opts->specified_opts |= SUBOPT_CONNECT;
+			opts->connect = defGetBoolean(defel);
+		}
+		else if (IsSet(supported_opts, SUBOPT_ENABLED) &&
+				 strcmp(defel->defname, "enabled") == 0)
+		{
+			if (IsSet(opts->specified_opts, SUBOPT_ENABLED))
+				errorConflictingDefElem(defel, pstate);
+
+			opts->specified_opts |= SUBOPT_ENABLED;
+			opts->enabled = defGetBoolean(defel);
+		}
+		else if (IsSet(supported_opts, SUBOPT_CREATE_SLOT) &&
+				 strcmp(defel->defname, "create_slot") == 0)
+		{
+			if (IsSet(opts->specified_opts, SUBOPT_CREATE_SLOT))
+				errorConflictingDefElem(defel, pstate);
+
+			opts->specified_opts |= SUBOPT_CREATE_SLOT;
+			opts->create_slot = defGetBoolean(defel);
+		}
+		else if (IsSet(supported_opts, SUBOPT_SLOT_NAME) &&
+				 strcmp(defel->defname, "slot_name") == 0)
+		{
+			if (IsSet(opts->specified_opts, SUBOPT_SLOT_NAME))
+				errorConflictingDefElem(defel, pstate);
+
+			opts->specified_opts |= SUBOPT_SLOT_NAME;
+			opts->slot_name = defGetString(defel);
+
+			/* Setting slot_name = NONE is treated as no slot name. */
+			if (strcmp(opts->slot_name, "none") == 0)
+				opts->slot_name = NULL;
+			else
+				ReplicationSlotValidateName(opts->slot_name, ERROR);
+		}
+		else if (IsSet(supported_opts, SUBOPT_COPY_DATA) &&
+				 strcmp(defel->defname, "copy_data") == 0)
+		{
+			if (IsSet(opts->specified_opts, SUBOPT_COPY_DATA))
+				errorConflictingDefElem(defel, pstate);
+
+			opts->specified_opts |= SUBOPT_COPY_DATA;
+			opts->copy_data = defGetBoolean(defel);
+		}
+		else if (IsSet(supported_opts, SUBOPT_SYNCHRONOUS_COMMIT) &&
+				 strcmp(defel->defname, "synchronous_commit") == 0)
+		{
+			if (IsSet(opts->specified_opts, SUBOPT_SYNCHRONOUS_COMMIT))
+				errorConflictingDefElem(defel, pstate);
+
+			opts->specified_opts |= SUBOPT_SYNCHRONOUS_COMMIT;
+			opts->synchronous_commit = defGetString(defel);
+
+			/* Test if the given value is valid for synchronous_commit GUC. */
+			(void) set_config_option("synchronous_commit", opts->synchronous_commit,
+									 PGC_BACKEND, PGC_S_TEST, GUC_ACTION_SET,
+									 false, 0, false);
+		}
+		else if (IsSet(supported_opts, SUBOPT_REFRESH) &&
+				 strcmp(defel->defname, "refresh") == 0)
+		{
+			if (IsSet(opts->specified_opts, SUBOPT_REFRESH))
+				errorConflictingDefElem(defel, pstate);
+
+			opts->specified_opts |= SUBOPT_REFRESH;
+			opts->refresh = defGetBoolean(defel);
+		}
+		else if (IsSet(supported_opts, SUBOPT_BINARY) &&
+				 strcmp(defel->defname, "binary") == 0)
+		{
+			if (IsSet(opts->specified_opts, SUBOPT_BINARY))
+				errorConflictingDefElem(defel, pstate);
+
+			opts->specified_opts |= SUBOPT_BINARY;
+			opts->binary = defGetBoolean(defel);
+		}
+		else if (IsSet(supported_opts, SUBOPT_STREAMING) &&
+				 strcmp(defel->defname, "streaming") == 0)
+		{
+			if (IsSet(opts->specified_opts, SUBOPT_STREAMING))
+				errorConflictingDefElem(defel, pstate);
+
+			opts->specified_opts |= SUBOPT_STREAMING;
+			opts->streaming = defGetBoolean(defel);
+		}
+		else if (strcmp(defel->defname, "two_phase") == 0)
+		{
+			/*
+			 * Do not allow toggling of two_phase option. Doing so could cause
+			 * missing of transactions and lead to an inconsistent replica.
+			 * See comments atop worker.c
+			 *
+			 * Note: Unsupported twophase indicates that this call originated
+			 * from AlterSubscription.
+			 */
+			if (!IsSet(supported_opts, SUBOPT_TWOPHASE_COMMIT))
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("unrecognized subscription parameter: \"%s\"", defel->defname)));
+
+			if (IsSet(opts->specified_opts, SUBOPT_TWOPHASE_COMMIT))
+				errorConflictingDefElem(defel, pstate);
+
+			opts->specified_opts |= SUBOPT_TWOPHASE_COMMIT;
+			opts->twophase = defGetBoolean(defel);
+		}
+		else if (IsSet(supported_opts, SUBOPT_DISABLE_ON_ERR) &&
+				 strcmp(defel->defname, "disable_on_error") == 0)
+		{
+			if (IsSet(opts->specified_opts, SUBOPT_DISABLE_ON_ERR))
+				errorConflictingDefElem(defel, pstate);
+
+			opts->specified_opts |= SUBOPT_DISABLE_ON_ERR;
+			opts->disableonerr = defGetBoolean(defel);
+		}
+		else if (IsSet(supported_opts, SUBOPT_LSN) &&
+				 strcmp(defel->defname, "lsn") == 0)
+		{
+			char	   *lsn_str = defGetString(defel);
+			XLogRecPtr	lsn;
+
+			if (IsSet(opts->specified_opts, SUBOPT_LSN))
+				errorConflictingDefElem(defel, pstate);
+
+			/* Setting lsn = NONE is treated as resetting LSN */
+			if (strcmp(lsn_str, "none") == 0)
+				lsn = InvalidXLogRecPtr;
+			else
+			{
+				/* Parse the argument as LSN */
+				lsn = DatumGetLSN(DirectFunctionCall1(pg_lsn_in,
+													  CStringGetDatum(lsn_str)));
+
+				if (XLogRecPtrIsInvalid(lsn))
+					ereport(ERROR,
+							(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+							 errmsg("invalid WAL location (LSN): %s", lsn_str)));
+			}
+
+			opts->specified_opts |= SUBOPT_LSN;
+			opts->lsn = lsn;
+		}
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("unrecognized subscription parameter: \"%s\"", defel->defname)));
+	}
+
+	/*
+	 * We've been explicitly asked to not connect, that requires some
+	 * additional processing.
+	 */
+	if (!opts->connect && IsSet(supported_opts, SUBOPT_CONNECT))
+	{
+		/*
+		 * Check for incompatible options from the user.  Only explicitly
+		 * specified values conflict; unspecified defaults are overridden
+		 * below instead.
+		 */
+		if (opts->enabled &&
+			IsSet(opts->specified_opts, SUBOPT_ENABLED))
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+			/*- translator: both %s are strings of the form "option = value" */
+					 errmsg("%s and %s are mutually exclusive options",
+							"connect = false", "enabled = true")));
+
+		if (opts->create_slot &&
+			IsSet(opts->specified_opts, SUBOPT_CREATE_SLOT))
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("%s and %s are mutually exclusive options",
+							"connect = false", "create_slot = true")));
+
+		if (opts->copy_data &&
+			IsSet(opts->specified_opts, SUBOPT_COPY_DATA))
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("%s and %s are mutually exclusive options",
+							"connect = false", "copy_data = true")));
+
+		/* Change the defaults of other options. */
+		opts->enabled = false;
+		opts->create_slot = false;
+		opts->copy_data = false;
+	}
+
+	/*
+	 * Do additional checking for disallowed combination when slot_name = NONE
+	 * was used.
+	 */
+	if (!opts->slot_name &&
+		IsSet(opts->specified_opts, SUBOPT_SLOT_NAME))
+	{
+		if (opts->enabled)
+		{
+			if (IsSet(opts->specified_opts, SUBOPT_ENABLED))
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+				/*- translator: both %s are strings of the form "option = value" */
+						 errmsg("%s and %s are mutually exclusive options",
+								"slot_name = NONE", "enabled = true")));
+			else
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+				/*- translator: both %s are strings of the form "option = value" */
+						 errmsg("subscription with %s must also set %s",
+								"slot_name = NONE", "enabled = false")));
+		}
+
+		if (opts->create_slot)
+		{
+			if (IsSet(opts->specified_opts, SUBOPT_CREATE_SLOT))
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+				/*- translator: both %s are strings of the form "option = value" */
+						 errmsg("%s and %s are mutually exclusive options",
+								"slot_name = NONE", "create_slot = true")));
+			else
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+				/*- translator: both %s are strings of the form "option = value" */
+						 errmsg("subscription with %s must also set %s",
+								"slot_name = NONE", "create_slot = false")));
+		}
+	}
+}
+
+/*
+ * Append the publication names from the list to 'dest', comma-separated.
+ * With quote_literal, each name is emitted as a quoted SQL literal;
+ * otherwise it is wrapped in double quotes (for message text).
+ */
+static void
+get_publications_str(List *publications, StringInfo dest, bool quote_literal)
+{
+	ListCell   *lc;
+
+	Assert(list_length(publications) > 0);
+
+	foreach(lc, publications)
+	{
+		char	   *pubname = strVal(lfirst(lc));
+
+		/* Separate entries after the first one with ", ". */
+		if (foreach_current_index(lc) > 0)
+			appendStringInfoString(dest, ", ");
+
+		if (quote_literal)
+			appendStringInfoString(dest, quote_literal_cstr(pubname));
+		else
+		{
+			appendStringInfoChar(dest, '"');
+			appendStringInfoString(dest, pubname);
+			appendStringInfoChar(dest, '"');
+		}
+	}
+}
+
+/*
+ * Verify that the specified publications exist on the publisher, and emit
+ * a WARNING listing any that do not.
+ */
+static void
+check_publications(WalReceiverConn *wrconn, List *publications)
+{
+	WalRcvExecResult *walres;
+	StringInfo	request;
+	TupleTableSlot *tslot;
+	List	   *missing;
+	Oid			tableRow[1] = {TEXTOID};
+
+	/* Ask the publisher which of the requested publications it has. */
+	request = makeStringInfo();
+	appendStringInfoString(request, "SELECT t.pubname FROM\n"
+						   " pg_catalog.pg_publication t WHERE\n"
+						   " t.pubname IN (");
+	get_publications_str(publications, request, true);
+	appendStringInfoChar(request, ')');
+
+	walres = walrcv_exec(wrconn, request->data, 1, tableRow);
+	pfree(request->data);
+	pfree(request);
+
+	if (walres->status != WALRCV_OK_TUPLES)
+		ereport(ERROR,
+				errmsg("could not receive list of publications from the publisher: %s",
+					   walres->err));
+
+	/* Start with all requested names, then strike out each one we find. */
+	missing = list_copy(publications);
+
+	tslot = MakeSingleTupleTableSlot(walres->tupledesc, &TTSOpsMinimalTuple);
+	while (tuplestore_gettupleslot(walres->tuplestore, true, false, tslot))
+	{
+		char	   *pubname;
+		bool		isnull;
+
+		pubname = TextDatumGetCString(slot_getattr(tslot, 1, &isnull));
+		Assert(!isnull);
+
+		/* This publication exists remotely, so it's not missing. */
+		missing = list_delete(missing, makeString(pubname));
+		ExecClearTuple(tslot);
+	}
+
+	ExecDropSingleTupleTableSlot(tslot);
+
+	walrcv_clear_result(walres);
+
+	/* Anything left over was not found on the publisher. */
+	if (list_length(missing))
+	{
+		StringInfo	pubnames = makeStringInfo();
+
+		get_publications_str(missing, pubnames, false);
+		ereport(WARNING,
+				errcode(ERRCODE_UNDEFINED_OBJECT),
+				errmsg_plural("publication %s does not exist on the publisher",
+							  "publications %s do not exist on the publisher",
+							  list_length(missing),
+							  pubnames->data));
+	}
+}
+
+/*
+ * Auxiliary function to build a text array out of a list of String nodes.
+ */
+static Datum
+publicationListToArray(List *publist)
+{
+ ArrayType *arr;
+ Datum *datums;
+ MemoryContext memcxt;
+ MemoryContext oldcxt;
+
+ /* Create memory context for temporary allocations. */
+ memcxt = AllocSetContextCreate(CurrentMemoryContext,
+ "publicationListToArray to array",
+ ALLOCSET_DEFAULT_SIZES);
+ oldcxt = MemoryContextSwitchTo(memcxt);
+
+ datums = (Datum *) palloc(sizeof(Datum) * list_length(publist));
+
+ check_duplicates_in_publist(publist, datums);
+
+ MemoryContextSwitchTo(oldcxt);
+
+ arr = construct_array(datums, list_length(publist),
+ TEXTOID, -1, false, TYPALIGN_INT);
+
+ MemoryContextDelete(memcxt);
+
+ return PointerGetDatum(arr);
+}
+
+/*
+ * Create new subscription.
+ */
+ObjectAddress
+CreateSubscription(ParseState *pstate, CreateSubscriptionStmt *stmt,
+ bool isTopLevel)
+{
+ Relation rel;
+ ObjectAddress myself;
+ Oid subid;
+ bool nulls[Natts_pg_subscription];
+ Datum values[Natts_pg_subscription];
+ Oid owner = GetUserId();
+ HeapTuple tup;
+ char *conninfo;
+ char originname[NAMEDATALEN];
+ List *publications;
+ bits32 supported_opts;
+ SubOpts opts = {0};
+
+ /*
+ * Parse and check options.
+ *
+ * Connection and publication should not be specified here.
+ */
+ supported_opts = (SUBOPT_CONNECT | SUBOPT_ENABLED | SUBOPT_CREATE_SLOT |
+ SUBOPT_SLOT_NAME | SUBOPT_COPY_DATA |
+ SUBOPT_SYNCHRONOUS_COMMIT | SUBOPT_BINARY |
+ SUBOPT_STREAMING | SUBOPT_TWOPHASE_COMMIT |
+ SUBOPT_DISABLE_ON_ERR);
+ parse_subscription_options(pstate, stmt->options, supported_opts, &opts);
+
+ /*
+ * Since creating a replication slot is not transactional, rolling back
+ * the transaction leaves the created replication slot. So we cannot run
+ * CREATE SUBSCRIPTION inside a transaction block if creating a
+ * replication slot.
+ */
+ if (opts.create_slot)
+ PreventInTransactionBlock(isTopLevel, "CREATE SUBSCRIPTION ... WITH (create_slot = true)");
+
+ if (!superuser())
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("must be superuser to create subscriptions")));
+
+ /*
+ * If built with appropriate switch, whine when regression-testing
+ * conventions for subscription names are violated.
+ */
+#ifdef ENFORCE_REGRESSION_TEST_NAME_RESTRICTIONS
+ if (strncmp(stmt->subname, "regress_", 8) != 0)
+ elog(WARNING, "subscriptions created by regression test cases should have names starting with \"regress_\"");
+#endif
+
+ rel = table_open(SubscriptionRelationId, RowExclusiveLock);
+
+ /* Check if name is used */
+ subid = GetSysCacheOid2(SUBSCRIPTIONNAME, Anum_pg_subscription_oid,
+ MyDatabaseId, CStringGetDatum(stmt->subname));
+ if (OidIsValid(subid))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg("subscription \"%s\" already exists",
+ stmt->subname)));
+ }
+
+ if (!IsSet(opts.specified_opts, SUBOPT_SLOT_NAME) &&
+ opts.slot_name == NULL)
+ opts.slot_name = stmt->subname;
+
+ /* The default for synchronous_commit of subscriptions is off. */
+ if (opts.synchronous_commit == NULL)
+ opts.synchronous_commit = "off";
+
+ conninfo = stmt->conninfo;
+ publications = stmt->publication;
+
+ /* Load the library providing us libpq calls. */
+ load_file("libpqwalreceiver", false);
+
+ /* Check the connection info string. */
+ walrcv_check_conninfo(conninfo);
+
+ /* Everything ok, form a new tuple. */
+ memset(values, 0, sizeof(values));
+ memset(nulls, false, sizeof(nulls));
+
+ subid = GetNewOidWithIndex(rel, SubscriptionObjectIndexId,
+ Anum_pg_subscription_oid);
+ values[Anum_pg_subscription_oid - 1] = ObjectIdGetDatum(subid);
+ values[Anum_pg_subscription_subdbid - 1] = ObjectIdGetDatum(MyDatabaseId);
+ values[Anum_pg_subscription_subskiplsn - 1] = LSNGetDatum(InvalidXLogRecPtr);
+ values[Anum_pg_subscription_subname - 1] =
+ DirectFunctionCall1(namein, CStringGetDatum(stmt->subname));
+ values[Anum_pg_subscription_subowner - 1] = ObjectIdGetDatum(owner);
+ values[Anum_pg_subscription_subenabled - 1] = BoolGetDatum(opts.enabled);
+ values[Anum_pg_subscription_subbinary - 1] = BoolGetDatum(opts.binary);
+ values[Anum_pg_subscription_substream - 1] = BoolGetDatum(opts.streaming);
+ values[Anum_pg_subscription_subtwophasestate - 1] =
+ CharGetDatum(opts.twophase ?
+ LOGICALREP_TWOPHASE_STATE_PENDING :
+ LOGICALREP_TWOPHASE_STATE_DISABLED);
+ values[Anum_pg_subscription_subdisableonerr - 1] = BoolGetDatum(opts.disableonerr);
+ values[Anum_pg_subscription_subconninfo - 1] =
+ CStringGetTextDatum(conninfo);
+ if (opts.slot_name)
+ values[Anum_pg_subscription_subslotname - 1] =
+ DirectFunctionCall1(namein, CStringGetDatum(opts.slot_name));
+ else
+ nulls[Anum_pg_subscription_subslotname - 1] = true;
+ values[Anum_pg_subscription_subsynccommit - 1] =
+ CStringGetTextDatum(opts.synchronous_commit);
+ values[Anum_pg_subscription_subpublications - 1] =
+ publicationListToArray(publications);
+
+ tup = heap_form_tuple(RelationGetDescr(rel), values, nulls);
+
+ /* Insert tuple into catalog. */
+ CatalogTupleInsert(rel, tup);
+ heap_freetuple(tup);
+
+ recordDependencyOnOwner(SubscriptionRelationId, subid, owner);
+
+ snprintf(originname, sizeof(originname), "pg_%u", subid);
+ replorigin_create(originname);
+
+ /*
+ * Connect to remote side to execute requested commands and fetch table
+ * info.
+ */
+ if (opts.connect)
+ {
+ char *err;
+ WalReceiverConn *wrconn;
+ List *tables;
+ ListCell *lc;
+ char table_state;
+
+ /* Try to connect to the publisher. */
+ wrconn = walrcv_connect(conninfo, true, stmt->subname, &err);
+ if (!wrconn)
+ ereport(ERROR,
+ (errcode(ERRCODE_CONNECTION_FAILURE),
+ errmsg("could not connect to the publisher: %s", err)));
+
+ PG_TRY();
+ {
+ check_publications(wrconn, publications);
+
+ /*
+ * Set sync state based on if we were asked to do data copy or
+ * not.
+ */
+ table_state = opts.copy_data ? SUBREL_STATE_INIT : SUBREL_STATE_READY;
+
+ /*
+ * Get the table list from publisher and build local table status
+ * info.
+ */
+ tables = fetch_table_list(wrconn, publications);
+ foreach(lc, tables)
+ {
+ RangeVar *rv = (RangeVar *) lfirst(lc);
+ Oid relid;
+
+ relid = RangeVarGetRelid(rv, AccessShareLock, false);
+
+ /* Check for supported relkind. */
+ CheckSubscriptionRelkind(get_rel_relkind(relid),
+ rv->schemaname, rv->relname);
+
+ AddSubscriptionRelState(subid, relid, table_state,
+ InvalidXLogRecPtr);
+ }
+
+ /*
+ * If requested, create permanent slot for the subscription. We
+ * won't use the initial snapshot for anything, so no need to
+ * export it.
+ */
+ if (opts.create_slot)
+ {
+ bool twophase_enabled = false;
+
+ Assert(opts.slot_name);
+
+ /*
+ * Even if two_phase is set, don't create the slot with
+ * two-phase enabled. Will enable it once all the tables are
+ * synced and ready. This avoids race-conditions like prepared
+ * transactions being skipped due to changes not being applied
+ * due to checks in should_apply_changes_for_rel() when
+ * tablesync for the corresponding tables are in progress. See
+ * comments atop worker.c.
+ *
+ * Note that if tables were specified but copy_data is false
+ * then it is safe to enable two_phase up-front because those
+ * tables are already initially in READY state. When the
+ * subscription has no tables, we leave the twophase state as
+ * PENDING, to allow ALTER SUBSCRIPTION ... REFRESH
+ * PUBLICATION to work.
+ */
+ if (opts.twophase && !opts.copy_data && tables != NIL)
+ twophase_enabled = true;
+
+ walrcv_create_slot(wrconn, opts.slot_name, false, twophase_enabled,
+ CRS_NOEXPORT_SNAPSHOT, NULL);
+
+ if (twophase_enabled)
+ UpdateTwoPhaseState(subid, LOGICALREP_TWOPHASE_STATE_ENABLED);
+
+ ereport(NOTICE,
+ (errmsg("created replication slot \"%s\" on publisher",
+ opts.slot_name)));
+ }
+ }
+ PG_FINALLY();
+ {
+ walrcv_disconnect(wrconn);
+ }
+ PG_END_TRY();
+ }
+ else
+ ereport(WARNING,
+ /* translator: %s is an SQL ALTER statement */
+ (errmsg("tables were not subscribed, you will have to run %s to subscribe the tables",
+ "ALTER SUBSCRIPTION ... REFRESH PUBLICATION")));
+
+ table_close(rel, RowExclusiveLock);
+
+ pgstat_create_subscription(subid);
+
+ if (opts.enabled)
+ ApplyLauncherWakeupAtCommit();
+
+ ObjectAddressSet(myself, SubscriptionRelationId, subid);
+
+ InvokeObjectPostCreateHook(SubscriptionRelationId, subid, 0);
+
+ return myself;
+}
+
/*
 * AlterSubscription_refresh
 *
 * Synchronize the local pg_subscription_rel state with the set of tables
 * currently published on the publisher: add state entries for newly
 * published tables, and remove state (plus any tablesync origins/slots) for
 * tables no longer published.
 *
 * copy_data determines the initial state of newly added tables (INIT to
 * copy existing data, READY to skip the copy).  validate_publications, if
 * non-NIL, is a list of publication names to verify on the publisher first.
 */
static void
AlterSubscription_refresh(Subscription *sub, bool copy_data,
						  List *validate_publications)
{
	char	   *err;
	List	   *pubrel_names;
	List	   *subrel_states;
	Oid		   *subrel_local_oids;
	Oid		   *pubrel_local_oids;
	ListCell   *lc;
	int			off;
	int			remove_rel_len;
	Relation	rel = NULL;
	typedef struct SubRemoveRels
	{
		Oid			relid;
		char		state;
	} SubRemoveRels;
	SubRemoveRels *sub_remove_rels;
	WalReceiverConn *wrconn;

	/* Load the library providing us libpq calls. */
	load_file("libpqwalreceiver", false);

	/* Try to connect to the publisher. */
	wrconn = walrcv_connect(sub->conninfo, true, sub->name, &err);
	if (!wrconn)
		ereport(ERROR,
				(errcode(ERRCODE_CONNECTION_FAILURE),
				 errmsg("could not connect to the publisher: %s", err)));

	PG_TRY();
	{
		if (validate_publications)
			check_publications(wrconn, validate_publications);

		/* Get the table list from publisher. */
		pubrel_names = fetch_table_list(wrconn, sub->publications);

		/* Get local table list. */
		subrel_states = GetSubscriptionRelations(sub->oid);

		/*
		 * Build qsorted array of local table oids for faster lookup. This can
		 * potentially contain all tables in the database so speed of lookup
		 * is important.
		 */
		subrel_local_oids = palloc(list_length(subrel_states) * sizeof(Oid));
		off = 0;
		foreach(lc, subrel_states)
		{
			SubscriptionRelState *relstate = (SubscriptionRelState *) lfirst(lc);

			subrel_local_oids[off++] = relstate->relid;
		}
		qsort(subrel_local_oids, list_length(subrel_states),
			  sizeof(Oid), oid_cmp);

		/*
		 * Rels that we want to remove from subscription and drop any slots
		 * and origins corresponding to them.
		 */
		sub_remove_rels = palloc(list_length(subrel_states) * sizeof(SubRemoveRels));

		/*
		 * Walk over the remote tables and try to match them to locally known
		 * tables. If the table is not known locally create a new state for
		 * it.
		 *
		 * Also builds array of local oids of remote tables for the next step.
		 */
		off = 0;
		pubrel_local_oids = palloc(list_length(pubrel_names) * sizeof(Oid));

		foreach(lc, pubrel_names)
		{
			RangeVar   *rv = (RangeVar *) lfirst(lc);
			Oid			relid;

			relid = RangeVarGetRelid(rv, AccessShareLock, false);

			/* Check for supported relkind. */
			CheckSubscriptionRelkind(get_rel_relkind(relid),
									 rv->schemaname, rv->relname);

			pubrel_local_oids[off++] = relid;

			/* Not known locally yet?  Register it with the subscription. */
			if (!bsearch(&relid, subrel_local_oids,
						 list_length(subrel_states), sizeof(Oid), oid_cmp))
			{
				AddSubscriptionRelState(sub->oid, relid,
										copy_data ? SUBREL_STATE_INIT : SUBREL_STATE_READY,
										InvalidXLogRecPtr);
				ereport(DEBUG1,
						(errmsg_internal("table \"%s.%s\" added to subscription \"%s\"",
										 rv->schemaname, rv->relname, sub->name)));
			}
		}

		/*
		 * Next remove state for tables we should not care about anymore using
		 * the data we collected above
		 */
		qsort(pubrel_local_oids, list_length(pubrel_names),
			  sizeof(Oid), oid_cmp);

		remove_rel_len = 0;
		for (off = 0; off < list_length(subrel_states); off++)
		{
			Oid			relid = subrel_local_oids[off];

			/* Local rel no longer published remotely? */
			if (!bsearch(&relid, pubrel_local_oids,
						 list_length(pubrel_names), sizeof(Oid), oid_cmp))
			{
				char		state;
				XLogRecPtr	statelsn;

				/*
				 * Lock pg_subscription_rel with AccessExclusiveLock to
				 * prevent any race conditions with the apply worker
				 * re-launching workers at the same time this code is trying
				 * to remove those tables.
				 *
				 * Even if new worker for this particular rel is restarted it
				 * won't be able to make any progress as we hold exclusive
				 * lock on subscription_rel till the transaction end. It will
				 * simply exit as there is no corresponding rel entry.
				 *
				 * This locking also ensures that the state of rels won't
				 * change till we are done with this refresh operation.
				 */
				if (!rel)
					rel = table_open(SubscriptionRelRelationId, AccessExclusiveLock);

				/* Last known rel state. */
				state = GetSubscriptionRelState(sub->oid, relid, &statelsn);

				sub_remove_rels[remove_rel_len].relid = relid;
				sub_remove_rels[remove_rel_len++].state = state;

				RemoveSubscriptionRel(sub->oid, relid);

				logicalrep_worker_stop(sub->oid, relid);

				/*
				 * For READY state, we would have already dropped the
				 * tablesync origin.
				 */
				if (state != SUBREL_STATE_READY)
				{
					char		originname[NAMEDATALEN];

					/*
					 * Drop the tablesync's origin tracking if exists.
					 *
					 * It is possible that the origin is not yet created for
					 * tablesync worker, this can happen for the states before
					 * SUBREL_STATE_FINISHEDCOPY. The apply worker can also
					 * concurrently try to drop the origin and by this time
					 * the origin might be already removed. For these reasons,
					 * passing missing_ok = true.
					 */
					ReplicationOriginNameForTablesync(sub->oid, relid, originname,
													  sizeof(originname));
					replorigin_drop_by_name(originname, true, false);
				}

				ereport(DEBUG1,
						(errmsg_internal("table \"%s.%s\" removed from subscription \"%s\"",
										 get_namespace_name(get_rel_namespace(relid)),
										 get_rel_name(relid),
										 sub->name)));
			}
		}

		/*
		 * Drop the tablesync slots associated with removed tables. This has
		 * to be at the end because otherwise if there is an error while doing
		 * the database operations we won't be able to rollback dropped slots.
		 */
		for (off = 0; off < remove_rel_len; off++)
		{
			if (sub_remove_rels[off].state != SUBREL_STATE_READY &&
				sub_remove_rels[off].state != SUBREL_STATE_SYNCDONE)
			{
				char		syncslotname[NAMEDATALEN] = {0};

				/*
				 * For READY/SYNCDONE states we know the tablesync slot has
				 * already been dropped by the tablesync worker.
				 *
				 * For other states, there is no certainty, maybe the slot
				 * does not exist yet. Also, if we fail after removing some of
				 * the slots, next time, it will again try to drop already
				 * dropped slots and fail. For these reasons, we allow
				 * missing_ok = true for the drop.
				 */
				ReplicationSlotNameForTablesync(sub->oid, sub_remove_rels[off].relid,
												syncslotname, sizeof(syncslotname));
				ReplicationSlotDropAtPubNode(wrconn, syncslotname, true);
			}
		}
	}
	PG_FINALLY();
	{
		walrcv_disconnect(wrconn);
	}
	PG_END_TRY();

	/* Keep the pg_subscription_rel lock (NoLock) until transaction end. */
	if (rel)
		table_close(rel, NoLock);
}
+
+/*
+ * Alter the existing subscription.
+ */
+ObjectAddress
+AlterSubscription(ParseState *pstate, AlterSubscriptionStmt *stmt,
+ bool isTopLevel)
+{
+ Relation rel;
+ ObjectAddress myself;
+ bool nulls[Natts_pg_subscription];
+ bool replaces[Natts_pg_subscription];
+ Datum values[Natts_pg_subscription];
+ HeapTuple tup;
+ Oid subid;
+ bool update_tuple = false;
+ Subscription *sub;
+ Form_pg_subscription form;
+ bits32 supported_opts;
+ SubOpts opts = {0};
+
+ rel = table_open(SubscriptionRelationId, RowExclusiveLock);
+
+ /* Fetch the existing tuple. */
+ tup = SearchSysCacheCopy2(SUBSCRIPTIONNAME, MyDatabaseId,
+ CStringGetDatum(stmt->subname));
+
+ if (!HeapTupleIsValid(tup))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("subscription \"%s\" does not exist",
+ stmt->subname)));
+
+ form = (Form_pg_subscription) GETSTRUCT(tup);
+ subid = form->oid;
+
+ /* must be owner */
+ if (!pg_subscription_ownercheck(subid, GetUserId()))
+ aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_SUBSCRIPTION,
+ stmt->subname);
+
+ sub = GetSubscription(subid, false);
+
+ /* Lock the subscription so nobody else can do anything with it. */
+ LockSharedObject(SubscriptionRelationId, subid, 0, AccessExclusiveLock);
+
+ /* Form a new tuple. */
+ memset(values, 0, sizeof(values));
+ memset(nulls, false, sizeof(nulls));
+ memset(replaces, false, sizeof(replaces));
+
+ switch (stmt->kind)
+ {
+ case ALTER_SUBSCRIPTION_OPTIONS:
+ {
+ supported_opts = (SUBOPT_SLOT_NAME |
+ SUBOPT_SYNCHRONOUS_COMMIT | SUBOPT_BINARY |
+ SUBOPT_STREAMING | SUBOPT_DISABLE_ON_ERR);
+
+ parse_subscription_options(pstate, stmt->options,
+ supported_opts, &opts);
+
+ if (IsSet(opts.specified_opts, SUBOPT_SLOT_NAME))
+ {
+ /*
+ * The subscription must be disabled to allow slot_name as
+ * 'none', otherwise, the apply worker will repeatedly try
+ * to stream the data using that slot_name which neither
+ * exists on the publisher nor the user will be allowed to
+ * create it.
+ */
+ if (sub->enabled && !opts.slot_name)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("cannot set %s for enabled subscription",
+ "slot_name = NONE")));
+
+ if (opts.slot_name)
+ values[Anum_pg_subscription_subslotname - 1] =
+ DirectFunctionCall1(namein, CStringGetDatum(opts.slot_name));
+ else
+ nulls[Anum_pg_subscription_subslotname - 1] = true;
+ replaces[Anum_pg_subscription_subslotname - 1] = true;
+ }
+
+ if (opts.synchronous_commit)
+ {
+ values[Anum_pg_subscription_subsynccommit - 1] =
+ CStringGetTextDatum(opts.synchronous_commit);
+ replaces[Anum_pg_subscription_subsynccommit - 1] = true;
+ }
+
+ if (IsSet(opts.specified_opts, SUBOPT_BINARY))
+ {
+ values[Anum_pg_subscription_subbinary - 1] =
+ BoolGetDatum(opts.binary);
+ replaces[Anum_pg_subscription_subbinary - 1] = true;
+ }
+
+ if (IsSet(opts.specified_opts, SUBOPT_STREAMING))
+ {
+ values[Anum_pg_subscription_substream - 1] =
+ BoolGetDatum(opts.streaming);
+ replaces[Anum_pg_subscription_substream - 1] = true;
+ }
+
+ if (IsSet(opts.specified_opts, SUBOPT_DISABLE_ON_ERR))
+ {
+ values[Anum_pg_subscription_subdisableonerr - 1]
+ = BoolGetDatum(opts.disableonerr);
+ replaces[Anum_pg_subscription_subdisableonerr - 1]
+ = true;
+ }
+
+ update_tuple = true;
+ break;
+ }
+
+ case ALTER_SUBSCRIPTION_ENABLED:
+ {
+ parse_subscription_options(pstate, stmt->options,
+ SUBOPT_ENABLED, &opts);
+ Assert(IsSet(opts.specified_opts, SUBOPT_ENABLED));
+
+ if (!sub->slotname && opts.enabled)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("cannot enable subscription that does not have a slot name")));
+
+ values[Anum_pg_subscription_subenabled - 1] =
+ BoolGetDatum(opts.enabled);
+ replaces[Anum_pg_subscription_subenabled - 1] = true;
+
+ if (opts.enabled)
+ ApplyLauncherWakeupAtCommit();
+
+ update_tuple = true;
+ break;
+ }
+
+ case ALTER_SUBSCRIPTION_CONNECTION:
+ /* Load the library providing us libpq calls. */
+ load_file("libpqwalreceiver", false);
+ /* Check the connection info string. */
+ walrcv_check_conninfo(stmt->conninfo);
+
+ values[Anum_pg_subscription_subconninfo - 1] =
+ CStringGetTextDatum(stmt->conninfo);
+ replaces[Anum_pg_subscription_subconninfo - 1] = true;
+ update_tuple = true;
+ break;
+
+ case ALTER_SUBSCRIPTION_SET_PUBLICATION:
+ {
+ supported_opts = SUBOPT_COPY_DATA | SUBOPT_REFRESH;
+ parse_subscription_options(pstate, stmt->options,
+ supported_opts, &opts);
+
+ values[Anum_pg_subscription_subpublications - 1] =
+ publicationListToArray(stmt->publication);
+ replaces[Anum_pg_subscription_subpublications - 1] = true;
+
+ update_tuple = true;
+
+ /* Refresh if user asked us to. */
+ if (opts.refresh)
+ {
+ if (!sub->enabled)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("ALTER SUBSCRIPTION with refresh is not allowed for disabled subscriptions"),
+ errhint("Use ALTER SUBSCRIPTION ... SET PUBLICATION ... WITH (refresh = false).")));
+
+ /*
+ * See ALTER_SUBSCRIPTION_REFRESH for details why this is
+ * not allowed.
+ */
+ if (sub->twophasestate == LOGICALREP_TWOPHASE_STATE_ENABLED && opts.copy_data)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("ALTER SUBSCRIPTION with refresh and copy_data is not allowed when two_phase is enabled"),
+ errhint("Use ALTER SUBSCRIPTION ... SET PUBLICATION with refresh = false, or with copy_data = false, or use DROP/CREATE SUBSCRIPTION.")));
+
+ PreventInTransactionBlock(isTopLevel, "ALTER SUBSCRIPTION with refresh");
+
+ /* Make sure refresh sees the new list of publications. */
+ sub->publications = stmt->publication;
+
+ AlterSubscription_refresh(sub, opts.copy_data,
+ stmt->publication);
+ }
+
+ break;
+ }
+
+ case ALTER_SUBSCRIPTION_ADD_PUBLICATION:
+ case ALTER_SUBSCRIPTION_DROP_PUBLICATION:
+ {
+ List *publist;
+ bool isadd = stmt->kind == ALTER_SUBSCRIPTION_ADD_PUBLICATION;
+
+ supported_opts = SUBOPT_REFRESH | SUBOPT_COPY_DATA;
+ parse_subscription_options(pstate, stmt->options,
+ supported_opts, &opts);
+
+ publist = merge_publications(sub->publications, stmt->publication, isadd, stmt->subname);
+ values[Anum_pg_subscription_subpublications - 1] =
+ publicationListToArray(publist);
+ replaces[Anum_pg_subscription_subpublications - 1] = true;
+
+ update_tuple = true;
+
+ /* Refresh if user asked us to. */
+ if (opts.refresh)
+ {
+ /* We only need to validate user specified publications. */
+ List *validate_publications = (isadd) ? stmt->publication : NULL;
+
+ if (!sub->enabled)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("ALTER SUBSCRIPTION with refresh is not allowed for disabled subscriptions"),
+ /* translator: %s is an SQL ALTER command */
+ errhint("Use %s instead.",
+ isadd ?
+ "ALTER SUBSCRIPTION ... ADD PUBLICATION ... WITH (refresh = false)" :
+ "ALTER SUBSCRIPTION ... DROP PUBLICATION ... WITH (refresh = false)")));
+
+ /*
+ * See ALTER_SUBSCRIPTION_REFRESH for details why this is
+ * not allowed.
+ */
+ if (sub->twophasestate == LOGICALREP_TWOPHASE_STATE_ENABLED && opts.copy_data)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("ALTER SUBSCRIPTION with refresh and copy_data is not allowed when two_phase is enabled"),
+ /* translator: %s is an SQL ALTER command */
+ errhint("Use %s with refresh = false, or with copy_data = false, or use DROP/CREATE SUBSCRIPTION.",
+ isadd ?
+ "ALTER SUBSCRIPTION ... ADD PUBLICATION" :
+ "ALTER SUBSCRIPTION ... DROP PUBLICATION")));
+
+ PreventInTransactionBlock(isTopLevel, "ALTER SUBSCRIPTION with refresh");
+
+ /* Refresh the new list of publications. */
+ sub->publications = publist;
+
+ AlterSubscription_refresh(sub, opts.copy_data,
+ validate_publications);
+ }
+
+ break;
+ }
+
+ case ALTER_SUBSCRIPTION_REFRESH:
+ {
+ if (!sub->enabled)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("ALTER SUBSCRIPTION ... REFRESH is not allowed for disabled subscriptions")));
+
+ parse_subscription_options(pstate, stmt->options,
+ SUBOPT_COPY_DATA, &opts);
+
+ /*
+ * The subscription option "two_phase" requires that
+ * replication has passed the initial table synchronization
+ * phase before the two_phase becomes properly enabled.
+ *
+ * But, having reached this two-phase commit "enabled" state
+ * we must not allow any subsequent table initialization to
+ * occur. So the ALTER SUBSCRIPTION ... REFRESH is disallowed
+ * when the user had requested two_phase = on mode.
+ *
+ * The exception to this restriction is when copy_data =
+ * false, because when copy_data is false the tablesync will
+ * start already in READY state and will exit directly without
+ * doing anything.
+ *
+ * For more details see comments atop worker.c.
+ */
+ if (sub->twophasestate == LOGICALREP_TWOPHASE_STATE_ENABLED && opts.copy_data)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("ALTER SUBSCRIPTION ... REFRESH with copy_data is not allowed when two_phase is enabled"),
+ errhint("Use ALTER SUBSCRIPTION ... REFRESH with copy_data = false, or use DROP/CREATE SUBSCRIPTION.")));
+
+ PreventInTransactionBlock(isTopLevel, "ALTER SUBSCRIPTION ... REFRESH");
+
+ AlterSubscription_refresh(sub, opts.copy_data, NULL);
+
+ break;
+ }
+
+ case ALTER_SUBSCRIPTION_SKIP:
+ {
+ parse_subscription_options(pstate, stmt->options, SUBOPT_LSN, &opts);
+
+ /* ALTER SUBSCRIPTION ... SKIP supports only LSN option */
+ Assert(IsSet(opts.specified_opts, SUBOPT_LSN));
+
+ if (!superuser())
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("must be superuser to skip transaction")));
+
+ /*
+ * If the user sets subskiplsn, we do a sanity check to make
+ * sure that the specified LSN is a probable value.
+ */
+ if (!XLogRecPtrIsInvalid(opts.lsn))
+ {
+ RepOriginId originid;
+ char originname[NAMEDATALEN];
+ XLogRecPtr remote_lsn;
+
+ snprintf(originname, sizeof(originname), "pg_%u", subid);
+ originid = replorigin_by_name(originname, false);
+ remote_lsn = replorigin_get_progress(originid, false);
+
+ /* Check the given LSN is at least a future LSN */
+ if (!XLogRecPtrIsInvalid(remote_lsn) && opts.lsn < remote_lsn)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("skip WAL location (LSN %X/%X) must be greater than origin LSN %X/%X",
+ LSN_FORMAT_ARGS(opts.lsn),
+ LSN_FORMAT_ARGS(remote_lsn))));
+ }
+
+ values[Anum_pg_subscription_subskiplsn - 1] = LSNGetDatum(opts.lsn);
+ replaces[Anum_pg_subscription_subskiplsn - 1] = true;
+
+ update_tuple = true;
+ break;
+ }
+
+ default:
+ elog(ERROR, "unrecognized ALTER SUBSCRIPTION kind %d",
+ stmt->kind);
+ }
+
+ /* Update the catalog if needed. */
+ if (update_tuple)
+ {
+ tup = heap_modify_tuple(tup, RelationGetDescr(rel), values, nulls,
+ replaces);
+
+ CatalogTupleUpdate(rel, &tup->t_self, tup);
+
+ heap_freetuple(tup);
+ }
+
+ table_close(rel, RowExclusiveLock);
+
+ ObjectAddressSet(myself, SubscriptionRelationId, subid);
+
+ InvokeObjectPostAlterHook(SubscriptionRelationId, subid, 0);
+
+ return myself;
+}
+
+/*
+ * Drop a subscription
+ */
+void
+DropSubscription(DropSubscriptionStmt *stmt, bool isTopLevel)
+{
+ Relation rel;
+ ObjectAddress myself;
+ HeapTuple tup;
+ Oid subid;
+ Datum datum;
+ bool isnull;
+ char *subname;
+ char *conninfo;
+ char *slotname;
+ List *subworkers;
+ ListCell *lc;
+ char originname[NAMEDATALEN];
+ char *err = NULL;
+ WalReceiverConn *wrconn;
+ Form_pg_subscription form;
+ List *rstates;
+
+ /*
+ * Lock pg_subscription with AccessExclusiveLock to ensure that the
+ * launcher doesn't restart new worker during dropping the subscription
+ */
+ rel = table_open(SubscriptionRelationId, AccessExclusiveLock);
+
+ tup = SearchSysCache2(SUBSCRIPTIONNAME, MyDatabaseId,
+ CStringGetDatum(stmt->subname));
+
+ if (!HeapTupleIsValid(tup))
+ {
+ table_close(rel, NoLock);
+
+ if (!stmt->missing_ok)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("subscription \"%s\" does not exist",
+ stmt->subname)));
+ else
+ ereport(NOTICE,
+ (errmsg("subscription \"%s\" does not exist, skipping",
+ stmt->subname)));
+
+ return;
+ }
+
+ form = (Form_pg_subscription) GETSTRUCT(tup);
+ subid = form->oid;
+
+ /* must be owner */
+ if (!pg_subscription_ownercheck(subid, GetUserId()))
+ aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_SUBSCRIPTION,
+ stmt->subname);
+
+ /* DROP hook for the subscription being removed */
+ InvokeObjectDropHook(SubscriptionRelationId, subid, 0);
+
+ /*
+ * Lock the subscription so nobody else can do anything with it (including
+ * the replication workers).
+ */
+ LockSharedObject(SubscriptionRelationId, subid, 0, AccessExclusiveLock);
+
+ /* Get subname */
+ datum = SysCacheGetAttr(SUBSCRIPTIONOID, tup,
+ Anum_pg_subscription_subname, &isnull);
+ Assert(!isnull);
+ subname = pstrdup(NameStr(*DatumGetName(datum)));
+
+ /* Get conninfo */
+ datum = SysCacheGetAttr(SUBSCRIPTIONOID, tup,
+ Anum_pg_subscription_subconninfo, &isnull);
+ Assert(!isnull);
+ conninfo = TextDatumGetCString(datum);
+
+ /* Get slotname */
+ datum = SysCacheGetAttr(SUBSCRIPTIONOID, tup,
+ Anum_pg_subscription_subslotname, &isnull);
+ if (!isnull)
+ slotname = pstrdup(NameStr(*DatumGetName(datum)));
+ else
+ slotname = NULL;
+
+ /*
+ * Since dropping a replication slot is not transactional, the replication
+ * slot stays dropped even if the transaction rolls back. So we cannot
+ * run DROP SUBSCRIPTION inside a transaction block if dropping the
+ * replication slot. Also, in this case, we report a message for dropping
+ * the subscription to the cumulative stats system.
+ *
+ * XXX The command name should really be something like "DROP SUBSCRIPTION
+ * of a subscription that is associated with a replication slot", but we
+ * don't have the proper facilities for that.
+ */
+ if (slotname)
+ PreventInTransactionBlock(isTopLevel, "DROP SUBSCRIPTION");
+
+ ObjectAddressSet(myself, SubscriptionRelationId, subid);
+ EventTriggerSQLDropAddObject(&myself, true, true);
+
+ /* Remove the tuple from catalog. */
+ CatalogTupleDelete(rel, &tup->t_self);
+
+ ReleaseSysCache(tup);
+
+ /*
+ * Stop all the subscription workers immediately.
+ *
+ * This is necessary if we are dropping the replication slot, so that the
+ * slot becomes accessible.
+ *
+ * It is also necessary if the subscription is disabled and was disabled
+ * in the same transaction. Then the workers haven't seen the disabling
+ * yet and will still be running, leading to hangs later when we want to
+ * drop the replication origin. If the subscription was disabled before
+ * this transaction, then there shouldn't be any workers left, so this
+ * won't make a difference.
+ *
+ * New workers won't be started because we hold an exclusive lock on the
+ * subscription till the end of the transaction.
+ */
+ LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
+ subworkers = logicalrep_workers_find(subid, false);
+ LWLockRelease(LogicalRepWorkerLock);
+ foreach(lc, subworkers)
+ {
+ LogicalRepWorker *w = (LogicalRepWorker *) lfirst(lc);
+
+ logicalrep_worker_stop(w->subid, w->relid);
+ }
+ list_free(subworkers);
+
+ /*
+ * Cleanup of tablesync replication origins.
+ *
+ * Any READY-state relations would already have dealt with clean-ups.
+ *
+ * Note that the state can't change because we have already stopped both
+ * the apply and tablesync workers and they can't restart because of
+ * exclusive lock on the subscription.
+ */
+ rstates = GetSubscriptionNotReadyRelations(subid);
+ foreach(lc, rstates)
+ {
+ SubscriptionRelState *rstate = (SubscriptionRelState *) lfirst(lc);
+ Oid relid = rstate->relid;
+
+ /* Only cleanup resources of tablesync workers */
+ if (!OidIsValid(relid))
+ continue;
+
+ /*
+ * Drop the tablesync's origin tracking if exists.
+ *
+ * It is possible that the origin is not yet created for tablesync
+ * worker so passing missing_ok = true. This can happen for the states
+ * before SUBREL_STATE_FINISHEDCOPY.
+ */
+ ReplicationOriginNameForTablesync(subid, relid, originname,
+ sizeof(originname));
+ replorigin_drop_by_name(originname, true, false);
+ }
+
+ /* Clean up dependencies */
+ deleteSharedDependencyRecordsFor(SubscriptionRelationId, subid, 0);
+
+ /* Remove any associated relation synchronization states. */
+ RemoveSubscriptionRel(subid, InvalidOid);
+
+ /* Remove the origin tracking if exists. */
+ snprintf(originname, sizeof(originname), "pg_%u", subid);
+ replorigin_drop_by_name(originname, true, false);
+
+ /*
+ * Tell the cumulative stats system that the subscription is getting
+ * dropped.
+ */
+ pgstat_drop_subscription(subid);
+
+ /*
+ * If there is no slot associated with the subscription, we can finish
+ * here.
+ */
+ if (!slotname && rstates == NIL)
+ {
+ table_close(rel, NoLock);
+ return;
+ }
+
+ /*
+ * Try to acquire the connection necessary for dropping slots.
+ *
+ * Note: If the slotname is NONE/NULL then we allow the command to finish
+ * and users need to manually cleanup the apply and tablesync worker slots
+ * later.
+ *
+ * This has to be at the end because otherwise if there is an error while
+ * doing the database operations we won't be able to rollback dropped
+ * slot.
+ */
+ load_file("libpqwalreceiver", false);
+
+ wrconn = walrcv_connect(conninfo, true, subname, &err);
+ if (wrconn == NULL)
+ {
+ if (!slotname)
+ {
+ /* be tidy */
+ list_free(rstates);
+ table_close(rel, NoLock);
+ return;
+ }
+ else
+ {
+ ReportSlotConnectionError(rstates, subid, slotname, err);
+ }
+ }
+
+ PG_TRY();
+ {
+ foreach(lc, rstates)
+ {
+ SubscriptionRelState *rstate = (SubscriptionRelState *) lfirst(lc);
+ Oid relid = rstate->relid;
+
+ /* Only cleanup resources of tablesync workers */
+ if (!OidIsValid(relid))
+ continue;
+
+ /*
+ * Drop the tablesync slots associated with removed tables.
+ *
+ * For SYNCDONE/READY states, the tablesync slot is known to have
+ * already been dropped by the tablesync worker.
+ *
+ * For other states, there is no certainty, maybe the slot does
+ * not exist yet. Also, if we fail after removing some of the
+ * slots, next time, it will again try to drop already dropped
+ * slots and fail. For these reasons, we allow missing_ok = true
+ * for the drop.
+ */
+ if (rstate->state != SUBREL_STATE_SYNCDONE)
+ {
+ char syncslotname[NAMEDATALEN] = {0};
+
+ ReplicationSlotNameForTablesync(subid, relid, syncslotname,
+ sizeof(syncslotname));
+ ReplicationSlotDropAtPubNode(wrconn, syncslotname, true);
+ }
+ }
+
+ list_free(rstates);
+
+ /*
+ * If there is a slot associated with the subscription, then drop the
+ * replication slot at the publisher.
+ */
+ if (slotname)
+ ReplicationSlotDropAtPubNode(wrconn, slotname, false);
+ }
+ PG_FINALLY();
+ {
+ walrcv_disconnect(wrconn);
+ }
+ PG_END_TRY();
+
+ table_close(rel, NoLock);
+}
+
+/*
+ * Drop the replication slot at the publisher node using the replication
+ * connection.
+ *
+ * missing_ok - if true then only issue a LOG message if the slot doesn't
+ * exist.
+ */
+void
+ReplicationSlotDropAtPubNode(WalReceiverConn *wrconn, char *slotname, bool missing_ok)
+{
+	StringInfoData cmd;
+
+	Assert(wrconn);
+
+	/* Make sure the walreceiver library's symbols are loaded. */
+	load_file("libpqwalreceiver", false);
+
+	/* Build the replication command; WAIT makes the drop block if in use. */
+	initStringInfo(&cmd);
+	appendStringInfo(&cmd, "DROP_REPLICATION_SLOT %s WAIT", quote_identifier(slotname));
+
+	PG_TRY();
+	{
+		WalRcvExecResult *res;
+
+		res = walrcv_exec(wrconn, cmd.data, 0, NULL);
+
+		if (res->status == WALRCV_OK_COMMAND)
+		{
+			/* NOTICE. Success. */
+			ereport(NOTICE,
+					(errmsg("dropped replication slot \"%s\" on publisher",
+							slotname)));
+		}
+		else if (res->status == WALRCV_ERROR &&
+				 missing_ok &&
+				 res->sqlstate == ERRCODE_UNDEFINED_OBJECT)
+		{
+			/* LOG. Error, but missing_ok = true. */
+			ereport(LOG,
+					(errmsg("could not drop replication slot \"%s\" on publisher: %s",
+							slotname, res->err)));
+		}
+		else
+		{
+			/* ERROR. */
+			ereport(ERROR,
+					(errcode(ERRCODE_CONNECTION_FAILURE),
+					 errmsg("could not drop replication slot \"%s\" on publisher: %s",
+							slotname, res->err)));
+		}
+
+		walrcv_clear_result(res);
+	}
+	PG_FINALLY();
+	{
+		/* Release the command buffer even if walrcv_exec errored out. */
+		pfree(cmd.data);
+	}
+	PG_END_TRY();
+}
+
+/*
+ * Internal workhorse for changing a subscription owner
+ *
+ * "rel" is pg_subscription, already opened by the caller; "tup" is the
+ * target subscription's catalog tuple, which is updated in place.
+ */
+static void
+AlterSubscriptionOwner_internal(Relation rel, HeapTuple tup, Oid newOwnerId)
+{
+	Form_pg_subscription form;
+
+	form = (Form_pg_subscription) GETSTRUCT(tup);
+
+	/* Quick exit if the ownership wouldn't actually change. */
+	if (form->subowner == newOwnerId)
+		return;
+
+	/* The current user must own the subscription to reassign it. */
+	if (!pg_subscription_ownercheck(form->oid, GetUserId()))
+		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_SUBSCRIPTION,
+					   NameStr(form->subname));
+
+	/* New owner must be a superuser */
+	if (!superuser_arg(newOwnerId))
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 errmsg("permission denied to change owner of subscription \"%s\"",
+						NameStr(form->subname)),
+				 errhint("The owner of a subscription must be a superuser.")));
+
+	form->subowner = newOwnerId;
+	CatalogTupleUpdate(rel, &tup->t_self, tup);
+
+	/* Update owner dependency reference */
+	changeDependencyOnOwner(SubscriptionRelationId,
+							form->oid,
+							newOwnerId);
+
+	InvokeObjectPostAlterHook(SubscriptionRelationId,
+							  form->oid, 0);
+
+	/* Let the apply launcher notice the change once we commit. */
+	ApplyLauncherWakeupAtCommit();
+}
+
+/*
+ * Change subscription owner -- by name
+ *
+ * Returns the ObjectAddress of the affected subscription.
+ */
+ObjectAddress
+AlterSubscriptionOwner(const char *name, Oid newOwnerId)
+{
+	Oid			subid;
+	HeapTuple	tup;
+	Relation	rel;
+	ObjectAddress address;
+	Form_pg_subscription form;
+
+	rel = table_open(SubscriptionRelationId, RowExclusiveLock);
+
+	/* Look up the subscription by (database, name); get a modifiable copy. */
+	tup = SearchSysCacheCopy2(SUBSCRIPTIONNAME, MyDatabaseId,
+							  CStringGetDatum(name));
+
+	if (!HeapTupleIsValid(tup))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("subscription \"%s\" does not exist", name)));
+
+	form = (Form_pg_subscription) GETSTRUCT(tup);
+	subid = form->oid;
+
+	AlterSubscriptionOwner_internal(rel, tup, newOwnerId);
+
+	ObjectAddressSet(address, SubscriptionRelationId, subid);
+
+	heap_freetuple(tup);
+
+	table_close(rel, RowExclusiveLock);
+
+	return address;
+}
+
+/*
+ * Change subscription owner -- by OID
+ */
+void
+AlterSubscriptionOwner_oid(Oid subid, Oid newOwnerId)
+{
+	HeapTuple	tup;
+	Relation	rel;
+
+	rel = table_open(SubscriptionRelationId, RowExclusiveLock);
+
+	/* Fetch a modifiable copy of the subscription's catalog tuple. */
+	tup = SearchSysCacheCopy1(SUBSCRIPTIONOID, ObjectIdGetDatum(subid));
+
+	if (!HeapTupleIsValid(tup))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("subscription with OID %u does not exist", subid)));
+
+	AlterSubscriptionOwner_internal(rel, tup, newOwnerId);
+
+	heap_freetuple(tup);
+
+	table_close(rel, RowExclusiveLock);
+}
+
+/*
+ * Get the list of tables which belong to specified publications on the
+ * publisher connection.
+ *
+ * Note that we don't support the case where the column list is different for
+ * the same table in different publications to avoid sending unwanted column
+ * information for some of the rows. This can happen when both the column
+ * list and row filter are specified for different publications.
+ *
+ * Returns a List of RangeVars naming the replicated tables.
+ */
+static List *
+fetch_table_list(WalReceiverConn *wrconn, List *publications)
+{
+	WalRcvExecResult *res;
+	StringInfoData cmd;
+	TupleTableSlot *slot;
+	Oid			tableRow[3] = {TEXTOID, TEXTOID, NAMEARRAYOID};
+	List	   *tablelist = NIL;
+	/* only servers of version 15+ expose attnames in pg_publication_tables */
+	bool		check_columnlist = (walrcv_server_version(wrconn) >= 150000);
+
+	initStringInfo(&cmd);
+	appendStringInfoString(&cmd, "SELECT DISTINCT t.schemaname, t.tablename \n");
+
+	/* Get column lists for each relation if the publisher supports it */
+	if (check_columnlist)
+		appendStringInfoString(&cmd, ", t.attnames\n");
+
+	appendStringInfoString(&cmd, "FROM pg_catalog.pg_publication_tables t\n"
+						   " WHERE t.pubname IN (");
+	get_publications_str(publications, &cmd, true);
+	appendStringInfoChar(&cmd, ')');
+
+	res = walrcv_exec(wrconn, cmd.data, check_columnlist ? 3 : 2, tableRow);
+	pfree(cmd.data);
+
+	if (res->status != WALRCV_OK_TUPLES)
+		ereport(ERROR,
+				(errcode(ERRCODE_CONNECTION_FAILURE),
+				 errmsg("could not receive list of replicated tables from the publisher: %s",
+						res->err)));
+
+	/* Process tables. */
+	slot = MakeSingleTupleTableSlot(res->tupledesc, &TTSOpsMinimalTuple);
+	while (tuplestore_gettupleslot(res->tuplestore, true, false, slot))
+	{
+		char	   *nspname;
+		char	   *relname;
+		bool		isnull;
+		RangeVar   *rv;
+
+		nspname = TextDatumGetCString(slot_getattr(slot, 1, &isnull));
+		Assert(!isnull);
+		relname = TextDatumGetCString(slot_getattr(slot, 2, &isnull));
+		Assert(!isnull);
+
+		rv = makeRangeVar(nspname, relname, -1);
+
+		/*
+		 * Despite SELECT DISTINCT, the same table can appear twice when it
+		 * is published with differing column lists; reject that case.
+		 */
+		if (check_columnlist && list_member(tablelist, rv))
+			ereport(ERROR,
+					errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					errmsg("cannot use different column lists for table \"%s.%s\" in different publications",
+						   nspname, relname));
+		else
+			tablelist = lappend(tablelist, rv);
+
+		ExecClearTuple(slot);
+	}
+	ExecDropSingleTupleTableSlot(slot);
+
+	walrcv_clear_result(res);
+
+	return tablelist;
+}
+
+/*
+ * This is to report the connection failure while dropping replication slots.
+ * Here, we report the WARNING for all tablesync slots so that user can drop
+ * them manually, if required.
+ *
+ * Note: this function does not return; it always ends by raising ERROR.
+ */
+static void
+ReportSlotConnectionError(List *rstates, Oid subid, char *slotname, char *err)
+{
+	ListCell   *lc;
+
+	foreach(lc, rstates)
+	{
+		SubscriptionRelState *rstate = (SubscriptionRelState *) lfirst(lc);
+		Oid			relid = rstate->relid;
+
+		/* Only cleanup resources of tablesync workers */
+		if (!OidIsValid(relid))
+			continue;
+
+		/*
+		 * Caller needs to ensure that relstate doesn't change underneath us.
+		 * See DropSubscription where we get the relstates.
+		 */
+		if (rstate->state != SUBREL_STATE_SYNCDONE)
+		{
+			char		syncslotname[NAMEDATALEN] = {0};
+
+			ReplicationSlotNameForTablesync(subid, relid, syncslotname,
+											sizeof(syncslotname));
+			elog(WARNING, "could not drop tablesync replication slot \"%s\"",
+				 syncslotname);
+		}
+	}
+
+	ereport(ERROR,
+			(errcode(ERRCODE_CONNECTION_FAILURE),
+			 errmsg("could not connect to publisher when attempting to drop replication slot \"%s\": %s",
+					slotname, err),
+	/* translator: %s is an SQL ALTER command */
+			 errhint("Use %s to disable the subscription, and then use %s to disassociate it from the slot.",
+					 "ALTER SUBSCRIPTION ... DISABLE",
+					 "ALTER SUBSCRIPTION ... SET (slot_name = NONE)")));
+}
+
+/*
+ * Check for duplicates in the given list of publications and error out if
+ * found one.  Add publications to datums as text datums, if datums is not
+ * NULL.
+ */
+static void
+check_duplicates_in_publist(List *publist, Datum *datums)
+{
+	ListCell   *lc;
+	int			ndatums = 0;
+
+	foreach(lc, publist)
+	{
+		char	   *pubname = strVal(lfirst(lc));
+		ListCell   *prior;
+
+		/* Compare against every publication that appears earlier in the list. */
+		foreach(prior, publist)
+		{
+			char	   *priorname;
+
+			/* Stop once we reach the current entry: no duplicate found. */
+			if (prior == lc)
+				break;
+
+			priorname = strVal(lfirst(prior));
+			if (strcmp(pubname, priorname) == 0)
+				ereport(ERROR,
+						(errcode(ERRCODE_DUPLICATE_OBJECT),
+						 errmsg("publication name \"%s\" used more than once",
+								priorname)));
+		}
+
+		/* Optionally hand the name back to the caller as a text datum. */
+		if (datums)
+			datums[ndatums++] = CStringGetTextDatum(pubname);
+	}
+}
+
+/*
+ * Merge current subscription's publications and user-specified publications
+ * from ADD/DROP PUBLICATIONS.
+ *
+ * If addpub is true, we will add the list of publications into oldpublist.
+ * Otherwise, we will delete the list of publications from oldpublist. The
+ * returned list is a copy, oldpublist itself is not changed.
+ *
+ * subname is the subscription name, for error messages.
+ */
+static List *
+merge_publications(List *oldpublist, List *newpublist, bool addpub, const char *subname)
+{
+	ListCell   *lc;
+
+	/* Work on a copy so the caller's list is left untouched. */
+	oldpublist = list_copy(oldpublist);
+
+	/* Reject duplicates within the user-specified list itself. */
+	check_duplicates_in_publist(newpublist, NULL);
+
+	foreach(lc, newpublist)
+	{
+		char	   *name = strVal(lfirst(lc));
+		ListCell   *lc2;
+		bool		found = false;
+
+		/* Is this publication already present in the existing list? */
+		foreach(lc2, oldpublist)
+		{
+			char	   *pubname = strVal(lfirst(lc2));
+
+			if (strcmp(name, pubname) == 0)
+			{
+				found = true;
+				if (addpub)
+					ereport(ERROR,
+							(errcode(ERRCODE_DUPLICATE_OBJECT),
+							 errmsg("publication \"%s\" is already in subscription \"%s\"",
+									name, subname)));
+				else
+					/* foreach_delete_current keeps the outer loop valid */
+					oldpublist = foreach_delete_current(oldpublist, lc2);
+
+				break;
+			}
+		}
+
+		if (addpub && !found)
+			oldpublist = lappend(oldpublist, makeString(name));
+		else if (!addpub && !found)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+					 errmsg("publication \"%s\" is not in subscription \"%s\"",
+							name, subname)));
+	}
+
+	/*
+	 * XXX Probably no strong reason for this, but for now it's to make ALTER
+	 * SUBSCRIPTION ... DROP PUBLICATION consistent with SET PUBLICATION.
+	 */
+	if (!oldpublist)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+				 errmsg("cannot drop all the publications from a subscription")));
+
+	return oldpublist;
+}
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
new file mode 100644
index 0000000..97f9a22
--- /dev/null
+++ b/src/backend/commands/tablecmds.c
@@ -0,0 +1,19402 @@
+/*-------------------------------------------------------------------------
+ *
+ * tablecmds.c
+ * Commands for creating and altering table structures and settings
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/commands/tablecmds.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/attmap.h"
+#include "access/genam.h"
+#include "access/heapam.h"
+#include "access/heapam_xlog.h"
+#include "access/multixact.h"
+#include "access/reloptions.h"
+#include "access/relscan.h"
+#include "access/sysattr.h"
+#include "access/tableam.h"
+#include "access/toast_compression.h"
+#include "access/xact.h"
+#include "access/xlog.h"
+#include "access/xloginsert.h"
+#include "catalog/catalog.h"
+#include "catalog/heap.h"
+#include "catalog/index.h"
+#include "catalog/namespace.h"
+#include "catalog/objectaccess.h"
+#include "catalog/partition.h"
+#include "catalog/pg_am.h"
+#include "catalog/pg_attrdef.h"
+#include "catalog/pg_collation.h"
+#include "catalog/pg_constraint.h"
+#include "catalog/pg_depend.h"
+#include "catalog/pg_foreign_table.h"
+#include "catalog/pg_inherits.h"
+#include "catalog/pg_largeobject.h"
+#include "catalog/pg_namespace.h"
+#include "catalog/pg_opclass.h"
+#include "catalog/pg_statistic_ext.h"
+#include "catalog/pg_tablespace.h"
+#include "catalog/pg_trigger.h"
+#include "catalog/pg_type.h"
+#include "catalog/storage.h"
+#include "catalog/storage_xlog.h"
+#include "catalog/toasting.h"
+#include "commands/cluster.h"
+#include "commands/comment.h"
+#include "commands/defrem.h"
+#include "commands/event_trigger.h"
+#include "commands/policy.h"
+#include "commands/sequence.h"
+#include "commands/tablecmds.h"
+#include "commands/tablespace.h"
+#include "commands/trigger.h"
+#include "commands/typecmds.h"
+#include "commands/user.h"
+#include "executor/executor.h"
+#include "foreign/fdwapi.h"
+#include "foreign/foreign.h"
+#include "miscadmin.h"
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/parsenodes.h"
+#include "optimizer/optimizer.h"
+#include "parser/parse_clause.h"
+#include "parser/parse_coerce.h"
+#include "parser/parse_collate.h"
+#include "parser/parse_expr.h"
+#include "parser/parse_oper.h"
+#include "parser/parse_relation.h"
+#include "parser/parse_type.h"
+#include "parser/parse_utilcmd.h"
+#include "parser/parser.h"
+#include "partitioning/partbounds.h"
+#include "partitioning/partdesc.h"
+#include "pgstat.h"
+#include "rewrite/rewriteDefine.h"
+#include "rewrite/rewriteHandler.h"
+#include "rewrite/rewriteManip.h"
+#include "storage/bufmgr.h"
+#include "storage/lmgr.h"
+#include "storage/lock.h"
+#include "storage/predicate.h"
+#include "storage/smgr.h"
+#include "tcop/utility.h"
+#include "utils/acl.h"
+#include "utils/builtins.h"
+#include "utils/fmgroids.h"
+#include "utils/inval.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/partcache.h"
+#include "utils/relcache.h"
+#include "utils/ruleutils.h"
+#include "utils/snapmgr.h"
+#include "utils/syscache.h"
+#include "utils/timestamp.h"
+#include "utils/typcache.h"
+
+/*
+ * ON COMMIT action list
+ */
+typedef struct OnCommitItem
+{
+	Oid			relid;			/* relid of relation */
+	OnCommitAction oncommit;	/* what to do at end of xact */
+
+	/*
+	 * If this entry was created during the current transaction,
+	 * creating_subid is the ID of the creating subxact; if created in a prior
+	 * transaction, creating_subid is zero. If deleted during the current
+	 * transaction, deleting_subid is the ID of the deleting subxact; if no
+	 * deletion request is pending, deleting_subid is zero.
+	 */
+	SubTransactionId creating_subid;
+	SubTransactionId deleting_subid;
+} OnCommitItem;
+
+/* Backend-local list of OnCommitItems for relations with ON COMMIT actions */
+static List *on_commits = NIL;
+
+
+/*
+ * State information for ALTER TABLE
+ *
+ * The pending-work queue for an ALTER TABLE is a List of AlteredTableInfo
+ * structs, one for each table modified by the operation (the named table
+ * plus any child tables that are affected). We save lists of subcommands
+ * to apply to this table (possibly modified by parse transformation steps);
+ * these lists will be executed in Phase 2. If a Phase 3 step is needed,
+ * necessary information is stored in the constraints and newvals lists.
+ *
+ * Phase 2 is divided into multiple passes; subcommands are executed in
+ * a pass determined by subcommand type.
+ */
+
+/* The pass numbers below index the subcmds[] array in AlteredTableInfo. */
+#define AT_PASS_UNSET			-1	/* UNSET will cause ERROR */
+#define AT_PASS_DROP			0	/* DROP (all flavors) */
+#define AT_PASS_ALTER_TYPE		1	/* ALTER COLUMN TYPE */
+#define AT_PASS_OLD_INDEX		2	/* re-add existing indexes */
+#define AT_PASS_OLD_CONSTR		3	/* re-add existing constraints */
+/* We could support a RENAME COLUMN pass here, but not currently used */
+#define AT_PASS_ADD_COL			4	/* ADD COLUMN */
+#define AT_PASS_ADD_CONSTR		5	/* ADD constraints (initial examination) */
+#define AT_PASS_COL_ATTRS		6	/* set column attributes, eg NOT NULL */
+#define AT_PASS_ADD_INDEXCONSTR	7	/* ADD index-based constraints */
+#define AT_PASS_ADD_INDEX		8	/* ADD indexes */
+#define AT_PASS_ADD_OTHERCONSTR	9	/* ADD other constraints, defaults */
+#define AT_PASS_MISC			10	/* other stuff */
+#define AT_NUM_PASSES			11
+
+typedef struct AlteredTableInfo
+{
+	/* Information saved before any work commences: */
+	Oid			relid;			/* Relation to work on */
+	char		relkind;		/* Its relkind */
+	TupleDesc	oldDesc;		/* Pre-modification tuple descriptor */
+
+	/*
+	 * Transiently set during Phase 2, normally set to NULL.
+	 *
+	 * ATRewriteCatalogs sets this when it starts, and closes when ATExecCmd
+	 * returns control. This can be exploited by ATExecCmd subroutines to
+	 * close/reopen across transaction boundaries.
+	 */
+	Relation	rel;
+
+	/* Information saved by Phase 1 for Phase 2: */
+	List	   *subcmds[AT_NUM_PASSES]; /* Lists of AlterTableCmd */
+	/* Information saved by Phases 1/2 for Phase 3: */
+	List	   *constraints;	/* List of NewConstraint */
+	List	   *newvals;		/* List of NewColumnValue */
+	List	   *afterStmts;		/* List of utility command parsetrees */
+	bool		verify_new_notnull; /* T if we should recheck NOT NULL */
+	int			rewrite;		/* Reason for forced rewrite, if any */
+	Oid			newAccessMethod;	/* new access method; 0 means no change */
+	Oid			newTableSpace;	/* new tablespace; 0 means no change */
+	bool		chgPersistence; /* T if SET LOGGED/UNLOGGED is used */
+	char		newrelpersistence;	/* if above is true */
+	Expr	   *partition_constraint;	/* for attach partition validation */
+	/* true, if validating default due to some other attach/detach */
+	bool		validate_default;
+	/* Objects to rebuild after completing ALTER TYPE operations */
+	List	   *changedConstraintOids;	/* OIDs of constraints to rebuild */
+	List	   *changedConstraintDefs;	/* string definitions of same */
+	List	   *changedIndexOids;	/* OIDs of indexes to rebuild */
+	List	   *changedIndexDefs;	/* string definitions of same */
+	char	   *replicaIdentityIndex;	/* index to reset as REPLICA IDENTITY */
+	char	   *clusterOnIndex; /* index to use for CLUSTER */
+	List	   *changedStatisticsOids;	/* OIDs of statistics to rebuild */
+	List	   *changedStatisticsDefs;	/* string definitions of same */
+} AlteredTableInfo;
+
+/* Struct describing one new constraint to check in Phase 3 scan */
+/* Note: new NOT NULL constraints are handled elsewhere */
+/* The "if FOREIGN" fields are only meaningful for foreign-key constraints */
+typedef struct NewConstraint
+{
+	char	   *name;			/* Constraint name, or NULL if none */
+	ConstrType	contype;		/* CHECK or FOREIGN */
+	Oid			refrelid;		/* PK rel, if FOREIGN */
+	Oid			refindid;		/* OID of PK's index, if FOREIGN */
+	Oid			conid;			/* OID of pg_constraint entry, if FOREIGN */
+	Node	   *qual;			/* Check expr or CONSTR_FOREIGN Constraint */
+	ExprState  *qualstate;		/* Execution state for CHECK expr */
+} NewConstraint;
+
+/*
+ * Struct describing one new column value that needs to be computed during
+ * Phase 3 copy (this could be either a new column with a non-null default, or
+ * a column that we're changing the type of). Columns without such an entry
+ * are just copied from the old table during ATRewriteTable. Note that the
+ * expr is an expression over *old* table values, except when is_generated
+ * is true; then it is an expression over columns of the *new* tuple.
+ */
+typedef struct NewColumnValue
+{
+	AttrNumber	attnum;			/* which column */
+	Expr	   *expr;			/* expression to compute, evaluated per row */
+	ExprState  *exprstate;		/* execution state */
+	bool		is_generated;	/* is it a GENERATED expression? */
+} NewColumnValue;
+
+/*
+ * Error-reporting support for RemoveRelations
+ *
+ * One entry per droppable relkind, giving the message texts to use when the
+ * named object is missing or turns out to be of a different kind.
+ */
+struct dropmsgstrings
+{
+	char		kind;			/* relkind this entry applies to */
+	int			nonexistent_code;	/* errcode for "does not exist" */
+	const char *nonexistent_msg;	/* error: object missing */
+	const char *skipping_msg;	/* notice for IF EXISTS */
+	const char *nota_msg;		/* error: wrong object kind */
+	const char *drophint_msg;	/* hint naming the right DROP command */
+};
+
+static const struct dropmsgstrings dropmsgstringarray[] = {
+	{RELKIND_RELATION,
+		ERRCODE_UNDEFINED_TABLE,
+		gettext_noop("table \"%s\" does not exist"),
+		gettext_noop("table \"%s\" does not exist, skipping"),
+		gettext_noop("\"%s\" is not a table"),
+	gettext_noop("Use DROP TABLE to remove a table.")},
+	{RELKIND_SEQUENCE,
+		ERRCODE_UNDEFINED_TABLE,
+		gettext_noop("sequence \"%s\" does not exist"),
+		gettext_noop("sequence \"%s\" does not exist, skipping"),
+		gettext_noop("\"%s\" is not a sequence"),
+	gettext_noop("Use DROP SEQUENCE to remove a sequence.")},
+	{RELKIND_VIEW,
+		ERRCODE_UNDEFINED_TABLE,
+		gettext_noop("view \"%s\" does not exist"),
+		gettext_noop("view \"%s\" does not exist, skipping"),
+		gettext_noop("\"%s\" is not a view"),
+	gettext_noop("Use DROP VIEW to remove a view.")},
+	{RELKIND_MATVIEW,
+		ERRCODE_UNDEFINED_TABLE,
+		gettext_noop("materialized view \"%s\" does not exist"),
+		gettext_noop("materialized view \"%s\" does not exist, skipping"),
+		gettext_noop("\"%s\" is not a materialized view"),
+	gettext_noop("Use DROP MATERIALIZED VIEW to remove a materialized view.")},
+	{RELKIND_INDEX,
+		ERRCODE_UNDEFINED_OBJECT,
+		gettext_noop("index \"%s\" does not exist"),
+		gettext_noop("index \"%s\" does not exist, skipping"),
+		gettext_noop("\"%s\" is not an index"),
+	gettext_noop("Use DROP INDEX to remove an index.")},
+	{RELKIND_COMPOSITE_TYPE,
+		ERRCODE_UNDEFINED_OBJECT,
+		gettext_noop("type \"%s\" does not exist"),
+		gettext_noop("type \"%s\" does not exist, skipping"),
+		gettext_noop("\"%s\" is not a type"),
+	gettext_noop("Use DROP TYPE to remove a type.")},
+	{RELKIND_FOREIGN_TABLE,
+		ERRCODE_UNDEFINED_OBJECT,
+		gettext_noop("foreign table \"%s\" does not exist"),
+		gettext_noop("foreign table \"%s\" does not exist, skipping"),
+		gettext_noop("\"%s\" is not a foreign table"),
+	gettext_noop("Use DROP FOREIGN TABLE to remove a foreign table.")},
+	{RELKIND_PARTITIONED_TABLE,
+		ERRCODE_UNDEFINED_TABLE,
+		gettext_noop("table \"%s\" does not exist"),
+		gettext_noop("table \"%s\" does not exist, skipping"),
+		gettext_noop("\"%s\" is not a table"),
+	gettext_noop("Use DROP TABLE to remove a table.")},
+	{RELKIND_PARTITIONED_INDEX,
+		ERRCODE_UNDEFINED_OBJECT,
+		gettext_noop("index \"%s\" does not exist"),
+		gettext_noop("index \"%s\" does not exist, skipping"),
+		gettext_noop("\"%s\" is not an index"),
+	gettext_noop("Use DROP INDEX to remove an index.")},
+	/* list terminator */
+	{'\0', 0, NULL, NULL, NULL, NULL}
+};
+
+/* communication between RemoveRelations and RangeVarCallbackForDropRelation */
+struct DropRelationCallbackState
+{
+	/* These fields are set by RemoveRelations: */
+	char		expected_relkind;	/* relkind the DROP command targets */
+	LOCKMODE	heap_lockmode;	/* lock level to take on the relation */
+	/* These fields are state to track which subsidiary locks are held: */
+	Oid			heapOid;
+	Oid			partParentOid;
+	/* These fields are passed back by RangeVarCallbackForDropRelation: */
+	char		actual_relkind;
+	char		actual_relpersistence;
+};
+
+/* Alter table target-type flags for ATSimplePermissions */
+/* These are OR-able bits saying which relkinds a subcommand may apply to */
+#define ATT_TABLE				0x0001
+#define ATT_VIEW				0x0002
+#define ATT_MATVIEW				0x0004
+#define ATT_INDEX				0x0008
+#define ATT_COMPOSITE_TYPE		0x0010
+#define ATT_FOREIGN_TABLE		0x0020
+#define ATT_PARTITIONED_INDEX	0x0040
+#define ATT_SEQUENCE			0x0080
+
+/*
+ * ForeignTruncateInfo
+ *
+ * Information related to truncation of foreign tables.  This is used for
+ * the elements in a hash table. It uses the server OID as lookup key,
+ * and includes a per-server list of all foreign tables involved in the
+ * truncation.
+ */
+typedef struct ForeignTruncateInfo
+{
+	Oid			serverid;		/* hash key: foreign server OID */
+	List	   *rels;			/* foreign tables on that server */
+} ForeignTruncateInfo;
+
+/*
+ * Partition tables are expected to be dropped when the parent partitioned
+ * table gets dropped. Hence for partitioning we use AUTO dependency.
+ * Otherwise, for regular inheritance use NORMAL dependency.
+ */
+#define child_dependency_type(child_is_partition)	\
+	((child_is_partition) ? DEPENDENCY_AUTO : DEPENDENCY_NORMAL)
+
+static void truncate_check_rel(Oid relid, Form_pg_class reltuple);
+static void truncate_check_perms(Oid relid, Form_pg_class reltuple);
+static void truncate_check_activity(Relation rel);
+static void RangeVarCallbackForTruncate(const RangeVar *relation,
+ Oid relId, Oid oldRelId, void *arg);
+static List *MergeAttributes(List *schema, List *supers, char relpersistence,
+ bool is_partition, List **supconstr);
+static bool MergeCheckConstraint(List *constraints, char *name, Node *expr);
+static void MergeAttributesIntoExisting(Relation child_rel, Relation parent_rel);
+static void MergeConstraintsIntoExisting(Relation child_rel, Relation parent_rel);
+static void StoreCatalogInheritance(Oid relationId, List *supers,
+ bool child_is_partition);
+static void StoreCatalogInheritance1(Oid relationId, Oid parentOid,
+ int32 seqNumber, Relation inhRelation,
+ bool child_is_partition);
+static int findAttrByName(const char *attributeName, List *schema);
+static void AlterIndexNamespaces(Relation classRel, Relation rel,
+ Oid oldNspOid, Oid newNspOid, ObjectAddresses *objsMoved);
+static void AlterSeqNamespaces(Relation classRel, Relation rel,
+ Oid oldNspOid, Oid newNspOid, ObjectAddresses *objsMoved,
+ LOCKMODE lockmode);
+static ObjectAddress ATExecAlterConstraint(Relation rel, AlterTableCmd *cmd,
+ bool recurse, bool recursing, LOCKMODE lockmode);
+static bool ATExecAlterConstrRecurse(Constraint *cmdcon, Relation conrel, Relation tgrel,
+ Relation rel, HeapTuple contuple, List **otherrelids,
+ LOCKMODE lockmode);
+static ObjectAddress ATExecValidateConstraint(List **wqueue,
+ Relation rel, char *constrName,
+ bool recurse, bool recursing, LOCKMODE lockmode);
+static int transformColumnNameList(Oid relId, List *colList,
+ int16 *attnums, Oid *atttypids);
+static int transformFkeyGetPrimaryKey(Relation pkrel, Oid *indexOid,
+ List **attnamelist,
+ int16 *attnums, Oid *atttypids,
+ Oid *opclasses);
+static Oid transformFkeyCheckAttrs(Relation pkrel,
+ int numattrs, int16 *attnums,
+ Oid *opclasses);
+static void checkFkeyPermissions(Relation rel, int16 *attnums, int natts);
+static CoercionPathType findFkeyCast(Oid targetTypeId, Oid sourceTypeId,
+ Oid *funcid);
+static void validateForeignKeyConstraint(char *conname,
+ Relation rel, Relation pkrel,
+ Oid pkindOid, Oid constraintOid);
+static void ATController(AlterTableStmt *parsetree,
+ Relation rel, List *cmds, bool recurse, LOCKMODE lockmode,
+ AlterTableUtilityContext *context);
+static void ATPrepCmd(List **wqueue, Relation rel, AlterTableCmd *cmd,
+ bool recurse, bool recursing, LOCKMODE lockmode,
+ AlterTableUtilityContext *context);
+static void ATRewriteCatalogs(List **wqueue, LOCKMODE lockmode,
+ AlterTableUtilityContext *context);
+static void ATExecCmd(List **wqueue, AlteredTableInfo *tab,
+ AlterTableCmd *cmd, LOCKMODE lockmode, int cur_pass,
+ AlterTableUtilityContext *context);
+static AlterTableCmd *ATParseTransformCmd(List **wqueue, AlteredTableInfo *tab,
+ Relation rel, AlterTableCmd *cmd,
+ bool recurse, LOCKMODE lockmode,
+ int cur_pass,
+ AlterTableUtilityContext *context);
+static void ATRewriteTables(AlterTableStmt *parsetree,
+ List **wqueue, LOCKMODE lockmode,
+ AlterTableUtilityContext *context);
+static void ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode);
+static AlteredTableInfo *ATGetQueueEntry(List **wqueue, Relation rel);
+static void ATSimplePermissions(AlterTableType cmdtype, Relation rel, int allowed_targets);
+static void ATSimpleRecursion(List **wqueue, Relation rel,
+ AlterTableCmd *cmd, bool recurse, LOCKMODE lockmode,
+ AlterTableUtilityContext *context);
+static void ATCheckPartitionsNotInUse(Relation rel, LOCKMODE lockmode);
+static void ATTypedTableRecursion(List **wqueue, Relation rel, AlterTableCmd *cmd,
+ LOCKMODE lockmode,
+ AlterTableUtilityContext *context);
+static List *find_typed_table_dependencies(Oid typeOid, const char *typeName,
+ DropBehavior behavior);
+static void ATPrepAddColumn(List **wqueue, Relation rel, bool recurse, bool recursing,
+ bool is_view, AlterTableCmd *cmd, LOCKMODE lockmode,
+ AlterTableUtilityContext *context);
+static ObjectAddress ATExecAddColumn(List **wqueue, AlteredTableInfo *tab,
+ Relation rel, AlterTableCmd **cmd,
+ bool recurse, bool recursing,
+ LOCKMODE lockmode, int cur_pass,
+ AlterTableUtilityContext *context);
+static bool check_for_column_name_collision(Relation rel, const char *colname,
+ bool if_not_exists);
+static void add_column_datatype_dependency(Oid relid, int32 attnum, Oid typid);
+static void add_column_collation_dependency(Oid relid, int32 attnum, Oid collid);
+static void ATPrepDropNotNull(Relation rel, bool recurse, bool recursing);
+static ObjectAddress ATExecDropNotNull(Relation rel, const char *colName, LOCKMODE lockmode);
+static void ATPrepSetNotNull(List **wqueue, Relation rel,
+ AlterTableCmd *cmd, bool recurse, bool recursing,
+ LOCKMODE lockmode,
+ AlterTableUtilityContext *context);
+static ObjectAddress ATExecSetNotNull(AlteredTableInfo *tab, Relation rel,
+ const char *colName, LOCKMODE lockmode);
+static void ATExecCheckNotNull(AlteredTableInfo *tab, Relation rel,
+ const char *colName, LOCKMODE lockmode);
+static bool NotNullImpliedByRelConstraints(Relation rel, Form_pg_attribute attr);
+static bool ConstraintImpliedByRelConstraint(Relation scanrel,
+ List *testConstraint, List *provenConstraint);
+static ObjectAddress ATExecColumnDefault(Relation rel, const char *colName,
+ Node *newDefault, LOCKMODE lockmode);
+static ObjectAddress ATExecCookedColumnDefault(Relation rel, AttrNumber attnum,
+ Node *newDefault);
+static ObjectAddress ATExecAddIdentity(Relation rel, const char *colName,
+ Node *def, LOCKMODE lockmode);
+static ObjectAddress ATExecSetIdentity(Relation rel, const char *colName,
+ Node *def, LOCKMODE lockmode);
+static ObjectAddress ATExecDropIdentity(Relation rel, const char *colName, bool missing_ok, LOCKMODE lockmode);
+static void ATPrepDropExpression(Relation rel, AlterTableCmd *cmd, bool recurse, bool recursing, LOCKMODE lockmode);
+static ObjectAddress ATExecDropExpression(Relation rel, const char *colName, bool missing_ok, LOCKMODE lockmode);
+static ObjectAddress ATExecSetStatistics(Relation rel, const char *colName, int16 colNum,
+ Node *newValue, LOCKMODE lockmode);
+static ObjectAddress ATExecSetOptions(Relation rel, const char *colName,
+ Node *options, bool isReset, LOCKMODE lockmode);
+static ObjectAddress ATExecSetStorage(Relation rel, const char *colName,
+ Node *newValue, LOCKMODE lockmode);
+static void ATPrepDropColumn(List **wqueue, Relation rel, bool recurse, bool recursing,
+ AlterTableCmd *cmd, LOCKMODE lockmode,
+ AlterTableUtilityContext *context);
+static ObjectAddress ATExecDropColumn(List **wqueue, Relation rel, const char *colName,
+ DropBehavior behavior,
+ bool recurse, bool recursing,
+ bool missing_ok, LOCKMODE lockmode,
+ ObjectAddresses *addrs);
+static ObjectAddress ATExecAddIndex(AlteredTableInfo *tab, Relation rel,
+ IndexStmt *stmt, bool is_rebuild, LOCKMODE lockmode);
+static ObjectAddress ATExecAddStatistics(AlteredTableInfo *tab, Relation rel,
+ CreateStatsStmt *stmt, bool is_rebuild, LOCKMODE lockmode);
+static ObjectAddress ATExecAddConstraint(List **wqueue,
+ AlteredTableInfo *tab, Relation rel,
+ Constraint *newConstraint, bool recurse, bool is_readd,
+ LOCKMODE lockmode);
+static char *ChooseForeignKeyConstraintNameAddition(List *colnames);
+static ObjectAddress ATExecAddIndexConstraint(AlteredTableInfo *tab, Relation rel,
+ IndexStmt *stmt, LOCKMODE lockmode);
+static ObjectAddress ATAddCheckConstraint(List **wqueue,
+ AlteredTableInfo *tab, Relation rel,
+ Constraint *constr,
+ bool recurse, bool recursing, bool is_readd,
+ LOCKMODE lockmode);
+static ObjectAddress ATAddForeignKeyConstraint(List **wqueue, AlteredTableInfo *tab,
+ Relation rel, Constraint *fkconstraint,
+ bool recurse, bool recursing,
+ LOCKMODE lockmode);
+static ObjectAddress addFkRecurseReferenced(List **wqueue, Constraint *fkconstraint,
+ Relation rel, Relation pkrel, Oid indexOid, Oid parentConstr,
+ int numfks, int16 *pkattnum, int16 *fkattnum,
+ Oid *pfeqoperators, Oid *ppeqoperators, Oid *ffeqoperators,
+ int numfkdelsetcols, int16 *fkdelsetcols,
+ bool old_check_ok,
+ Oid parentDelTrigger, Oid parentUpdTrigger);
+static void validateFkOnDeleteSetColumns(int numfks, const int16 *fkattnums,
+ int numfksetcols, const int16 *fksetcolsattnums,
+ List *fksetcols);
+static void addFkRecurseReferencing(List **wqueue, Constraint *fkconstraint,
+ Relation rel, Relation pkrel, Oid indexOid, Oid parentConstr,
+ int numfks, int16 *pkattnum, int16 *fkattnum,
+ Oid *pfeqoperators, Oid *ppeqoperators, Oid *ffeqoperators,
+ int numfkdelsetcols, int16 *fkdelsetcols,
+ bool old_check_ok, LOCKMODE lockmode,
+ Oid parentInsTrigger, Oid parentUpdTrigger);
+static void CloneForeignKeyConstraints(List **wqueue, Relation parentRel,
+ Relation partitionRel);
+static void CloneFkReferenced(Relation parentRel, Relation partitionRel);
+static void CloneFkReferencing(List **wqueue, Relation parentRel,
+ Relation partRel);
+static void createForeignKeyCheckTriggers(Oid myRelOid, Oid refRelOid,
+ Constraint *fkconstraint, Oid constraintOid,
+ Oid indexOid,
+ Oid parentInsTrigger, Oid parentUpdTrigger,
+ Oid *insertTrigOid, Oid *updateTrigOid);
+static void createForeignKeyActionTriggers(Relation rel, Oid refRelOid,
+ Constraint *fkconstraint, Oid constraintOid,
+ Oid indexOid,
+ Oid parentDelTrigger, Oid parentUpdTrigger,
+ Oid *deleteTrigOid, Oid *updateTrigOid);
+static bool tryAttachPartitionForeignKey(ForeignKeyCacheInfo *fk,
+ Oid partRelid,
+ Oid parentConstrOid, int numfks,
+ AttrNumber *mapped_conkey, AttrNumber *confkey,
+ Oid *conpfeqop,
+ Oid parentInsTrigger,
+ Oid parentUpdTrigger,
+ Relation trigrel);
+static void GetForeignKeyActionTriggers(Relation trigrel,
+ Oid conoid, Oid confrelid, Oid conrelid,
+ Oid *deleteTriggerOid,
+ Oid *updateTriggerOid);
+static void GetForeignKeyCheckTriggers(Relation trigrel,
+ Oid conoid, Oid confrelid, Oid conrelid,
+ Oid *insertTriggerOid,
+ Oid *updateTriggerOid);
+static void ATExecDropConstraint(Relation rel, const char *constrName,
+ DropBehavior behavior,
+ bool recurse, bool recursing,
+ bool missing_ok, LOCKMODE lockmode);
+static void ATPrepAlterColumnType(List **wqueue,
+ AlteredTableInfo *tab, Relation rel,
+ bool recurse, bool recursing,
+ AlterTableCmd *cmd, LOCKMODE lockmode,
+ AlterTableUtilityContext *context);
+static bool ATColumnChangeRequiresRewrite(Node *expr, AttrNumber varattno);
+static ObjectAddress ATExecAlterColumnType(AlteredTableInfo *tab, Relation rel,
+ AlterTableCmd *cmd, LOCKMODE lockmode);
+static void RememberConstraintForRebuilding(Oid conoid, AlteredTableInfo *tab);
+static void RememberIndexForRebuilding(Oid indoid, AlteredTableInfo *tab);
+static void RememberStatisticsForRebuilding(Oid indoid, AlteredTableInfo *tab);
+static void ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab,
+ LOCKMODE lockmode);
+static void ATPostAlterTypeParse(Oid oldId, Oid oldRelId, Oid refRelId,
+ char *cmd, List **wqueue, LOCKMODE lockmode,
+ bool rewrite);
+static void RebuildConstraintComment(AlteredTableInfo *tab, int pass,
+ Oid objid, Relation rel, List *domname,
+ const char *conname);
+static void TryReuseIndex(Oid oldId, IndexStmt *stmt);
+static void TryReuseForeignKey(Oid oldId, Constraint *con);
+static ObjectAddress ATExecAlterColumnGenericOptions(Relation rel, const char *colName,
+ List *options, LOCKMODE lockmode);
+static void change_owner_fix_column_acls(Oid relationOid,
+ Oid oldOwnerId, Oid newOwnerId);
+static void change_owner_recurse_to_sequences(Oid relationOid,
+ Oid newOwnerId, LOCKMODE lockmode);
+static ObjectAddress ATExecClusterOn(Relation rel, const char *indexName,
+ LOCKMODE lockmode);
+static void ATExecDropCluster(Relation rel, LOCKMODE lockmode);
+static void ATPrepSetAccessMethod(AlteredTableInfo *tab, Relation rel, const char *amname);
+static bool ATPrepChangePersistence(Relation rel, bool toLogged);
+static void ATPrepSetTableSpace(AlteredTableInfo *tab, Relation rel,
+ const char *tablespacename, LOCKMODE lockmode);
+static void ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode);
+static void ATExecSetTableSpaceNoStorage(Relation rel, Oid newTableSpace);
+static void ATExecSetRelOptions(Relation rel, List *defList,
+ AlterTableType operation,
+ LOCKMODE lockmode);
+static void ATExecEnableDisableTrigger(Relation rel, const char *trigname,
+ char fires_when, bool skip_system, bool recurse,
+ LOCKMODE lockmode);
+static void ATExecEnableDisableRule(Relation rel, const char *rulename,
+ char fires_when, LOCKMODE lockmode);
+static void ATPrepAddInherit(Relation child_rel);
+static ObjectAddress ATExecAddInherit(Relation child_rel, RangeVar *parent, LOCKMODE lockmode);
+static ObjectAddress ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode);
+static void drop_parent_dependency(Oid relid, Oid refclassid, Oid refobjid,
+ DependencyType deptype);
+static ObjectAddress ATExecAddOf(Relation rel, const TypeName *ofTypename, LOCKMODE lockmode);
+static void ATExecDropOf(Relation rel, LOCKMODE lockmode);
+static void ATExecReplicaIdentity(Relation rel, ReplicaIdentityStmt *stmt, LOCKMODE lockmode);
+static void ATExecGenericOptions(Relation rel, List *options);
+static void ATExecSetRowSecurity(Relation rel, bool rls);
+static void ATExecForceNoForceRowSecurity(Relation rel, bool force_rls);
+static ObjectAddress ATExecSetCompression(AlteredTableInfo *tab, Relation rel,
+ const char *column, Node *newValue, LOCKMODE lockmode);
+
+static void index_copy_data(Relation rel, RelFileNode newrnode);
+static const char *storage_name(char c);
+
+static void RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid,
+ Oid oldRelOid, void *arg);
+static void RangeVarCallbackForAlterRelation(const RangeVar *rv, Oid relid,
+ Oid oldrelid, void *arg);
+static PartitionSpec *transformPartitionSpec(Relation rel, PartitionSpec *partspec, char *strategy);
+static void ComputePartitionAttrs(ParseState *pstate, Relation rel, List *partParams, AttrNumber *partattrs,
+ List **partexprs, Oid *partopclass, Oid *partcollation, char strategy);
+static void CreateInheritance(Relation child_rel, Relation parent_rel);
+static void RemoveInheritance(Relation child_rel, Relation parent_rel,
+ bool allow_detached);
+static ObjectAddress ATExecAttachPartition(List **wqueue, Relation rel,
+ PartitionCmd *cmd,
+ AlterTableUtilityContext *context);
+static void AttachPartitionEnsureIndexes(Relation rel, Relation attachrel);
+static void QueuePartitionConstraintValidation(List **wqueue, Relation scanrel,
+ List *partConstraint,
+ bool validate_default);
+static void CloneRowTriggersToPartition(Relation parent, Relation partition);
+static void DetachAddConstraintIfNeeded(List **wqueue, Relation partRel);
+static void DropClonedTriggersFromPartition(Oid partitionId);
+static ObjectAddress ATExecDetachPartition(List **wqueue, AlteredTableInfo *tab,
+ Relation rel, RangeVar *name,
+ bool concurrent);
+static void DetachPartitionFinalize(Relation rel, Relation partRel,
+ bool concurrent, Oid defaultPartOid);
+static ObjectAddress ATExecDetachPartitionFinalize(Relation rel, RangeVar *name);
+static ObjectAddress ATExecAttachPartitionIdx(List **wqueue, Relation rel,
+ RangeVar *name);
+static void validatePartitionedIndex(Relation partedIdx, Relation partedTbl);
+static void refuseDupeIndexAttach(Relation parentIdx, Relation partIdx,
+ Relation partitionTbl);
+static List *GetParentedForeignKeyRefs(Relation partition);
+static void ATDetachCheckNoForeignKeyRefs(Relation partition);
+static char GetAttributeCompression(Oid atttypid, char *compression);
+
+
/* ----------------------------------------------------------------
 *		DefineRelation
 *				Creates a new relation.
 *
 * stmt carries parsetree information from an ordinary CREATE TABLE statement.
 * The other arguments are used to extend the behavior for other cases:
 * relkind: relkind to assign to the new relation
 * ownerId: if not InvalidOid, use this as the new relation's owner.
 * typaddress: if not null, it's set to the pg_type entry's address.
 * queryString: for error reporting
 *
 * Note that permissions checks are done against current user regardless of
 * ownerId.  A nonzero ownerId is used when someone is creating a relation
 * "on behalf of" someone else, so we still want to see that the current user
 * has permissions to do it.
 *
 * If successful, returns the address of the new relation.
 * ----------------------------------------------------------------
 */
ObjectAddress
DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
			   ObjectAddress *typaddress, const char *queryString)
{
	char		relname[NAMEDATALEN];
	Oid			namespaceId;
	Oid			relationId;
	Oid			tablespaceId;
	Relation	rel;
	TupleDesc	descriptor;
	List	   *inheritOids;
	List	   *old_constraints;
	List	   *rawDefaults;
	List	   *cookedDefaults;
	Datum		reloptions;
	ListCell   *listptr;
	AttrNumber	attnum;
	bool		partitioned;
	static char *validnsps[] = HEAP_RELOPT_NAMESPACES;
	Oid			ofTypeId;
	ObjectAddress address;
	LOCKMODE	parentLockmode;
	const char *accessMethod = NULL;
	Oid			accessMethodId = InvalidOid;

	/*
	 * Truncate relname to appropriate length (probably a waste of time, as
	 * parser should have done this already).
	 */
	strlcpy(relname, stmt->relation->relname, NAMEDATALEN);

	/*
	 * Check consistency of arguments: ON COMMIT actions only make sense for
	 * temporary tables.
	 */
	if (stmt->oncommit != ONCOMMIT_NOOP
		&& stmt->relation->relpersistence != RELPERSISTENCE_TEMP)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
				 errmsg("ON COMMIT can only be used on temporary tables")));

	/* A PARTITION BY clause turns an ordinary table into a partitioned one. */
	if (stmt->partspec != NULL)
	{
		if (relkind != RELKIND_RELATION)
			elog(ERROR, "unexpected relkind: %d", (int) relkind);

		relkind = RELKIND_PARTITIONED_TABLE;
		partitioned = true;
	}
	else
		partitioned = false;

	/*
	 * Look up the namespace in which we are supposed to create the relation,
	 * check we have permission to create there, lock it against concurrent
	 * drop, and mark stmt->relation as RELPERSISTENCE_TEMP if a temporary
	 * namespace is selected.
	 */
	namespaceId =
		RangeVarGetAndCheckCreationNamespace(stmt->relation, NoLock, NULL);

	/*
	 * Security check: disallow creating temp tables from security-restricted
	 * code.  This is needed because calling code might not expect untrusted
	 * tables to appear in pg_temp at the front of its search path.
	 */
	if (stmt->relation->relpersistence == RELPERSISTENCE_TEMP
		&& InSecurityRestrictedOperation())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("cannot create temporary table within security-restricted operation")));

	/*
	 * Determine the lockmode to use when scanning parents.  A self-exclusive
	 * lock is needed here.
	 *
	 * For regular inheritance, if two backends attempt to add children to the
	 * same parent simultaneously, and that parent has no pre-existing
	 * children, then both will attempt to update the parent's relhassubclass
	 * field, leading to a "tuple concurrently updated" error.  Also, this
	 * interlocks against a concurrent ANALYZE on the parent table, which
	 * might otherwise be attempting to clear the parent's relhassubclass
	 * field, if its previous children were recently dropped.
	 *
	 * If the child table is a partition, then we instead grab an exclusive
	 * lock on the parent because its partition descriptor will be changed by
	 * addition of the new partition.
	 */
	parentLockmode = (stmt->partbound != NULL ? AccessExclusiveLock :
					  ShareUpdateExclusiveLock);

	/* Determine the list of OIDs of the parents. */
	inheritOids = NIL;
	foreach(listptr, stmt->inhRelations)
	{
		RangeVar   *rv = (RangeVar *) lfirst(listptr);
		Oid			parentOid;

		parentOid = RangeVarGetRelid(rv, parentLockmode, false);

		/*
		 * Reject duplications in the list of parents.
		 */
		if (list_member_oid(inheritOids, parentOid))
			ereport(ERROR,
					(errcode(ERRCODE_DUPLICATE_TABLE),
					 errmsg("relation \"%s\" would be inherited from more than once",
							get_rel_name(parentOid))));

		inheritOids = lappend_oid(inheritOids, parentOid);
	}

	/*
	 * Select tablespace to use: an explicitly indicated one, or (in the case
	 * of a partitioned table) the parent's, if it has one.
	 */
	if (stmt->tablespacename)
	{
		tablespaceId = get_tablespace_oid(stmt->tablespacename, false);

		if (partitioned && tablespaceId == MyDatabaseTableSpace)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("cannot specify default tablespace for partitioned relations")));
	}
	else if (stmt->partbound)
	{
		/*
		 * For partitions, when no other tablespace is specified, we default
		 * the tablespace to the parent partitioned table's.
		 */
		Assert(list_length(inheritOids) == 1);
		tablespaceId = get_rel_tablespace(linitial_oid(inheritOids));
	}
	else
		tablespaceId = InvalidOid;

	/* still nothing? use the default */
	if (!OidIsValid(tablespaceId))
		tablespaceId = GetDefaultTablespace(stmt->relation->relpersistence,
											partitioned);

	/* Check permissions except when using database's default */
	if (OidIsValid(tablespaceId) && tablespaceId != MyDatabaseTableSpace)
	{
		AclResult	aclresult;

		aclresult = pg_tablespace_aclcheck(tablespaceId, GetUserId(),
										   ACL_CREATE);
		if (aclresult != ACLCHECK_OK)
			aclcheck_error(aclresult, OBJECT_TABLESPACE,
						   get_tablespace_name(tablespaceId));
	}

	/* In all cases disallow placing user relations in pg_global */
	if (tablespaceId == GLOBALTABLESPACE_OID)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("only shared relations can be placed in pg_global tablespace")));

	/* Identify user ID that will own the table */
	if (!OidIsValid(ownerId))
		ownerId = GetUserId();

	/*
	 * Parse and validate reloptions, if any.  (Results of the validating
	 * parse are discarded here; we only care that the options are legal for
	 * this relkind.)
	 */
	reloptions = transformRelOptions((Datum) 0, stmt->options, NULL, validnsps,
									 true, false);

	switch (relkind)
	{
		case RELKIND_VIEW:
			(void) view_reloptions(reloptions, true);
			break;
		case RELKIND_PARTITIONED_TABLE:
			(void) partitioned_table_reloptions(reloptions, true);
			break;
		default:
			(void) heap_reloptions(relkind, reloptions, true);
	}

	/* For CREATE TABLE OF, resolve the type and check USAGE privilege. */
	if (stmt->ofTypename)
	{
		AclResult	aclresult;

		ofTypeId = typenameTypeId(NULL, stmt->ofTypename);

		aclresult = pg_type_aclcheck(ofTypeId, GetUserId(), ACL_USAGE);
		if (aclresult != ACLCHECK_OK)
			aclcheck_error_type(aclresult, ofTypeId);
	}
	else
		ofTypeId = InvalidOid;

	/*
	 * Look up inheritance ancestors and generate relation schema, including
	 * inherited attributes.  (Note that stmt->tableElts is destructively
	 * modified by MergeAttributes.)
	 */
	stmt->tableElts =
		MergeAttributes(stmt->tableElts, inheritOids,
						stmt->relation->relpersistence,
						stmt->partbound != NULL,
						&old_constraints);

	/*
	 * Create a tuple descriptor from the relation schema.  Note that this
	 * deals with column names, types, and NOT NULL constraints, but not
	 * default values or CHECK constraints; we handle those below.
	 */
	descriptor = BuildDescForRelation(stmt->tableElts);

	/*
	 * Find columns with default values and prepare for insertion of the
	 * defaults.  Pre-cooked (that is, inherited) defaults go into a list of
	 * CookedConstraint structs that we'll pass to heap_create_with_catalog,
	 * while raw defaults go into a list of RawColumnDefault structs that will
	 * be processed by AddRelationNewConstraints.  (We can't deal with raw
	 * expressions until we can do transformExpr.)
	 *
	 * We can set the atthasdef flags now in the tuple descriptor; this just
	 * saves StoreAttrDefault from having to do an immediate update of the
	 * pg_attribute rows.
	 */
	rawDefaults = NIL;
	cookedDefaults = NIL;
	attnum = 0;

	foreach(listptr, stmt->tableElts)
	{
		ColumnDef  *colDef = lfirst(listptr);
		Form_pg_attribute attr;

		attnum++;
		attr = TupleDescAttr(descriptor, attnum - 1);

		if (colDef->raw_default != NULL)
		{
			RawColumnDefault *rawEnt;

			/* A column can't have both a raw and a cooked default. */
			Assert(colDef->cooked_default == NULL);

			rawEnt = (RawColumnDefault *) palloc(sizeof(RawColumnDefault));
			rawEnt->attnum = attnum;
			rawEnt->raw_default = colDef->raw_default;
			rawEnt->missingMode = false;
			rawEnt->generated = colDef->generated;
			rawDefaults = lappend(rawDefaults, rawEnt);
			attr->atthasdef = true;
		}
		else if (colDef->cooked_default != NULL)
		{
			CookedConstraint *cooked;

			cooked = (CookedConstraint *) palloc(sizeof(CookedConstraint));
			cooked->contype = CONSTR_DEFAULT;
			cooked->conoid = InvalidOid;	/* until created */
			cooked->name = NULL;
			cooked->attnum = attnum;
			cooked->expr = colDef->cooked_default;
			cooked->skip_validation = false;
			cooked->is_local = true;	/* not used for defaults */
			cooked->inhcount = 0;	/* ditto */
			cooked->is_no_inherit = false;
			cookedDefaults = lappend(cookedDefaults, cooked);
			attr->atthasdef = true;
		}

		/* Copy identity/generated/compression properties into the descriptor */
		if (colDef->identity)
			attr->attidentity = colDef->identity;

		if (colDef->generated)
			attr->attgenerated = colDef->generated;

		if (colDef->compression)
			attr->attcompression = GetAttributeCompression(attr->atttypid,
														   colDef->compression);
	}

	/*
	 * If the statement hasn't specified an access method, but we're defining
	 * a type of relation that needs one, use the default.
	 */
	if (stmt->accessMethod != NULL)
	{
		accessMethod = stmt->accessMethod;

		if (partitioned)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("specifying a table access method is not supported on a partitioned table")));
	}
	else if (RELKIND_HAS_TABLE_AM(relkind))
		accessMethod = default_table_access_method;

	/* look up the access method, verify it is for a table */
	if (accessMethod != NULL)
		accessMethodId = get_table_am_oid(accessMethod, false);

	/*
	 * Create the relation.  Inherited defaults and constraints are passed in
	 * for immediate handling --- since they don't need parsing, they can be
	 * stored immediately.
	 */
	relationId = heap_create_with_catalog(relname,
										  namespaceId,
										  tablespaceId,
										  InvalidOid,
										  InvalidOid,
										  ofTypeId,
										  ownerId,
										  accessMethodId,
										  descriptor,
										  list_concat(cookedDefaults,
													  old_constraints),
										  relkind,
										  stmt->relation->relpersistence,
										  false,
										  false,
										  stmt->oncommit,
										  reloptions,
										  true,
										  allowSystemTableMods,
										  false,
										  InvalidOid,
										  typaddress);

	/*
	 * We must bump the command counter to make the newly-created relation
	 * tuple visible for opening.
	 */
	CommandCounterIncrement();

	/*
	 * Open the new relation and acquire exclusive lock on it.  This isn't
	 * really necessary for locking out other backends (since they can't see
	 * the new rel anyway until we commit), but it keeps the lock manager from
	 * complaining about deadlock risks.
	 */
	rel = relation_open(relationId, AccessExclusiveLock);

	/*
	 * Now add any newly specified column default and generation expressions
	 * to the new relation.  These are passed to us in the form of raw
	 * parsetrees; we need to transform them to executable expression trees
	 * before they can be added.  The most convenient way to do that is to
	 * apply the parser's transformExpr routine, but transformExpr doesn't
	 * work unless we have a pre-existing relation.  So, the transformation has
	 * to be postponed to this final step of CREATE TABLE.
	 *
	 * This needs to be before processing the partitioning clauses because
	 * those could refer to generated columns.
	 */
	if (rawDefaults)
		AddRelationNewConstraints(rel, rawDefaults, NIL,
								  true, true, false, queryString);

	/*
	 * Make column generation expressions visible for use by partitioning.
	 */
	CommandCounterIncrement();

	/* Process and store partition bound, if any. */
	if (stmt->partbound)
	{
		PartitionBoundSpec *bound;
		ParseState *pstate;
		Oid			parentId = linitial_oid(inheritOids),
					defaultPartOid;
		Relation	parent,
					defaultRel = NULL;
		ParseNamespaceItem *nsitem;

		/* Already have strong enough lock on the parent */
		parent = table_open(parentId, NoLock);

		/*
		 * We are going to try to validate the partition bound specification
		 * against the partition key of parentRel, so it better have one.
		 */
		if (parent->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
					 errmsg("\"%s\" is not partitioned",
							RelationGetRelationName(parent))));

		/*
		 * The partition constraint of the default partition depends on the
		 * partition bounds of every other partition.  It is possible that
		 * another backend might be about to execute a query on the default
		 * partition table, and that the query relies on previously cached
		 * default partition constraints.  We must therefore take a table lock
		 * strong enough to prevent all queries on the default partition from
		 * proceeding until we commit and send out a shared-cache-inval notice
		 * that will make them update their index lists.
		 *
		 * Order of locking: The relation being added won't be visible to
		 * other backends until it is committed, hence here in
		 * DefineRelation() the order of locking the default partition and the
		 * relation being added does not matter.  But at all other places we
		 * need to lock the default relation before we lock the relation being
		 * added or removed i.e. we should take the lock in same order at all
		 * the places such that lock parent, lock default partition and then
		 * lock the partition so as to avoid a deadlock.
		 */
		defaultPartOid =
			get_default_oid_from_partdesc(RelationGetPartitionDesc(parent,
																   true));
		if (OidIsValid(defaultPartOid))
			defaultRel = table_open(defaultPartOid, AccessExclusiveLock);

		/* Transform the bound values */
		pstate = make_parsestate(NULL);
		pstate->p_sourcetext = queryString;

		/*
		 * Add an nsitem containing this relation, so that transformExpr
		 * called on partition bound expressions is able to report errors
		 * using a proper context.
		 */
		nsitem = addRangeTableEntryForRelation(pstate, rel, AccessShareLock,
											   NULL, false, false);
		addNSItemToQuery(pstate, nsitem, false, true, true);

		bound = transformPartitionBound(pstate, parent, stmt->partbound);

		/*
		 * Check first that the new partition's bound is valid and does not
		 * overlap with any of existing partitions of the parent.
		 */
		check_new_partition_bound(relname, parent, bound, pstate);

		/*
		 * If the default partition exists, its partition constraints will
		 * change after the addition of this new partition such that it won't
		 * allow any row that qualifies for this new partition.  So, check that
		 * the existing data in the default partition satisfies the constraint
		 * as it will exist after adding this partition.
		 */
		if (OidIsValid(defaultPartOid))
		{
			check_default_partition_contents(parent, defaultRel, bound);
			/* Keep the lock until commit. */
			table_close(defaultRel, NoLock);
		}

		/* Update the pg_class entry. */
		StorePartitionBound(rel, parent, bound);

		table_close(parent, NoLock);
	}

	/* Store inheritance information for new rel. */
	StoreCatalogInheritance(relationId, inheritOids, stmt->partbound != NULL);

	/*
	 * Process the partitioning specification (if any) and store the partition
	 * key information into the catalog.
	 */
	if (partitioned)
	{
		ParseState *pstate;
		char		strategy;
		int			partnatts;
		AttrNumber	partattrs[PARTITION_MAX_KEYS];
		Oid			partopclass[PARTITION_MAX_KEYS];
		Oid			partcollation[PARTITION_MAX_KEYS];
		List	   *partexprs = NIL;

		pstate = make_parsestate(NULL);
		pstate->p_sourcetext = queryString;

		partnatts = list_length(stmt->partspec->partParams);

		/* Protect fixed-size arrays here and in executor */
		if (partnatts > PARTITION_MAX_KEYS)
			ereport(ERROR,
					(errcode(ERRCODE_TOO_MANY_COLUMNS),
					 errmsg("cannot partition using more than %d columns",
							PARTITION_MAX_KEYS)));

		/*
		 * We need to transform the raw parsetrees corresponding to partition
		 * expressions into executable expression trees.  Like column defaults
		 * and CHECK constraints, we could not have done the transformation
		 * earlier.
		 */
		stmt->partspec = transformPartitionSpec(rel, stmt->partspec,
												&strategy);

		ComputePartitionAttrs(pstate, rel, stmt->partspec->partParams,
							  partattrs, &partexprs, partopclass,
							  partcollation, strategy);

		StorePartitionKey(rel, strategy, partnatts, partattrs, partexprs,
						  partopclass, partcollation);

		/* make it all visible */
		CommandCounterIncrement();
	}

	/*
	 * If we're creating a partition, create now all the indexes, triggers,
	 * FKs defined in the parent.
	 *
	 * We can't do it earlier, because DefineIndex wants to know the partition
	 * key which we just stored.
	 */
	if (stmt->partbound)
	{
		Oid			parentId = linitial_oid(inheritOids);
		Relation	parent;
		List	   *idxlist;
		ListCell   *cell;

		/* Already have strong enough lock on the parent */
		parent = table_open(parentId, NoLock);
		idxlist = RelationGetIndexList(parent);

		/*
		 * For each index in the parent table, create one in the partition
		 */
		foreach(cell, idxlist)
		{
			Relation	idxRel = index_open(lfirst_oid(cell), AccessShareLock);
			AttrMap    *attmap;
			IndexStmt  *idxstmt;
			Oid			constraintOid;

			if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
			{
				/*
				 * Unique indexes can't be enforced on a foreign partition, so
				 * reject that case; other parent indexes are simply skipped.
				 */
				if (idxRel->rd_index->indisunique)
					ereport(ERROR,
							(errcode(ERRCODE_WRONG_OBJECT_TYPE),
							 errmsg("cannot create foreign partition of partitioned table \"%s\"",
									RelationGetRelationName(parent)),
							 errdetail("Table \"%s\" contains indexes that are unique.",
									   RelationGetRelationName(parent))));
				else
				{
					index_close(idxRel, AccessShareLock);
					continue;
				}
			}

			/* Map parent attribute numbers to the partition's, then clone */
			attmap = build_attrmap_by_name(RelationGetDescr(rel),
										   RelationGetDescr(parent));
			idxstmt =
				generateClonedIndexStmt(NULL, idxRel,
										attmap, &constraintOid);
			DefineIndex(RelationGetRelid(rel),
						idxstmt,
						InvalidOid,
						RelationGetRelid(idxRel),
						constraintOid,
						false, false, false, false, false);

			index_close(idxRel, AccessShareLock);
		}

		list_free(idxlist);

		/*
		 * If there are any row-level triggers, clone them to the new
		 * partition.
		 */
		if (parent->trigdesc != NULL)
			CloneRowTriggersToPartition(parent, rel);

		/*
		 * And foreign keys too.  Note that because we're freshly creating the
		 * table, there is no need to verify these new constraints.
		 */
		CloneForeignKeyConstraints(NULL, parent, rel);

		table_close(parent, NoLock);
	}

	/*
	 * Now add any newly specified CHECK constraints to the new relation.  Same
	 * as for defaults above, but these need to come after partitioning is set
	 * up.
	 */
	if (stmt->constraints)
		AddRelationNewConstraints(rel, NIL, stmt->constraints,
								  true, true, false, queryString);

	ObjectAddressSet(address, RelationRelationId, relationId);

	/*
	 * Clean up.  We keep lock on new relation (although it shouldn't be
	 * visible to anyone else anyway, until commit).
	 */
	relation_close(rel, NoLock);

	return address;
}
+
+/*
+ * Emit the right error or warning message for a "DROP" command issued on a
+ * non-existent relation
+ */
+static void
+DropErrorMsgNonExistent(RangeVar *rel, char rightkind, bool missing_ok)
+{
+ const struct dropmsgstrings *rentry;
+
+ if (rel->schemaname != NULL &&
+ !OidIsValid(LookupNamespaceNoError(rel->schemaname)))
+ {
+ if (!missing_ok)
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_SCHEMA),
+ errmsg("schema \"%s\" does not exist", rel->schemaname)));
+ }
+ else
+ {
+ ereport(NOTICE,
+ (errmsg("schema \"%s\" does not exist, skipping",
+ rel->schemaname)));
+ }
+ return;
+ }
+
+ for (rentry = dropmsgstringarray; rentry->kind != '\0'; rentry++)
+ {
+ if (rentry->kind == rightkind)
+ {
+ if (!missing_ok)
+ {
+ ereport(ERROR,
+ (errcode(rentry->nonexistent_code),
+ errmsg(rentry->nonexistent_msg, rel->relname)));
+ }
+ else
+ {
+ ereport(NOTICE, (errmsg(rentry->skipping_msg, rel->relname)));
+ break;
+ }
+ }
+ }
+
+ Assert(rentry->kind != '\0'); /* Should be impossible */
+}
+
+/*
+ * Emit the right error message for a "DROP" command issued on a
+ * relation of the wrong type
+ */
+static void
+DropErrorMsgWrongType(const char *relname, char wrongkind, char rightkind)
+{
+ const struct dropmsgstrings *rentry;
+ const struct dropmsgstrings *wentry;
+
+ for (rentry = dropmsgstringarray; rentry->kind != '\0'; rentry++)
+ if (rentry->kind == rightkind)
+ break;
+ Assert(rentry->kind != '\0');
+
+ for (wentry = dropmsgstringarray; wentry->kind != '\0'; wentry++)
+ if (wentry->kind == wrongkind)
+ break;
+ /* wrongkind could be something we don't have in our table... */
+
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg(rentry->nota_msg, relname),
+ (wentry->kind != '\0') ? errhint("%s", _(wentry->drophint_msg)) : 0));
+}
+
/*
 * RemoveRelations
 *		Implements DROP TABLE, DROP INDEX, DROP SEQUENCE, DROP VIEW,
 *		DROP MATERIALIZED VIEW, DROP FOREIGN TABLE
 */
void
RemoveRelations(DropStmt *drop)
{
	ObjectAddresses *objects;
	char		relkind;
	ListCell   *cell;
	int			flags = 0;
	LOCKMODE	lockmode = AccessExclusiveLock;

	/* DROP CONCURRENTLY uses a weaker lock, and has some restrictions */
	if (drop->concurrent)
	{
		/*
		 * Note that for temporary relations this lock may get upgraded later
		 * on, but as no other session can access a temporary relation, this
		 * is actually fine.
		 */
		lockmode = ShareUpdateExclusiveLock;
		/* Grammar only allows CONCURRENTLY with DROP INDEX */
		Assert(drop->removeType == OBJECT_INDEX);
		if (list_length(drop->objects) != 1)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("DROP INDEX CONCURRENTLY does not support dropping multiple objects")));
		if (drop->behavior == DROP_CASCADE)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("DROP INDEX CONCURRENTLY does not support CASCADE")));
	}

	/*
	 * First we identify all the relations, then we delete them in a single
	 * performMultipleDeletions() call.  This is to avoid unwanted DROP
	 * RESTRICT errors if one of the relations depends on another.
	 */

	/* Determine required relkind */
	switch (drop->removeType)
	{
		case OBJECT_TABLE:
			relkind = RELKIND_RELATION;
			break;

		case OBJECT_INDEX:
			relkind = RELKIND_INDEX;
			break;

		case OBJECT_SEQUENCE:
			relkind = RELKIND_SEQUENCE;
			break;

		case OBJECT_VIEW:
			relkind = RELKIND_VIEW;
			break;

		case OBJECT_MATVIEW:
			relkind = RELKIND_MATVIEW;
			break;

		case OBJECT_FOREIGN_TABLE:
			relkind = RELKIND_FOREIGN_TABLE;
			break;

		default:
			elog(ERROR, "unrecognized drop object type: %d",
				 (int) drop->removeType);
			relkind = 0;		/* keep compiler quiet */
			break;
	}

	/* Lock and validate each relation; build a list of object addresses */
	objects = new_object_addresses();

	foreach(cell, drop->objects)
	{
		RangeVar   *rel = makeRangeVarFromNameList((List *) lfirst(cell));
		Oid			relOid;
		ObjectAddress obj;
		struct DropRelationCallbackState state;

		/*
		 * These next few steps are a great deal like relation_openrv, but we
		 * don't bother building a relcache entry since we don't need it.
		 *
		 * Check for shared-cache-inval messages before trying to access the
		 * relation.  This is needed to cover the case where the name
		 * identifies a rel that has been dropped and recreated since the
		 * start of our transaction: if we don't flush the old syscache entry,
		 * then we'll latch onto that entry and suffer an error later.
		 */
		AcceptInvalidationMessages();

		/* Look up the appropriate relation using namespace search. */
		state.expected_relkind = relkind;
		state.heap_lockmode = drop->concurrent ?
			ShareUpdateExclusiveLock : AccessExclusiveLock;
		/* We must initialize these fields to show that no locks are held: */
		state.heapOid = InvalidOid;
		state.partParentOid = InvalidOid;

		relOid = RangeVarGetRelidExtended(rel, lockmode, RVR_MISSING_OK,
										  RangeVarCallbackForDropRelation,
										  (void *) &state);

		/* Not there? */
		if (!OidIsValid(relOid))
		{
			/* Emit ERROR, or NOTICE if missing_ok, then move on */
			DropErrorMsgNonExistent(rel, relkind, drop->missing_ok);
			continue;
		}

		/*
		 * Decide if concurrent mode needs to be used here or not.  The
		 * callback retrieved the rel's persistence for us.
		 */
		if (drop->concurrent &&
			state.actual_relpersistence != RELPERSISTENCE_TEMP)
		{
			Assert(list_length(drop->objects) == 1 &&
				   drop->removeType == OBJECT_INDEX);
			flags |= PERFORM_DELETION_CONCURRENTLY;
		}

		/*
		 * Concurrent index drop cannot be used with partitioned indexes,
		 * either.
		 */
		if ((flags & PERFORM_DELETION_CONCURRENTLY) != 0 &&
			state.actual_relkind == RELKIND_PARTITIONED_INDEX)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("cannot drop partitioned index \"%s\" concurrently",
							rel->relname)));

		/*
		 * If we're told to drop a partitioned index, we must acquire lock on
		 * all the children of its parent partitioned table before proceeding.
		 * Otherwise we'd try to lock the child index partitions before their
		 * tables, leading to potential deadlock against other sessions that
		 * will lock those objects in the other order.
		 */
		if (state.actual_relkind == RELKIND_PARTITIONED_INDEX)
			(void) find_all_inheritors(state.heapOid,
									   state.heap_lockmode,
									   NULL);

		/* OK, we're ready to delete this one */
		obj.classId = RelationRelationId;
		obj.objectId = relOid;
		obj.objectSubId = 0;

		add_exact_object_address(&obj, objects);
	}

	/* Delete everything that survived validation, in one dependency pass */
	performMultipleDeletions(objects, drop->behavior, flags);

	free_object_addresses(objects);
}
+
+/*
+ * Before acquiring a table lock, check whether we have sufficient rights.
+ * In the case of DROP INDEX, also try to lock the table before the index.
+ * Also, if the table to be dropped is a partition, we try to lock the parent
+ * first.
+ *
+ * Note: RangeVarGetRelidExtended() may invoke this callback multiple times
+ * if it has to retry the name lookup; oldRelOid is what the previous
+ * invocation resolved the name to (InvalidOid on the first call), which
+ * lets us release locks that no longer apply to the current target.
+ */
+static void
+RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
+ void *arg)
+{
+ HeapTuple tuple;
+ struct DropRelationCallbackState *state;
+ char expected_relkind;
+ bool is_partition;
+ Form_pg_class classform;
+ LOCKMODE heap_lockmode;
+ bool invalid_system_index = false;
+
+ state = (struct DropRelationCallbackState *) arg;
+ heap_lockmode = state->heap_lockmode;
+
+ /*
+ * If we previously locked some other index's heap, and the name we're
+ * looking up no longer refers to that relation, release the now-useless
+ * lock.
+ */
+ if (relOid != oldRelOid && OidIsValid(state->heapOid))
+ {
+ UnlockRelationOid(state->heapOid, heap_lockmode);
+ state->heapOid = InvalidOid;
+ }
+
+ /*
+ * Similarly, if we previously locked some other partition's heap, and the
+ * name we're looking up no longer refers to that relation, release the
+ * now-useless lock.
+ */
+ if (relOid != oldRelOid && OidIsValid(state->partParentOid))
+ {
+ UnlockRelationOid(state->partParentOid, AccessExclusiveLock);
+ state->partParentOid = InvalidOid;
+ }
+
+ /* Didn't find a relation, so no need for locking or permission checks. */
+ if (!OidIsValid(relOid))
+ return;
+
+ tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relOid));
+ if (!HeapTupleIsValid(tuple))
+ return; /* concurrently dropped, so nothing to do */
+ classform = (Form_pg_class) GETSTRUCT(tuple);
+ is_partition = classform->relispartition;
+
+ /* Pass back some data to save lookups in RemoveRelations */
+ state->actual_relkind = classform->relkind;
+ state->actual_relpersistence = classform->relpersistence;
+
+ /*
+ * Both RELKIND_RELATION and RELKIND_PARTITIONED_TABLE are OBJECT_TABLE,
+ * but RemoveRelations() can only pass one relkind for a given relation.
+ * It chooses RELKIND_RELATION for both regular and partitioned tables.
+ * That means we must be careful before giving the wrong type error when
+ * the relation is RELKIND_PARTITIONED_TABLE. An equivalent problem
+ * exists with indexes.
+ */
+ if (classform->relkind == RELKIND_PARTITIONED_TABLE)
+ expected_relkind = RELKIND_RELATION;
+ else if (classform->relkind == RELKIND_PARTITIONED_INDEX)
+ expected_relkind = RELKIND_INDEX;
+ else
+ expected_relkind = classform->relkind;
+
+ if (state->expected_relkind != expected_relkind)
+ DropErrorMsgWrongType(rel->relname, classform->relkind,
+ state->expected_relkind);
+
+ /* Allow DROP to either table owner or schema owner */
+ if (!pg_class_ownercheck(relOid, GetUserId()) &&
+ !pg_namespace_ownercheck(classform->relnamespace, GetUserId()))
+ aclcheck_error(ACLCHECK_NOT_OWNER,
+ get_relkind_objtype(classform->relkind),
+ rel->relname);
+
+ /*
+ * Check the case of a system index that might have been invalidated by a
+ * failed concurrent process and allow its drop. For the time being, this
+ * only concerns indexes of toast relations that became invalid during a
+ * REINDEX CONCURRENTLY process.
+ */
+ if (IsSystemClass(relOid, classform) && classform->relkind == RELKIND_INDEX)
+ {
+ HeapTuple locTuple;
+ Form_pg_index indexform;
+ bool indisvalid;
+
+ locTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(relOid));
+ if (!HeapTupleIsValid(locTuple))
+ {
+ /* index concurrently dropped: release pg_class tuple too */
+ ReleaseSysCache(tuple);
+ return;
+ }
+
+ indexform = (Form_pg_index) GETSTRUCT(locTuple);
+ indisvalid = indexform->indisvalid;
+ ReleaseSysCache(locTuple);
+
+ /* Mark object as being an invalid index of system catalogs */
+ if (!indisvalid)
+ invalid_system_index = true;
+ }
+
+ /* In the case of an invalid index, it is fine to bypass this check */
+ if (!invalid_system_index && !allowSystemTableMods && IsSystemClass(relOid, classform))
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("permission denied: \"%s\" is a system catalog",
+ rel->relname)));
+
+ /* done with the pg_class tuple; all needed fields were read above */
+ ReleaseSysCache(tuple);
+
+ /*
+ * In DROP INDEX, attempt to acquire lock on the parent table before
+ * locking the index. index_drop() will need this anyway, and since
+ * regular queries lock tables before their indexes, we risk deadlock if
+ * we do it the other way around. No error if we don't find a pg_index
+ * entry, though --- the relation may have been dropped. Note that this
+ * code will execute for either plain or partitioned indexes.
+ */
+ if (expected_relkind == RELKIND_INDEX &&
+ relOid != oldRelOid)
+ {
+ state->heapOid = IndexGetRelation(relOid, true);
+ if (OidIsValid(state->heapOid))
+ LockRelationOid(state->heapOid, heap_lockmode);
+ }
+
+ /*
+ * Similarly, if the relation is a partition, we must acquire lock on its
+ * parent before locking the partition. That's because queries lock the
+ * parent before its partitions, so we risk deadlock if we do it the other
+ * way around.
+ */
+ if (is_partition && relOid != oldRelOid)
+ {
+ state->partParentOid = get_partition_parent(relOid, true);
+ if (OidIsValid(state->partParentOid))
+ LockRelationOid(state->partParentOid, AccessExclusiveLock);
+ }
+}
+
+/*
+ * ExecuteTruncate
+ * Executes a TRUNCATE command.
+ *
+ * This is a multi-relation truncate. We first open and grab exclusive
+ * lock on all relations involved, checking permissions and otherwise
+ * verifying that the relation is OK for truncation. Note that if relations
+ * are foreign tables, at this stage, we have not yet checked that their
+ * foreign data in external data sources are OK for truncation. These are
+ * checked when foreign data are actually truncated later. In CASCADE mode,
+ * relations having FK references to the targeted relations are automatically
+ * added to the group; in RESTRICT mode, we check that all FK references are
+ * internal to the group that's being truncated. Finally all the relations
+ * are truncated and reindexed.
+ */
+void
+ExecuteTruncate(TruncateStmt *stmt)
+{
+ List *rels = NIL;
+ List *relids = NIL;
+ List *relids_logged = NIL;
+ ListCell *cell;
+
+ /*
+ * Open, exclusive-lock, and check all the explicitly-specified relations
+ */
+ foreach(cell, stmt->relations)
+ {
+ RangeVar *rv = lfirst(cell);
+ Relation rel;
+ bool recurse = rv->inh;
+ Oid myrelid;
+ /* truncation rewrites storage, so full exclusion is required */
+ LOCKMODE lockmode = AccessExclusiveLock;
+
+ /* lock before open; the callback does the permission checks */
+ myrelid = RangeVarGetRelidExtended(rv, lockmode,
+ 0, RangeVarCallbackForTruncate,
+ NULL);
+
+ /* don't throw error for "TRUNCATE foo, foo" */
+ if (list_member_oid(relids, myrelid))
+ continue;
+
+ /* open the relation, we already hold a lock on it */
+ rel = table_open(myrelid, NoLock);
+
+ /*
+ * RangeVarGetRelidExtended() has done most checks with its callback,
+ * but other checks with the now-opened Relation remain.
+ */
+ truncate_check_activity(rel);
+
+ rels = lappend(rels, rel);
+ relids = lappend_oid(relids, myrelid);
+
+ /* Log this relation only if needed for logical decoding */
+ if (RelationIsLogicallyLogged(rel))
+ relids_logged = lappend_oid(relids_logged, myrelid);
+
+ if (recurse)
+ {
+ ListCell *child;
+ List *children;
+
+ children = find_all_inheritors(myrelid, lockmode, NULL);
+
+ foreach(child, children)
+ {
+ Oid childrelid = lfirst_oid(child);
+
+ if (list_member_oid(relids, childrelid))
+ continue;
+
+ /* find_all_inheritors already got lock */
+ rel = table_open(childrelid, NoLock);
+
+ /*
+ * It is possible that the parent table has children that are
+ * temp tables of other backends. We cannot safely access
+ * such tables (because of buffering issues), and the best
+ * thing to do is to silently ignore them. Note that this
+ * check is the same as one of the checks done in
+ * truncate_check_activity() called below, still it is kept
+ * here for simplicity.
+ */
+ if (RELATION_IS_OTHER_TEMP(rel))
+ {
+ /* passing lockmode (not NoLock) also releases the lock */
+ table_close(rel, lockmode);
+ continue;
+ }
+
+ /*
+ * Inherited TRUNCATE commands perform access permission
+ * checks on the parent table only. So we skip checking the
+ * children's permissions and don't call
+ * truncate_check_perms() here.
+ */
+ truncate_check_rel(RelationGetRelid(rel), rel->rd_rel);
+ truncate_check_activity(rel);
+
+ rels = lappend(rels, rel);
+ relids = lappend_oid(relids, childrelid);
+
+ /* Log this relation only if needed for logical decoding */
+ if (RelationIsLogicallyLogged(rel))
+ relids_logged = lappend_oid(relids_logged, childrelid);
+ }
+ }
+ else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot truncate only a partitioned table"),
+ errhint("Do not specify the ONLY keyword, or use TRUNCATE ONLY on the partitions directly.")));
+ }
+
+ ExecuteTruncateGuts(rels, relids, relids_logged,
+ stmt->behavior, stmt->restart_seqs);
+
+ /* And close the rels */
+ foreach(cell, rels)
+ {
+ Relation rel = (Relation) lfirst(cell);
+
+ table_close(rel, NoLock);
+ }
+}
+
+/*
+ * ExecuteTruncateGuts
+ *
+ * Internal implementation of TRUNCATE. This is called by the actual TRUNCATE
+ * command (see above) as well as replication subscribers that execute a
+ * replicated TRUNCATE action.
+ *
+ * explicit_rels is the list of Relations to truncate that the command
+ * specified. relids is the list of Oids corresponding to explicit_rels.
+ * relids_logged is the list of Oids (a subset of relids) that require
+ * WAL-logging. This is all a bit redundant, but the existing callers have
+ * this information handy in this form.
+ *
+ * Note: the relations in explicit_rels remain open on return; the caller
+ * must close them. Only relations opened here (by CASCADE) are closed
+ * before returning.
+ */
+void
+ExecuteTruncateGuts(List *explicit_rels,
+ List *relids,
+ List *relids_logged,
+ DropBehavior behavior, bool restart_seqs)
+{
+ List *rels;
+ List *seq_relids = NIL;
+ HTAB *ft_htab = NULL;
+ EState *estate;
+ ResultRelInfo *resultRelInfos;
+ ResultRelInfo *resultRelInfo;
+ SubTransactionId mySubid;
+ ListCell *cell;
+ Oid *logrelids;
+
+ /*
+ * Check the explicitly-specified relations.
+ *
+ * In CASCADE mode, suck in all referencing relations as well. This
+ * requires multiple iterations to find indirectly-dependent relations. At
+ * each phase, we need to exclusive-lock new rels before looking for their
+ * dependencies, else we might miss something. Also, we check each rel as
+ * soon as we open it, to avoid a faux pas such as holding lock for a long
+ * time on a rel we have no permissions for.
+ */
+ rels = list_copy(explicit_rels);
+ if (behavior == DROP_CASCADE)
+ {
+ for (;;)
+ {
+ List *newrelids;
+
+ newrelids = heap_truncate_find_FKs(relids);
+ if (newrelids == NIL)
+ break; /* nothing else to add */
+
+ foreach(cell, newrelids)
+ {
+ Oid relid = lfirst_oid(cell);
+ Relation rel;
+
+ rel = table_open(relid, AccessExclusiveLock);
+ ereport(NOTICE,
+ (errmsg("truncate cascades to table \"%s\"",
+ RelationGetRelationName(rel))));
+ truncate_check_rel(relid, rel->rd_rel);
+ truncate_check_perms(relid, rel->rd_rel);
+ truncate_check_activity(rel);
+ rels = lappend(rels, rel);
+ relids = lappend_oid(relids, relid);
+
+ /* Log this relation only if needed for logical decoding */
+ if (RelationIsLogicallyLogged(rel))
+ relids_logged = lappend_oid(relids_logged, relid);
+ }
+ }
+ }
+
+ /*
+ * Check foreign key references. In CASCADE mode, this should be
+ * unnecessary since we just pulled in all the references; but as a
+ * cross-check, do it anyway if in an Assert-enabled build.
+ */
+#ifdef USE_ASSERT_CHECKING
+ heap_truncate_check_FKs(rels, false);
+#else
+ if (behavior == DROP_RESTRICT)
+ heap_truncate_check_FKs(rels, false);
+#endif
+
+ /*
+ * If we are asked to restart sequences, find all the sequences, lock them
+ * (we need AccessExclusiveLock for ResetSequence), and check permissions.
+ * We want to do this early since it's pointless to do all the truncation
+ * work only to fail on sequence permissions.
+ */
+ if (restart_seqs)
+ {
+ foreach(cell, rels)
+ {
+ Relation rel = (Relation) lfirst(cell);
+ List *seqlist = getOwnedSequences(RelationGetRelid(rel));
+ ListCell *seqcell;
+
+ foreach(seqcell, seqlist)
+ {
+ Oid seq_relid = lfirst_oid(seqcell);
+ Relation seq_rel;
+
+ seq_rel = relation_open(seq_relid, AccessExclusiveLock);
+
+ /* This check must match AlterSequence! */
+ if (!pg_class_ownercheck(seq_relid, GetUserId()))
+ aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_SEQUENCE,
+ RelationGetRelationName(seq_rel));
+
+ seq_relids = lappend_oid(seq_relids, seq_relid);
+
+ /* close but keep the AccessExclusiveLock until end of xact */
+ relation_close(seq_rel, NoLock);
+ }
+ }
+ }
+
+ /* Prepare to catch AFTER triggers. */
+ AfterTriggerBeginQuery();
+
+ /*
+ * To fire triggers, we'll need an EState as well as a ResultRelInfo for
+ * each relation. We don't need to call ExecOpenIndices, though.
+ *
+ * We put the ResultRelInfos in the es_opened_result_relations list, even
+ * though we don't have a range table and don't populate the
+ * es_result_relations array. That's a bit bogus, but it's enough to make
+ * ExecGetTriggerResultRel() find them.
+ */
+ estate = CreateExecutorState();
+ resultRelInfos = (ResultRelInfo *)
+ palloc(list_length(rels) * sizeof(ResultRelInfo));
+ resultRelInfo = resultRelInfos;
+ foreach(cell, rels)
+ {
+ Relation rel = (Relation) lfirst(cell);
+
+ InitResultRelInfo(resultRelInfo,
+ rel,
+ 0, /* dummy rangetable index */
+ NULL,
+ 0);
+ estate->es_opened_result_relations =
+ lappend(estate->es_opened_result_relations, resultRelInfo);
+ resultRelInfo++;
+ }
+
+ /*
+ * Process all BEFORE STATEMENT TRUNCATE triggers before we begin
+ * truncating (this is because one of them might throw an error). Also, if
+ * we were to allow them to prevent statement execution, that would need
+ * to be handled here.
+ */
+ resultRelInfo = resultRelInfos;
+ foreach(cell, rels)
+ {
+ ExecBSTruncateTriggers(estate, resultRelInfo);
+ resultRelInfo++;
+ }
+
+ /*
+ * OK, truncate each table.
+ */
+ mySubid = GetCurrentSubTransactionId();
+
+ foreach(cell, rels)
+ {
+ Relation rel = (Relation) lfirst(cell);
+
+ /* Skip partitioned tables as there is nothing to do */
+ if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ continue;
+
+ /*
+ * Build the lists of foreign tables belonging to each foreign server
+ * and pass each list to the foreign data wrapper's callback function,
+ * so that each server can truncate its all foreign tables in bulk.
+ * Each list is saved as a single entry in a hash table that uses the
+ * server OID as lookup key.
+ */
+ if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+ {
+ Oid serverid = GetForeignServerIdByRelId(RelationGetRelid(rel));
+ bool found;
+ ForeignTruncateInfo *ft_info;
+
+ /* First time through, initialize hashtable for foreign tables */
+ if (!ft_htab)
+ {
+ HASHCTL hctl;
+
+ memset(&hctl, 0, sizeof(HASHCTL));
+ hctl.keysize = sizeof(Oid);
+ hctl.entrysize = sizeof(ForeignTruncateInfo);
+ hctl.hcxt = CurrentMemoryContext;
+
+ ft_htab = hash_create("TRUNCATE for Foreign Tables",
+ 32, /* start small and extend */
+ &hctl,
+ HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
+ }
+
+ /* Find or create cached entry for the foreign table */
+ ft_info = hash_search(ft_htab, &serverid, HASH_ENTER, &found);
+ if (!found)
+ {
+ ft_info->serverid = serverid;
+ ft_info->rels = NIL;
+ }
+
+ /*
+ * Save the foreign table in the entry of the server that the
+ * foreign table belongs to.
+ */
+ ft_info->rels = lappend(ft_info->rels, rel);
+ continue;
+ }
+
+ /*
+ * Normally, we need a transaction-safe truncation here. However, if
+ * the table was either created in the current (sub)transaction or has
+ * a new relfilenode in the current (sub)transaction, then we can just
+ * truncate it in-place, because a rollback would cause the whole
+ * table or the current physical file to be thrown away anyway.
+ */
+ if (rel->rd_createSubid == mySubid ||
+ rel->rd_newRelfilenodeSubid == mySubid)
+ {
+ /* Immediate, non-rollbackable truncation is OK */
+ heap_truncate_one_rel(rel);
+ }
+ else
+ {
+ Oid heap_relid;
+ Oid toast_relid;
+ ReindexParams reindex_params = {0};
+
+ /*
+ * This effectively deletes all rows in the table, and may be done
+ * in a serializable transaction. In that case we must record a
+ * rw-conflict in to this transaction from each transaction
+ * holding a predicate lock on the table.
+ */
+ CheckTableForSerializableConflictIn(rel);
+
+ /*
+ * Need the full transaction-safe pushups.
+ *
+ * Create a new empty storage file for the relation, and assign it
+ * as the relfilenode value. The old storage file is scheduled for
+ * deletion at commit.
+ */
+ RelationSetNewRelfilenode(rel, rel->rd_rel->relpersistence);
+
+ heap_relid = RelationGetRelid(rel);
+
+ /*
+ * The same for the toast table, if any.
+ */
+ toast_relid = rel->rd_rel->reltoastrelid;
+ if (OidIsValid(toast_relid))
+ {
+ Relation toastrel = relation_open(toast_relid,
+ AccessExclusiveLock);
+
+ RelationSetNewRelfilenode(toastrel,
+ toastrel->rd_rel->relpersistence);
+ table_close(toastrel, NoLock);
+ }
+
+ /*
+ * Reconstruct the indexes to match, and we're done.
+ */
+ reindex_relation(heap_relid, REINDEX_REL_PROCESS_TOAST,
+ &reindex_params);
+ }
+
+ pgstat_count_truncate(rel);
+ }
+
+ /* Now go through the hash table, and truncate foreign tables */
+ if (ft_htab)
+ {
+ ForeignTruncateInfo *ft_info;
+ HASH_SEQ_STATUS seq;
+
+ hash_seq_init(&seq, ft_htab);
+
+ PG_TRY();
+ {
+ while ((ft_info = hash_seq_search(&seq)) != NULL)
+ {
+ FdwRoutine *routine = GetFdwRoutineByServerId(ft_info->serverid);
+
+ /* truncate_check_rel() has checked that already */
+ Assert(routine->ExecForeignTruncate != NULL);
+
+ routine->ExecForeignTruncate(ft_info->rels,
+ behavior,
+ restart_seqs);
+ }
+ }
+ PG_FINALLY();
+ {
+ /* destroy the hash table whether or not the FDW call errored */
+ hash_destroy(ft_htab);
+ }
+ PG_END_TRY();
+ }
+
+ /*
+ * Restart owned sequences if we were asked to.
+ */
+ foreach(cell, seq_relids)
+ {
+ Oid seq_relid = lfirst_oid(cell);
+
+ ResetSequence(seq_relid);
+ }
+
+ /*
+ * Write a WAL record to allow this set of actions to be logically
+ * decoded.
+ *
+ * Assemble an array of relids so we can write a single WAL record for the
+ * whole action.
+ */
+ if (list_length(relids_logged) > 0)
+ {
+ xl_heap_truncate xlrec;
+ int i = 0;
+
+ /* should only get here if wal_level >= logical */
+ Assert(XLogLogicalInfoActive());
+
+ logrelids = palloc(list_length(relids_logged) * sizeof(Oid));
+ foreach(cell, relids_logged)
+ logrelids[i++] = lfirst_oid(cell);
+
+ xlrec.dbId = MyDatabaseId;
+ xlrec.nrelids = list_length(relids_logged);
+ xlrec.flags = 0;
+ if (behavior == DROP_CASCADE)
+ xlrec.flags |= XLH_TRUNCATE_CASCADE;
+ if (restart_seqs)
+ xlrec.flags |= XLH_TRUNCATE_RESTART_SEQS;
+
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, SizeOfHeapTruncate);
+ XLogRegisterData((char *) logrelids, list_length(relids_logged) * sizeof(Oid));
+
+ XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
+
+ (void) XLogInsert(RM_HEAP_ID, XLOG_HEAP_TRUNCATE);
+ }
+
+ /*
+ * Process all AFTER STATEMENT TRUNCATE triggers.
+ */
+ resultRelInfo = resultRelInfos;
+ foreach(cell, rels)
+ {
+ ExecASTruncateTriggers(estate, resultRelInfo);
+ resultRelInfo++;
+ }
+
+ /* Handle queued AFTER triggers */
+ AfterTriggerEndQuery(estate);
+
+ /* We can clean up the EState now */
+ FreeExecutorState(estate);
+
+ /*
+ * Close any rels opened by CASCADE (can't do this while EState still
+ * holds refs)
+ */
+ rels = list_difference_ptr(rels, explicit_rels);
+ foreach(cell, rels)
+ {
+ Relation rel = (Relation) lfirst(cell);
+
+ table_close(rel, NoLock);
+ }
+}
+
+/*
+ * Check that a given relation is safe to truncate. Subroutine for
+ * ExecuteTruncate() and RangeVarCallbackForTruncate().
+ */
+static void
+truncate_check_rel(Oid relid, Form_pg_class reltuple)
+{
+ char *relname = NameStr(reltuple->relname);
+
+ /*
+ * Only allow truncate on regular tables, foreign tables using foreign
+ * data wrappers supporting TRUNCATE and partitioned tables (although, the
+ * latter are only being included here for the following checks; no
+ * physical truncation will occur in their case.).
+ */
+ if (reltuple->relkind == RELKIND_FOREIGN_TABLE)
+ {
+ Oid serverid = GetForeignServerIdByRelId(relid);
+ FdwRoutine *fdwroutine = GetFdwRoutineByServerId(serverid);
+
+ if (!fdwroutine->ExecForeignTruncate)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot truncate foreign table \"%s\"",
+ relname)));
+ }
+ else if (reltuple->relkind != RELKIND_RELATION &&
+ reltuple->relkind != RELKIND_PARTITIONED_TABLE)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("\"%s\" is not a table", relname)));
+
+ /*
+ * Most system catalogs can't be truncated at all, or at least not unless
+ * allow_system_table_mods=on. As an exception, however, we allow
+ * pg_largeobject to be truncated as part of pg_upgrade, because we need
+ * to change its relfilenode to match the old cluster, and allowing a
+ * TRUNCATE command to be executed is the easiest way of doing that.
+ */
+ if (!allowSystemTableMods && IsSystemClass(relid, reltuple)
+ && (!IsBinaryUpgrade || relid != LargeObjectRelationId))
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("permission denied: \"%s\" is a system catalog",
+ relname)));
+
+ InvokeObjectTruncateHook(relid);
+}
+
+/*
+ * Check that current user has the permission to truncate given relation.
+ */
+static void
+truncate_check_perms(Oid relid, Form_pg_class reltuple)
+{
+ char *relname = NameStr(reltuple->relname);
+ AclResult aclresult;
+
+ /* Permissions checks */
+ aclresult = pg_class_aclcheck(relid, GetUserId(), ACL_TRUNCATE);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error(aclresult, get_relkind_objtype(reltuple->relkind),
+ relname);
+}
+
+/*
+ * Set of extra sanity checks to check if a given relation is safe to
+ * truncate. This is split with truncate_check_rel() as
+ * RangeVarCallbackForTruncate() cannot open a Relation yet.
+ */
+static void
+truncate_check_activity(Relation rel)
+{
+ /*
+ * Don't allow truncate on temp tables of other backends ... their local
+ * buffer manager is not going to cope.
+ */
+ if (RELATION_IS_OTHER_TEMP(rel))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot truncate temporary tables of other sessions")));
+
+ /*
+ * Also check for active uses of the relation in the current transaction,
+ * including open scans and pending AFTER trigger events.
+ */
+ CheckTableNotInUse(rel, "TRUNCATE");
+}
+
+/*
+ * storage_name
+ * returns the name corresponding to a typstorage/attstorage enum value
+ */
+static const char *
+storage_name(char c)
+{
+ switch (c)
+ {
+ case TYPSTORAGE_PLAIN:
+ return "PLAIN";
+ case TYPSTORAGE_EXTERNAL:
+ return "EXTERNAL";
+ case TYPSTORAGE_EXTENDED:
+ return "EXTENDED";
+ case TYPSTORAGE_MAIN:
+ return "MAIN";
+ default:
+ return "???";
+ }
+}
+
+/*----------
+ * MergeAttributes
+ * Returns new schema given initial schema and superclasses.
+ *
+ * Input arguments:
+ * 'schema' is the column/attribute definition for the table. (It's a list
+ * of ColumnDef's.) It is destructively changed.
+ * 'supers' is a list of OIDs of parent relations, already locked by caller.
+ * 'relpersistence' is the persistence type of the table.
+ * 'is_partition' tells if the table is a partition.
+ *
+ * Output arguments:
+ * 'supconstr' receives a list of constraints belonging to the parents,
+ * updated as necessary to be valid for the child.
+ *
+ * Return value:
+ * Completed schema list.
+ *
+ * Notes:
+ * The order in which the attributes are inherited is very important.
+ * Intuitively, the inherited attributes should come first. If a table
+ * inherits from multiple parents, the order of those attributes are
+ * according to the order of the parents specified in CREATE TABLE.
+ *
+ * Here's an example:
+ *
+ * create table person (name text, age int4, location point);
+ * create table emp (salary int4, manager text) inherits(person);
+ * create table student (gpa float8) inherits (person);
+ * create table stud_emp (percent int4) inherits (emp, student);
+ *
+ * The order of the attributes of stud_emp is:
+ *
+ * person {1:name, 2:age, 3:location}
+ * / \
+ * {6:gpa} student emp {4:salary, 5:manager}
+ * \ /
+ * stud_emp {7:percent}
+ *
+ * If the same attribute name appears multiple times, then it appears
+ * in the result table in the proper location for its first appearance.
+ *
+ * Constraints (including NOT NULL constraints) for the child table
+ * are the union of all relevant constraints, from both the child schema
+ * and parent tables.
+ *
+ * The default value for a child column is defined as:
+ * (1) If the child schema specifies a default, that value is used.
+ * (2) If neither the child nor any parent specifies a default, then
+ * the column will not have a default.
+ * (3) If conflicting defaults are inherited from different parents
+ * (and not overridden by the child), an error is raised.
+ * (4) Otherwise the inherited default is used.
+ * Rule (3) is new in Postgres 7.1; in earlier releases you got a
+ * rather arbitrary choice of which parent default to use.
+ *----------
+ */
+static List *
+MergeAttributes(List *schema, List *supers, char relpersistence,
+ bool is_partition, List **supconstr)
+{
+ List *inhSchema = NIL;
+ List *constraints = NIL;
+ bool have_bogus_defaults = false;
+ int child_attno;
+ static Node bogus_marker = {0}; /* marks conflicting defaults */
+ List *saved_schema = NIL;
+ ListCell *entry;
+
+ /*
+ * Check for and reject tables with too many columns. We perform this
+ * check relatively early for two reasons: (a) we don't run the risk of
+ * overflowing an AttrNumber in subsequent code (b) an O(n^2) algorithm is
+ * okay if we're processing <= 1600 columns, but could take minutes to
+ * execute if the user attempts to create a table with hundreds of
+ * thousands of columns.
+ *
+ * Note that we also need to check that we do not exceed this figure after
+ * including columns from inherited relations.
+ */
+ if (list_length(schema) > MaxHeapAttributeNumber)
+ ereport(ERROR,
+ (errcode(ERRCODE_TOO_MANY_COLUMNS),
+ errmsg("tables can have at most %d columns",
+ MaxHeapAttributeNumber)));
+
+ /*
+ * Check for duplicate names in the explicit list of attributes.
+ *
+ * Although we might consider merging such entries in the same way that we
+ * handle name conflicts for inherited attributes, it seems to make more
+ * sense to assume such conflicts are errors.
+ *
+ * We don't use foreach() here because we have two nested loops over the
+ * schema list, with possible element deletions in the inner one. If we
+ * used foreach_delete_current() it could only fix up the state of one of
+ * the loops, so it seems cleaner to use looping over list indexes for
+ * both loops. Note that any deletion will happen beyond where the outer
+ * loop is, so its index never needs adjustment.
+ */
+ for (int coldefpos = 0; coldefpos < list_length(schema); coldefpos++)
+ {
+ ColumnDef *coldef = list_nth_node(ColumnDef, schema, coldefpos);
+
+ if (!is_partition && coldef->typeName == NULL)
+ {
+ /*
+ * Typed table column option that does not belong to a column from
+ * the type. This works because the columns from the type come
+ * first in the list. (We omit this check for partition column
+ * lists; those are processed separately below.)
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_COLUMN),
+ errmsg("column \"%s\" does not exist",
+ coldef->colname)));
+ }
+
+ /* restpos scans all entries beyond coldef; incr is in loop body */
+ for (int restpos = coldefpos + 1; restpos < list_length(schema);)
+ {
+ ColumnDef *restdef = list_nth_node(ColumnDef, schema, restpos);
+
+ if (strcmp(coldef->colname, restdef->colname) == 0)
+ {
+ if (coldef->is_from_type)
+ {
+ /*
+ * merge the column options into the column from the type
+ */
+ coldef->is_not_null = restdef->is_not_null;
+ coldef->raw_default = restdef->raw_default;
+ coldef->cooked_default = restdef->cooked_default;
+ coldef->constraints = restdef->constraints;
+ coldef->is_from_type = false;
+ schema = list_delete_nth_cell(schema, restpos);
+ }
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_COLUMN),
+ errmsg("column \"%s\" specified more than once",
+ coldef->colname)));
+ }
+ else
+ restpos++;
+ }
+ }
+
+ /*
+ * In case of a partition, there are no new column definitions, only dummy
+ * ColumnDefs created for column constraints. Set them aside for now and
+ * process them at the end.
+ */
+ if (is_partition)
+ {
+ saved_schema = schema;
+ schema = NIL;
+ }
+
+ /*
+ * Scan the parents left-to-right, and merge their attributes to form a
+ * list of inherited attributes (inhSchema). Also check to see if we need
+ * to inherit an OID column.
+ */
+ child_attno = 0;
+ foreach(entry, supers)
+ {
+ Oid parent = lfirst_oid(entry);
+ Relation relation;
+ TupleDesc tupleDesc;
+ TupleConstr *constr;
+ AttrMap *newattmap;
+ List *inherited_defaults;
+ List *cols_with_defaults;
+ AttrNumber parent_attno;
+ ListCell *lc1;
+ ListCell *lc2;
+
+ /* caller already got lock */
+ relation = table_open(parent, NoLock);
+
+ /*
+ * Check for active uses of the parent partitioned table in the
+ * current transaction, such as being used in some manner by an
+ * enclosing command.
+ */
+ if (is_partition)
+ CheckTableNotInUse(relation, "CREATE TABLE .. PARTITION OF");
+
+ /*
+ * We do not allow partitioned tables and partitions to participate in
+ * regular inheritance.
+ */
+ if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
+ !is_partition)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot inherit from partitioned table \"%s\"",
+ RelationGetRelationName(relation))));
+ if (relation->rd_rel->relispartition && !is_partition)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot inherit from partition \"%s\"",
+ RelationGetRelationName(relation))));
+
+ if (relation->rd_rel->relkind != RELKIND_RELATION &&
+ relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE &&
+ relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("inherited relation \"%s\" is not a table or foreign table",
+ RelationGetRelationName(relation))));
+
+ /*
+ * If the parent is permanent, so must be all of its partitions. Note
+ * that inheritance allows that case.
+ */
+ if (is_partition &&
+ relation->rd_rel->relpersistence != RELPERSISTENCE_TEMP &&
+ relpersistence == RELPERSISTENCE_TEMP)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot create a temporary relation as partition of permanent relation \"%s\"",
+ RelationGetRelationName(relation))));
+
+ /* Permanent rels cannot inherit from temporary ones */
+ if (relpersistence != RELPERSISTENCE_TEMP &&
+ relation->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg(!is_partition
+ ? "cannot inherit from temporary relation \"%s\""
+ : "cannot create a permanent relation as partition of temporary relation \"%s\"",
+ RelationGetRelationName(relation))));
+
+ /* If existing rel is temp, it must belong to this session */
+ if (relation->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
+ !relation->rd_islocaltemp)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg(!is_partition
+ ? "cannot inherit from temporary relation of another session"
+ : "cannot create as partition of temporary relation of another session")));
+
+ /*
+ * We should have an UNDER permission flag for this, but for now,
+ * demand that creator of a child table own the parent.
+ */
+ if (!pg_class_ownercheck(RelationGetRelid(relation), GetUserId()))
+ aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(relation->rd_rel->relkind),
+ RelationGetRelationName(relation));
+
+ tupleDesc = RelationGetDescr(relation);
+ constr = tupleDesc->constr;
+
+ /*
+ * newattmap->attnums[] will contain the child-table attribute numbers
+ * for the attributes of this parent table. (They are not the same
+ * for parents after the first one, nor if we have dropped columns.)
+ */
+ newattmap = make_attrmap(tupleDesc->natts);
+
+ /* We can't process inherited defaults until newattmap is complete. */
+ inherited_defaults = cols_with_defaults = NIL;
+
+ for (parent_attno = 1; parent_attno <= tupleDesc->natts;
+ parent_attno++)
+ {
+ Form_pg_attribute attribute = TupleDescAttr(tupleDesc,
+ parent_attno - 1);
+ char *attributeName = NameStr(attribute->attname);
+ int exist_attno;
+ ColumnDef *def;
+
+ /*
+ * Ignore dropped columns in the parent.
+ */
+ if (attribute->attisdropped)
+ continue; /* leave newattmap->attnums entry as zero */
+
+ /*
+ * Does it conflict with some previously inherited column?
+ */
+ exist_attno = findAttrByName(attributeName, inhSchema);
+ if (exist_attno > 0)
+ {
+ Oid defTypeId;
+ int32 deftypmod;
+ Oid defCollId;
+
+ /*
+ * Yes, try to merge the two column definitions. They must
+ * have the same type, typmod, and collation.
+ */
+ ereport(NOTICE,
+ (errmsg("merging multiple inherited definitions of column \"%s\"",
+ attributeName)));
+ def = (ColumnDef *) list_nth(inhSchema, exist_attno - 1);
+ typenameTypeIdAndMod(NULL, def->typeName, &defTypeId, &deftypmod);
+ if (defTypeId != attribute->atttypid ||
+ deftypmod != attribute->atttypmod)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("inherited column \"%s\" has a type conflict",
+ attributeName),
+ errdetail("%s versus %s",
+ format_type_with_typemod(defTypeId,
+ deftypmod),
+ format_type_with_typemod(attribute->atttypid,
+ attribute->atttypmod))));
+ defCollId = GetColumnDefCollation(NULL, def, defTypeId);
+ if (defCollId != attribute->attcollation)
+ ereport(ERROR,
+ (errcode(ERRCODE_COLLATION_MISMATCH),
+ errmsg("inherited column \"%s\" has a collation conflict",
+ attributeName),
+ errdetail("\"%s\" versus \"%s\"",
+ get_collation_name(defCollId),
+ get_collation_name(attribute->attcollation))));
+
+ /* Copy/check storage parameter */
+ if (def->storage == 0)
+ def->storage = attribute->attstorage;
+ else if (def->storage != attribute->attstorage)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("inherited column \"%s\" has a storage parameter conflict",
+ attributeName),
+ errdetail("%s versus %s",
+ storage_name(def->storage),
+ storage_name(attribute->attstorage))));
+
+ /* Copy/check compression parameter */
+ if (CompressionMethodIsValid(attribute->attcompression))
+ {
+ const char *compression =
+ GetCompressionMethodName(attribute->attcompression);
+
+ if (def->compression == NULL)
+ def->compression = pstrdup(compression);
+ else if (strcmp(def->compression, compression) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("column \"%s\" has a compression method conflict",
+ attributeName),
+ errdetail("%s versus %s", def->compression, compression)));
+ }
+
+ def->inhcount++;
+ /* Merge of NOT NULL constraints = OR 'em together */
+ def->is_not_null |= attribute->attnotnull;
+ /* Default and other constraints are handled below */
+ newattmap->attnums[parent_attno - 1] = exist_attno;
+
+ /* Check for GENERATED conflicts */
+ if (def->generated != attribute->attgenerated)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("inherited column \"%s\" has a generation conflict",
+ attributeName)));
+ }
+ else
+ {
+ /*
+ * No, create a new inherited column
+ */
+ def = makeNode(ColumnDef);
+ def->colname = pstrdup(attributeName);
+ def->typeName = makeTypeNameFromOid(attribute->atttypid,
+ attribute->atttypmod);
+ def->inhcount = 1;
+ def->is_local = false;
+ def->is_not_null = attribute->attnotnull;
+ def->is_from_type = false;
+ def->storage = attribute->attstorage;
+ def->raw_default = NULL;
+ def->cooked_default = NULL;
+ def->generated = attribute->attgenerated;
+ def->collClause = NULL;
+ def->collOid = attribute->attcollation;
+ def->constraints = NIL;
+ def->location = -1;
+ if (CompressionMethodIsValid(attribute->attcompression))
+ def->compression =
+ pstrdup(GetCompressionMethodName(attribute->attcompression));
+ else
+ def->compression = NULL;
+ inhSchema = lappend(inhSchema, def);
+ newattmap->attnums[parent_attno - 1] = ++child_attno;
+ }
+
+ /*
+ * Locate default if any
+ */
+ if (attribute->atthasdef)
+ {
+ Node *this_default = NULL;
+
+ /* Find default in constraint structure */
+ if (constr != NULL)
+ {
+ AttrDefault *attrdef = constr->defval;
+
+ for (int i = 0; i < constr->num_defval; i++)
+ {
+ if (attrdef[i].adnum == parent_attno)
+ {
+ this_default = stringToNode(attrdef[i].adbin);
+ break;
+ }
+ }
+ }
+ if (this_default == NULL)
+ elog(ERROR, "default expression not found for attribute %d of relation \"%s\"",
+ parent_attno, RelationGetRelationName(relation));
+
+ /*
+ * If it's a GENERATED default, it might contain Vars that
+ * need to be mapped to the inherited column(s)' new numbers.
+ * We can't do that till newattmap is ready, so just remember
+ * all the inherited default expressions for the moment.
+ */
+ inherited_defaults = lappend(inherited_defaults, this_default);
+ cols_with_defaults = lappend(cols_with_defaults, def);
+ }
+ }
+
+ /*
+ * Now process any inherited default expressions, adjusting attnos
+ * using the completed newattmap map.
+ */
+ forboth(lc1, inherited_defaults, lc2, cols_with_defaults)
+ {
+ Node *this_default = (Node *) lfirst(lc1);
+ ColumnDef *def = (ColumnDef *) lfirst(lc2);
+ bool found_whole_row;
+
+ /* Adjust Vars to match new table's column numbering */
+ this_default = map_variable_attnos(this_default,
+ 1, 0,
+ newattmap,
+ InvalidOid, &found_whole_row);
+
+ /*
+ * For the moment we have to reject whole-row variables. We could
+ * convert them, if we knew the new table's rowtype OID, but that
+ * hasn't been assigned yet. (A variable could only appear in a
+ * generation expression, so the error message is correct.)
+ */
+ if (found_whole_row)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot convert whole-row table reference"),
+ errdetail("Generation expression for column \"%s\" contains a whole-row reference to table \"%s\".",
+ def->colname,
+ RelationGetRelationName(relation))));
+
+ /*
+ * If we already had a default from some prior parent, check to
+ * see if they are the same. If so, no problem; if not, mark the
+ * column as having a bogus default. Below, we will complain if
+ * the bogus default isn't overridden by the child schema.
+ */
+ Assert(def->raw_default == NULL);
+ if (def->cooked_default == NULL)
+ def->cooked_default = this_default;
+ else if (!equal(def->cooked_default, this_default))
+ {
+ def->cooked_default = &bogus_marker;
+ have_bogus_defaults = true;
+ }
+ }
+
+ /*
+ * Now copy the CHECK constraints of this parent, adjusting attnos
+ * using the completed newattmap map. Identically named constraints
+ * are merged if possible, else we throw error.
+ */
+ if (constr && constr->num_check > 0)
+ {
+ ConstrCheck *check = constr->check;
+ int i;
+
+ for (i = 0; i < constr->num_check; i++)
+ {
+ char *name = check[i].ccname;
+ Node *expr;
+ bool found_whole_row;
+
+ /* ignore if the constraint is non-inheritable */
+ if (check[i].ccnoinherit)
+ continue;
+
+ /* Adjust Vars to match new table's column numbering */
+ expr = map_variable_attnos(stringToNode(check[i].ccbin),
+ 1, 0,
+ newattmap,
+ InvalidOid, &found_whole_row);
+
+ /*
+ * For the moment we have to reject whole-row variables. We
+ * could convert them, if we knew the new table's rowtype OID,
+ * but that hasn't been assigned yet.
+ */
+ if (found_whole_row)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot convert whole-row table reference"),
+ errdetail("Constraint \"%s\" contains a whole-row reference to table \"%s\".",
+ name,
+ RelationGetRelationName(relation))));
+
+ /* check for duplicate */
+ if (!MergeCheckConstraint(constraints, name, expr))
+ {
+ /* nope, this is a new one */
+ CookedConstraint *cooked;
+
+ cooked = (CookedConstraint *) palloc(sizeof(CookedConstraint));
+ cooked->contype = CONSTR_CHECK;
+ cooked->conoid = InvalidOid; /* until created */
+ cooked->name = pstrdup(name);
+ cooked->attnum = 0; /* not used for constraints */
+ cooked->expr = expr;
+ cooked->skip_validation = false;
+ cooked->is_local = false;
+ cooked->inhcount = 1;
+ cooked->is_no_inherit = false;
+ constraints = lappend(constraints, cooked);
+ }
+ }
+ }
+
+ free_attrmap(newattmap);
+
+ /*
+ * Close the parent rel, but keep our lock on it until xact commit.
+ * That will prevent someone else from deleting or ALTERing the parent
+ * before the child is committed.
+ */
+ table_close(relation, NoLock);
+ }
+
+ /*
+ * If we had no inherited attributes, the result schema is just the
+ * explicitly declared columns. Otherwise, we need to merge the declared
+ * columns into the inherited schema list. Although, we never have any
+ * explicitly declared columns if the table is a partition.
+ */
+ if (inhSchema != NIL)
+ {
+ int schema_attno = 0;
+
+ foreach(entry, schema)
+ {
+ ColumnDef *newdef = lfirst(entry);
+ char *attributeName = newdef->colname;
+ int exist_attno;
+
+ schema_attno++;
+
+ /*
+ * Does it conflict with some previously inherited column?
+ */
+ exist_attno = findAttrByName(attributeName, inhSchema);
+ if (exist_attno > 0)
+ {
+ ColumnDef *def;
+ Oid defTypeId,
+ newTypeId;
+ int32 deftypmod,
+ newtypmod;
+ Oid defcollid,
+ newcollid;
+
+ /*
+ * Partitions have only one parent and have no column
+ * definitions of their own, so conflict should never occur.
+ */
+ Assert(!is_partition);
+
+ /*
+ * Yes, try to merge the two column definitions. They must
+ * have the same type, typmod, and collation.
+ */
+ if (exist_attno == schema_attno)
+ ereport(NOTICE,
+ (errmsg("merging column \"%s\" with inherited definition",
+ attributeName)));
+ else
+ ereport(NOTICE,
+ (errmsg("moving and merging column \"%s\" with inherited definition", attributeName),
+ errdetail("User-specified column moved to the position of the inherited column.")));
+ def = (ColumnDef *) list_nth(inhSchema, exist_attno - 1);
+ typenameTypeIdAndMod(NULL, def->typeName, &defTypeId, &deftypmod);
+ typenameTypeIdAndMod(NULL, newdef->typeName, &newTypeId, &newtypmod);
+ if (defTypeId != newTypeId || deftypmod != newtypmod)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("column \"%s\" has a type conflict",
+ attributeName),
+ errdetail("%s versus %s",
+ format_type_with_typemod(defTypeId,
+ deftypmod),
+ format_type_with_typemod(newTypeId,
+ newtypmod))));
+ defcollid = GetColumnDefCollation(NULL, def, defTypeId);
+ newcollid = GetColumnDefCollation(NULL, newdef, newTypeId);
+ if (defcollid != newcollid)
+ ereport(ERROR,
+ (errcode(ERRCODE_COLLATION_MISMATCH),
+ errmsg("column \"%s\" has a collation conflict",
+ attributeName),
+ errdetail("\"%s\" versus \"%s\"",
+ get_collation_name(defcollid),
+ get_collation_name(newcollid))));
+
+ /*
+ * Identity is never inherited. The new column can have an
+ * identity definition, so we always just take that one.
+ */
+ def->identity = newdef->identity;
+
+ /* Copy storage parameter */
+ if (def->storage == 0)
+ def->storage = newdef->storage;
+ else if (newdef->storage != 0 && def->storage != newdef->storage)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("column \"%s\" has a storage parameter conflict",
+ attributeName),
+ errdetail("%s versus %s",
+ storage_name(def->storage),
+ storage_name(newdef->storage))));
+
+ /* Copy compression parameter */
+ if (def->compression == NULL)
+ def->compression = newdef->compression;
+ else if (newdef->compression != NULL)
+ {
+ if (strcmp(def->compression, newdef->compression) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("column \"%s\" has a compression method conflict",
+ attributeName),
+ errdetail("%s versus %s", def->compression, newdef->compression)));
+ }
+
+ /* Mark the column as locally defined */
+ def->is_local = true;
+ /* Merge of NOT NULL constraints = OR 'em together */
+ def->is_not_null |= newdef->is_not_null;
+
+ /*
+ * Check for conflicts related to generated columns.
+ *
+ * If the parent column is generated, the child column must be
+ * unadorned and will be made a generated column. (We could
+ * in theory allow the child column definition specifying the
+ * exact same generation expression, but that's a bit
+ * complicated to implement and doesn't seem very useful.) We
+ * also check that the child column doesn't specify a default
+ * value or identity, which matches the rules for a single
+ * column in parse_util.c.
+ */
+ if (def->generated)
+ {
+ if (newdef->generated)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_COLUMN_DEFINITION),
+ errmsg("child column \"%s\" specifies generation expression",
+ def->colname),
+ errhint("Omit the generation expression in the definition of the child table column to inherit the generation expression from the parent table.")));
+ if (newdef->raw_default && !newdef->generated)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_COLUMN_DEFINITION),
+ errmsg("column \"%s\" inherits from generated column but specifies default",
+ def->colname)));
+ if (newdef->identity)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_COLUMN_DEFINITION),
+ errmsg("column \"%s\" inherits from generated column but specifies identity",
+ def->colname)));
+ }
+
+ /*
+ * If the parent column is not generated, then take whatever
+ * the child column definition says.
+ */
+ else
+ {
+ if (newdef->generated)
+ def->generated = newdef->generated;
+ }
+
+ /* If new def has a default, override previous default */
+ if (newdef->raw_default != NULL)
+ {
+ def->raw_default = newdef->raw_default;
+ def->cooked_default = newdef->cooked_default;
+ }
+ }
+ else
+ {
+ /*
+ * No, attach new column to result schema
+ */
+ inhSchema = lappend(inhSchema, newdef);
+ }
+ }
+
+ schema = inhSchema;
+
+ /*
+ * Check that we haven't exceeded the legal # of columns after merging
+ * in inherited columns.
+ */
+ if (list_length(schema) > MaxHeapAttributeNumber)
+ ereport(ERROR,
+ (errcode(ERRCODE_TOO_MANY_COLUMNS),
+ errmsg("tables can have at most %d columns",
+ MaxHeapAttributeNumber)));
+ }
+
+ /*
+ * Now that we have the column definition list for a partition, we can
+ * check whether the columns referenced in the column constraint specs
+ * actually exist. Also, we merge NOT NULL and defaults into each
+ * corresponding column definition.
+ */
+ if (is_partition)
+ {
+ foreach(entry, saved_schema)
+ {
+ ColumnDef *restdef = lfirst(entry);
+ bool found = false;
+ ListCell *l;
+
+ foreach(l, schema)
+ {
+ ColumnDef *coldef = lfirst(l);
+
+ if (strcmp(coldef->colname, restdef->colname) == 0)
+ {
+ found = true;
+ coldef->is_not_null |= restdef->is_not_null;
+
+ /*
+ * Override the parent's default value for this column
+ * (coldef->cooked_default) with the partition's local
+ * definition (restdef->raw_default), if there's one. It
+ * should be physically impossible to get a cooked default
+ * in the local definition or a raw default in the
+ * inherited definition, but make sure they're nulls, for
+ * future-proofing.
+ */
+ Assert(restdef->cooked_default == NULL);
+ Assert(coldef->raw_default == NULL);
+ if (restdef->raw_default)
+ {
+ coldef->raw_default = restdef->raw_default;
+ coldef->cooked_default = NULL;
+ }
+ }
+ }
+
+ /* complain for constraints on columns not in parent */
+ if (!found)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_COLUMN),
+ errmsg("column \"%s\" does not exist",
+ restdef->colname)));
+ }
+ }
+
+ /*
+ * If we found any conflicting parent default values, check to make sure
+ * they were overridden by the child.
+ */
+ if (have_bogus_defaults)
+ {
+ foreach(entry, schema)
+ {
+ ColumnDef *def = lfirst(entry);
+
+ if (def->cooked_default == &bogus_marker)
+ {
+ if (def->generated)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_COLUMN_DEFINITION),
+ errmsg("column \"%s\" inherits conflicting generation expressions",
+ def->colname)));
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_COLUMN_DEFINITION),
+ errmsg("column \"%s\" inherits conflicting default values",
+ def->colname),
+ errhint("To resolve the conflict, specify a default explicitly.")));
+ }
+ }
+ }
+
+ *supconstr = constraints;
+ return schema;
+}
+
+
+/*
+ * MergeCheckConstraint
+ * Try to merge an inherited CHECK constraint with previous ones
+ *
+ * If we inherit identically-named constraints from multiple parents, we must
+ * merge them, or throw an error if they don't have identical definitions.
+ *
+ * constraints is a list of CookedConstraint structs for previous constraints.
+ *
+ * Returns true if merged (constraint is a duplicate), or false if it's
+ * got a so-far-unique name, or throws error if conflict.
+ */
+static bool
+MergeCheckConstraint(List *constraints, char *name, Node *expr)
+{
+ ListCell *lc;
+
+ foreach(lc, constraints)
+ {
+ CookedConstraint *ccon = (CookedConstraint *) lfirst(lc);
+
+ Assert(ccon->contype == CONSTR_CHECK);
+
+ /* Non-matching names never conflict */
+ if (strcmp(ccon->name, name) != 0)
+ continue;
+
+ if (equal(expr, ccon->expr))
+ {
+ /* OK to merge */
+ ccon->inhcount++;
+ return true;
+ }
+
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg("check constraint name \"%s\" appears multiple times but with different expressions",
+ name)));
+ }
+
+ return false;
+}
+
+
+/*
+ * StoreCatalogInheritance
+ * Updates the system catalogs with proper inheritance information.
+ *
+ * supers is a list of the OIDs of the new relation's direct ancestors.
+ */
+static void
+StoreCatalogInheritance(Oid relationId, List *supers,
+ bool child_is_partition)
+{
+ Relation relation;
+ int32 seqNumber;
+ ListCell *entry;
+
+ /*
+ * sanity checks
+ */
+ AssertArg(OidIsValid(relationId));
+
+ if (supers == NIL)
+ return;
+
+ /*
+ * Store INHERITS information in pg_inherits using direct ancestors only.
+ * Also enter dependencies on the direct ancestors, and make sure they are
+ * marked with relhassubclass = true.
+ *
+ * (Once upon a time, both direct and indirect ancestors were found here
+ * and then entered into pg_ipl. Since that catalog doesn't exist
+ * anymore, there's no need to look for indirect ancestors.)
+ */
+ relation = table_open(InheritsRelationId, RowExclusiveLock);
+
+ seqNumber = 1;
+ foreach(entry, supers)
+ {
+ Oid parentOid = lfirst_oid(entry);
+
+ StoreCatalogInheritance1(relationId, parentOid, seqNumber, relation,
+ child_is_partition);
+ seqNumber++;
+ }
+
+ table_close(relation, RowExclusiveLock);
+}
+
+/*
+ * Make catalog entries showing relationId as being an inheritance child
+ * of parentOid. inhRelation is the already-opened pg_inherits catalog.
+ */
+static void
+StoreCatalogInheritance1(Oid relationId, Oid parentOid,
+ int32 seqNumber, Relation inhRelation,
+ bool child_is_partition)
+{
+ ObjectAddress childobject,
+ parentobject;
+
+ /* store the pg_inherits row */
+ StoreSingleInheritance(relationId, parentOid, seqNumber);
+
+ /*
+ * Store a dependency too
+ */
+ parentobject.classId = RelationRelationId;
+ parentobject.objectId = parentOid;
+ parentobject.objectSubId = 0;
+ childobject.classId = RelationRelationId;
+ childobject.objectId = relationId;
+ childobject.objectSubId = 0;
+
+ recordDependencyOn(&childobject, &parentobject,
+ child_dependency_type(child_is_partition));
+
+ /*
+ * Post creation hook of this inheritance. Since object_access_hook
+ * doesn't take multiple object identifiers, we relay oid of parent
+ * relation using auxiliary_id argument.
+ */
+ InvokeObjectPostAlterHookArg(InheritsRelationId,
+ relationId, 0,
+ parentOid, false);
+
+ /*
+ * Mark the parent as having subclasses.
+ */
+ SetRelationHasSubclass(parentOid, true);
+}
+
+/*
+ * Look for an existing schema entry with the given name.
+ *
+ * Returns the index (starting with 1) if attribute already exists in schema,
+ * 0 if it doesn't.
+ */
+static int
+findAttrByName(const char *attributeName, List *schema)
+{
+ ListCell *s;
+ int i = 1;
+
+ foreach(s, schema)
+ {
+ ColumnDef *def = lfirst(s);
+
+ if (strcmp(attributeName, def->colname) == 0)
+ return i;
+
+ i++;
+ }
+ return 0;
+}
+
+
+/*
+ * SetRelationHasSubclass
+ * Set the value of the relation's relhassubclass field in pg_class.
+ *
+ * NOTE: caller must be holding an appropriate lock on the relation.
+ * ShareUpdateExclusiveLock is sufficient.
+ *
+ * NOTE: an important side-effect of this operation is that an SI invalidation
+ * message is sent out to all backends --- including me --- causing plans
+ * referencing the relation to be rebuilt with the new list of children.
+ * This must happen even if we find that no change is needed in the pg_class
+ * row.
+ */
+void
+SetRelationHasSubclass(Oid relationId, bool relhassubclass)
+{
+ Relation relationRelation;
+ HeapTuple tuple;
+ Form_pg_class classtuple;
+
+ /*
+ * Fetch a modifiable copy of the tuple, modify it, update pg_class.
+ */
+ relationRelation = table_open(RelationRelationId, RowExclusiveLock);
+ tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relationId));
+ if (!HeapTupleIsValid(tuple))
+ elog(ERROR, "cache lookup failed for relation %u", relationId);
+ classtuple = (Form_pg_class) GETSTRUCT(tuple);
+
+ if (classtuple->relhassubclass != relhassubclass)
+ {
+ classtuple->relhassubclass = relhassubclass;
+ CatalogTupleUpdate(relationRelation, &tuple->t_self, tuple);
+ }
+ else
+ {
+ /* no need to change tuple, but force relcache rebuild anyway */
+ CacheInvalidateRelcacheByTuple(tuple);
+ }
+
+ heap_freetuple(tuple);
+ table_close(relationRelation, RowExclusiveLock);
+}
+
+/*
+ * CheckRelationTableSpaceMove
+ * Check if relation can be moved to new tablespace.
+ *
+ * NOTE: The caller must hold AccessExclusiveLock on the relation.
+ *
+ * Returns true if the relation can be moved to the new tablespace; raises
+ * an error if it is not possible to do the move; returns false if the move
+ * would have no effect.
+ */
+bool
+CheckRelationTableSpaceMove(Relation rel, Oid newTableSpaceId)
+{
+ Oid oldTableSpaceId;
+
+ /*
+ * No work if no change in tablespace. Note that MyDatabaseTableSpace is
+ * stored as 0.
+ */
+ oldTableSpaceId = rel->rd_rel->reltablespace;
+ if (newTableSpaceId == oldTableSpaceId ||
+ (newTableSpaceId == MyDatabaseTableSpace && oldTableSpaceId == 0))
+ return false;
+
+ /*
+ * We cannot support moving mapped relations into different tablespaces.
+ * (In particular this eliminates all shared catalogs.)
+ */
+ if (RelationIsMapped(rel))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot move system relation \"%s\"",
+ RelationGetRelationName(rel))));
+
+ /* Cannot move a non-shared relation into pg_global */
+ if (newTableSpaceId == GLOBALTABLESPACE_OID)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("only shared relations can be placed in pg_global tablespace")));
+
+ /*
+ * Do not allow moving temp tables of other backends ... their local
+ * buffer manager is not going to cope.
+ */
+ if (RELATION_IS_OTHER_TEMP(rel))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot move temporary tables of other sessions")));
+
+ return true;
+}
+
+/*
+ * SetRelationTableSpace
+ * Set new reltablespace and relfilenode in pg_class entry.
+ *
+ * newTableSpaceId is the new tablespace for the relation, and
+ * newRelFileNode its new filenode. If newRelFileNode is InvalidOid,
+ * this field is not updated.
+ *
+ * NOTE: The caller must hold AccessExclusiveLock on the relation.
+ *
+ * The caller of this routine had better check if a relation can be
+ * moved to this new tablespace by calling CheckRelationTableSpaceMove()
+ * first, and is responsible for making the change visible with
+ * CommandCounterIncrement().
+ */
+void
+SetRelationTableSpace(Relation rel,
+ Oid newTableSpaceId,
+ Oid newRelFileNode)
+{
+ Relation pg_class;
+ HeapTuple tuple;
+ Form_pg_class rd_rel;
+ Oid reloid = RelationGetRelid(rel);
+
+ Assert(CheckRelationTableSpaceMove(rel, newTableSpaceId));
+
+ /* Get a modifiable copy of the relation's pg_class row. */
+ pg_class = table_open(RelationRelationId, RowExclusiveLock);
+
+ tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(reloid));
+ if (!HeapTupleIsValid(tuple))
+ elog(ERROR, "cache lookup failed for relation %u", reloid);
+ rd_rel = (Form_pg_class) GETSTRUCT(tuple);
+
+ /* Update the pg_class row. */
+ rd_rel->reltablespace = (newTableSpaceId == MyDatabaseTableSpace) ?
+ InvalidOid : newTableSpaceId;
+ if (OidIsValid(newRelFileNode))
+ rd_rel->relfilenode = newRelFileNode;
+ CatalogTupleUpdate(pg_class, &tuple->t_self, tuple);
+
+ /*
+ * Record dependency on tablespace. This is only required for relations
+ * that have no physical storage.
+ */
+ if (!RELKIND_HAS_STORAGE(rel->rd_rel->relkind))
+ changeDependencyOnTablespace(RelationRelationId, reloid,
+ rd_rel->reltablespace);
+
+ heap_freetuple(tuple);
+ table_close(pg_class, RowExclusiveLock);
+}
+
+/*
+ * renameatt_check - basic sanity checks before attribute rename
+ */
+static void
+renameatt_check(Oid myrelid, Form_pg_class classform, bool recursing)
+{
+ char relkind = classform->relkind;
+
+ if (classform->reloftype && !recursing)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot rename column of typed table")));
+
+ /*
+ * Renaming the columns of sequences or toast tables doesn't actually
+ * break anything from the system's point of view, since internal
+ * references are by attnum. But it doesn't seem right to allow users to
+ * change names that are hardcoded into the system, hence the following
+ * restriction.
+ */
+ if (relkind != RELKIND_RELATION &&
+ relkind != RELKIND_VIEW &&
+ relkind != RELKIND_MATVIEW &&
+ relkind != RELKIND_COMPOSITE_TYPE &&
+ relkind != RELKIND_INDEX &&
+ relkind != RELKIND_PARTITIONED_INDEX &&
+ relkind != RELKIND_FOREIGN_TABLE &&
+ relkind != RELKIND_PARTITIONED_TABLE)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot rename columns of relation \"%s\"",
+ NameStr(classform->relname)),
+ errdetail_relkind_not_supported(relkind)));
+
+ /*
+ * permissions checking. only the owner of a class can change its schema.
+ */
+ if (!pg_class_ownercheck(myrelid, GetUserId()))
+ aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(get_rel_relkind(myrelid)),
+ NameStr(classform->relname));
+ if (!allowSystemTableMods && IsSystemClass(myrelid, classform))
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("permission denied: \"%s\" is a system catalog",
+ NameStr(classform->relname))));
+}
+
+/*
+ * renameatt_internal - workhorse for renameatt
+ *
+ * Return value is the attribute number in the 'myrelid' relation.
+ */
+static AttrNumber
+renameatt_internal(Oid myrelid,
+ const char *oldattname,
+ const char *newattname,
+ bool recurse,
+ bool recursing,
+ int expected_parents,
+ DropBehavior behavior)
+{
+ Relation targetrelation;
+ Relation attrelation;
+ HeapTuple atttup;
+ Form_pg_attribute attform;
+ AttrNumber attnum;
+
+ /*
+ * Grab an exclusive lock on the target table, which we will NOT release
+ * until end of transaction.
+ */
+ targetrelation = relation_open(myrelid, AccessExclusiveLock);
+ renameatt_check(myrelid, RelationGetForm(targetrelation), recursing);
+
+ /*
+ * if the 'recurse' flag is set then we are supposed to rename this
+ * attribute in all classes that inherit from 'relname' (as well as in
+ * 'relname').
+ *
+ * any permissions or problems with duplicate attributes will cause the
+ * whole transaction to abort, which is what we want -- all or nothing.
+ */
+ if (recurse)
+ {
+ List *child_oids,
+ *child_numparents;
+ ListCell *lo,
+ *li;
+
+ /*
+ * we need the number of parents for each child so that the recursive
+ * calls to renameatt() can determine whether there are any parents
+ * outside the inheritance hierarchy being processed.
+ */
+ child_oids = find_all_inheritors(myrelid, AccessExclusiveLock,
+ &child_numparents);
+
+ /*
+ * find_all_inheritors does the recursive search of the inheritance
+ * hierarchy, so all we have to do is process all of the relids in the
+ * list that it returns.
+ */
+ forboth(lo, child_oids, li, child_numparents)
+ {
+ Oid childrelid = lfirst_oid(lo);
+ int numparents = lfirst_int(li);
+
+ if (childrelid == myrelid)
+ continue;
+ /* note we need not recurse again */
+ renameatt_internal(childrelid, oldattname, newattname, false, true, numparents, behavior);
+ }
+ }
+ else
+ {
+ /*
+ * If we are told not to recurse, there had better not be any child
+ * tables; else the rename would put them out of step.
+ *
+ * expected_parents will only be 0 if we are not already recursing.
+ */
+ if (expected_parents == 0 &&
+ find_inheritance_children(myrelid, NoLock) != NIL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+ errmsg("inherited column \"%s\" must be renamed in child tables too",
+ oldattname)));
+ }
+
+ /* rename attributes in typed tables of composite type */
+ if (targetrelation->rd_rel->relkind == RELKIND_COMPOSITE_TYPE)
+ {
+ List *child_oids;
+ ListCell *lo;
+
+ child_oids = find_typed_table_dependencies(targetrelation->rd_rel->reltype,
+ RelationGetRelationName(targetrelation),
+ behavior);
+
+ foreach(lo, child_oids)
+ renameatt_internal(lfirst_oid(lo), oldattname, newattname, true, true, 0, behavior);
+ }
+
+ attrelation = table_open(AttributeRelationId, RowExclusiveLock);
+
+ atttup = SearchSysCacheCopyAttName(myrelid, oldattname);
+ if (!HeapTupleIsValid(atttup))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_COLUMN),
+ errmsg("column \"%s\" does not exist",
+ oldattname)));
+ attform = (Form_pg_attribute) GETSTRUCT(atttup);
+
+ attnum = attform->attnum;
+ if (attnum <= 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot rename system column \"%s\"",
+ oldattname)));
+
+ /*
+ * if the attribute is inherited, forbid the renaming. if this is a
+ * top-level call to renameatt(), then expected_parents will be 0, so the
+ * effect of this code will be to prohibit the renaming if the attribute
+ * is inherited at all. if this is a recursive call to renameatt(),
+ * expected_parents will be the number of parents the current relation has
+ * within the inheritance hierarchy being processed, so we'll prohibit the
+ * renaming only if there are additional parents from elsewhere.
+ */
+ if (attform->attinhcount > expected_parents)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+ errmsg("cannot rename inherited column \"%s\"",
+ oldattname)));
+
+ /* new name should not already exist */
+ (void) check_for_column_name_collision(targetrelation, newattname, false);
+
+ /* apply the update */
+ namestrcpy(&(attform->attname), newattname);
+
+ CatalogTupleUpdate(attrelation, &atttup->t_self, atttup);
+
+ InvokeObjectPostAlterHook(RelationRelationId, myrelid, attnum);
+
+ heap_freetuple(atttup);
+
+ table_close(attrelation, RowExclusiveLock);
+
+ relation_close(targetrelation, NoLock); /* close rel but keep lock */
+
+ return attnum;
+}
+
+/*
+ * RangeVarGetRelidExtended() callback: check permissions and relation kind
+ * for a column-rename operation before the relation lock is acquired.
+ */
+static void
+RangeVarCallbackForRenameAttribute(const RangeVar *rv, Oid relid, Oid oldrelid,
+								   void *arg)
+{
+	HeapTuple	classtup;
+
+	classtup = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
+	if (!HeapTupleIsValid(classtup))
+		return;					/* relation was dropped concurrently */
+
+	renameatt_check(relid, (Form_pg_class) GETSTRUCT(classtup), false);
+	ReleaseSysCache(classtup);
+}
+
+/*
+ * renameatt - entry point for renaming an attribute of a relation
+ *
+ * Returns the ObjectAddress of the renamed column, or InvalidObjectAddress
+ * when the relation is missing and missing_ok was specified.
+ */
+ObjectAddress
+renameatt(RenameStmt *stmt)
+{
+	Oid			relid;
+	AttrNumber	attnum;
+	ObjectAddress result;
+
+	/* lock level taken here should match renameatt_internal */
+	relid = RangeVarGetRelidExtended(stmt->relation, AccessExclusiveLock,
+									 stmt->missing_ok ? RVR_MISSING_OK : 0,
+									 RangeVarCallbackForRenameAttribute,
+									 NULL);
+
+	if (!OidIsValid(relid))
+	{
+		/* missing_ok was given and the relation isn't there: just notify */
+		ereport(NOTICE,
+				(errmsg("relation \"%s\" does not exist, skipping",
+						stmt->relation->relname)));
+		return InvalidObjectAddress;
+	}
+
+	attnum = renameatt_internal(relid,
+								stmt->subname,	/* old att name */
+								stmt->newname,	/* new att name */
+								stmt->relation->inh,	/* recursive? */
+								false,	/* recursing? */
+								0,		/* expected inhcount */
+								stmt->behavior);
+
+	ObjectAddressSubSet(result, RelationRelationId, relid, attnum);
+
+	return result;
+}
+
+/*
+ * same logic as renameatt_internal: rename a constraint, recursing to
+ * inheritance children where required.
+ *
+ * Exactly one of myrelid (table constraint) or mytypid (domain constraint)
+ * may be valid.  expected_parents plays the same role as in
+ * renameatt_internal: 0 at the top level, else the number of parents the
+ * current relation has within the hierarchy being processed.
+ *
+ * Returns the address of the renamed constraint.
+ */
+static ObjectAddress
+rename_constraint_internal(Oid myrelid,
+						   Oid mytypid,
+						   const char *oldconname,
+						   const char *newconname,
+						   bool recurse,
+						   bool recursing,
+						   int expected_parents)
+{
+	Relation	targetrelation = NULL;
+	Oid			constraintOid;
+	HeapTuple	tuple;
+	Form_pg_constraint con;
+	ObjectAddress address;
+
+	/* caller must supply a relation OID or a domain type OID, not both */
+	AssertArg(!myrelid || !mytypid);
+
+	if (mytypid)
+	{
+		/* domain constraint: no relation to open or check */
+		constraintOid = get_domain_constraint_oid(mytypid, oldconname, false);
+	}
+	else
+	{
+		targetrelation = relation_open(myrelid, AccessExclusiveLock);
+
+		/*
+		 * don't tell it whether we're recursing; we allow changing typed
+		 * tables here
+		 */
+		renameatt_check(myrelid, RelationGetForm(targetrelation), false);
+
+		constraintOid = get_relation_constraint_oid(myrelid, oldconname, false);
+	}
+
+	tuple = SearchSysCache1(CONSTROID, ObjectIdGetDatum(constraintOid));
+	if (!HeapTupleIsValid(tuple))
+		elog(ERROR, "cache lookup failed for constraint %u",
+			 constraintOid);
+	con = (Form_pg_constraint) GETSTRUCT(tuple);
+
+	/*
+	 * Only inheritable CHECK constraints on tables need child-table
+	 * recursion and the inherited-constraint sanity checks.
+	 */
+	if (myrelid && con->contype == CONSTRAINT_CHECK && !con->connoinherit)
+	{
+		if (recurse)
+		{
+			List	   *child_oids,
+					   *child_numparents;
+			ListCell   *lo,
+					   *li;
+
+			child_oids = find_all_inheritors(myrelid, AccessExclusiveLock,
+											 &child_numparents);
+
+			/* walk children in lockstep with their parent counts */
+			forboth(lo, child_oids, li, child_numparents)
+			{
+				Oid			childrelid = lfirst_oid(lo);
+				int			numparents = lfirst_int(li);
+
+				/* find_all_inheritors includes the root; skip it */
+				if (childrelid == myrelid)
+					continue;
+
+				rename_constraint_internal(childrelid, InvalidOid, oldconname, newconname, false, true, numparents);
+			}
+		}
+		else
+		{
+			/*
+			 * Non-recursive rename of an inherited constraint would leave
+			 * children out of sync; forbid it at the top level.
+			 */
+			if (expected_parents == 0 &&
+				find_inheritance_children(myrelid, NoLock) != NIL)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+						 errmsg("inherited constraint \"%s\" must be renamed in child tables too",
+								oldconname)));
+		}
+
+		/* extra inheritance sources from outside this hierarchy: forbid */
+		if (con->coninhcount > expected_parents)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+					 errmsg("cannot rename inherited constraint \"%s\"",
+							oldconname)));
+	}
+
+	if (con->conindid
+		&& (con->contype == CONSTRAINT_PRIMARY
+			|| con->contype == CONSTRAINT_UNIQUE
+			|| con->contype == CONSTRAINT_EXCLUSION))
+		/* rename the index; this renames the constraint as well */
+		RenameRelationInternal(con->conindid, newconname, false, true);
+	else
+		RenameConstraintById(constraintOid, newconname);
+
+	ObjectAddressSet(address, ConstraintRelationId, constraintOid);
+
+	ReleaseSysCache(tuple);
+
+	if (targetrelation)
+	{
+		/*
+		 * Invalidate relcache so as others can see the new constraint name.
+		 */
+		CacheInvalidateRelcache(targetrelation);
+
+		relation_close(targetrelation, NoLock); /* close rel but keep lock */
+	}
+
+	return address;
+}
+
+/*
+ * RenameConstraint - execute ALTER TABLE/DOMAIN ... RENAME CONSTRAINT
+ *
+ * Dispatches to rename_constraint_internal with either a relation OID
+ * (table constraint) or a type OID (domain constraint).  Returns the
+ * address of the renamed constraint, or InvalidObjectAddress if the
+ * relation was missing and missing_ok was given.
+ */
+ObjectAddress
+RenameConstraint(RenameStmt *stmt)
+{
+	Oid			relid = InvalidOid;
+	Oid			typid = InvalidOid;
+
+	if (stmt->renameType == OBJECT_DOMCONSTRAINT)
+	{
+		Relation	rel;
+		HeapTuple	tup;
+
+		/* resolve the domain name and verify the caller owns it */
+		typid = typenameTypeId(NULL, makeTypeNameFromNameList(castNode(List, stmt->object)));
+		rel = table_open(TypeRelationId, RowExclusiveLock);
+		tup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid));
+		if (!HeapTupleIsValid(tup))
+			elog(ERROR, "cache lookup failed for type %u", typid);
+		checkDomainOwner(tup);
+		ReleaseSysCache(tup);
+		/* keep the lock on pg_type until commit */
+		table_close(rel, NoLock);
+	}
+	else
+	{
+		/* lock level taken here should match rename_constraint_internal */
+		relid = RangeVarGetRelidExtended(stmt->relation, AccessExclusiveLock,
+										 stmt->missing_ok ? RVR_MISSING_OK : 0,
+										 RangeVarCallbackForRenameAttribute,
+										 NULL);
+		if (!OidIsValid(relid))
+		{
+			ereport(NOTICE,
+					(errmsg("relation \"%s\" does not exist, skipping",
+							stmt->relation->relname)));
+			return InvalidObjectAddress;
+		}
+	}
+
+	return
+		rename_constraint_internal(relid, typid,
+								   stmt->subname,
+								   stmt->newname,
+								   (stmt->relation &&
+									stmt->relation->inh),	/* recursive? */
+								   false,	/* recursing? */
+								   0 /* expected inhcount */ );
+}
+
+/*
+ * Execute ALTER TABLE/INDEX/SEQUENCE/VIEW/MATERIALIZED VIEW/FOREIGN TABLE
+ * RENAME
+ *
+ * Returns the address of the renamed relation, or InvalidObjectAddress if
+ * the relation was missing and missing_ok was given.
+ */
+ObjectAddress
+RenameRelation(RenameStmt *stmt)
+{
+	bool		is_index_stmt = stmt->renameType == OBJECT_INDEX;
+	Oid			relid;
+	ObjectAddress address;
+
+	/*
+	 * Grab an exclusive lock on the target table, index, sequence, view,
+	 * materialized view, or foreign table, which we will NOT release until
+	 * end of transaction.
+	 *
+	 * Lock level used here should match RenameRelationInternal, to avoid lock
+	 * escalation.  However, because ALTER INDEX can be used with any relation
+	 * type, we mustn't believe without verification.
+	 */
+	for (;;)
+	{
+		LOCKMODE	lockmode;
+		char		relkind;
+		bool		obj_is_index;
+
+		/* indexes get by with a weaker lock; see RenameRelationInternal */
+		lockmode = is_index_stmt ? ShareUpdateExclusiveLock : AccessExclusiveLock;
+
+		relid = RangeVarGetRelidExtended(stmt->relation, lockmode,
+										 stmt->missing_ok ? RVR_MISSING_OK : 0,
+										 RangeVarCallbackForAlterRelation,
+										 (void *) stmt);
+
+		if (!OidIsValid(relid))
+		{
+			ereport(NOTICE,
+					(errmsg("relation \"%s\" does not exist, skipping",
+							stmt->relation->relname)));
+			return InvalidObjectAddress;
+		}
+
+		/*
+		 * We allow mismatched statement and object types (e.g., ALTER INDEX
+		 * to rename a table), but we might've used the wrong lock level.  If
+		 * that happens, retry with the correct lock level.  We don't bother
+		 * if we already acquired AccessExclusiveLock with an index, however.
+		 */
+		relkind = get_rel_relkind(relid);
+		obj_is_index = (relkind == RELKIND_INDEX ||
+						relkind == RELKIND_PARTITIONED_INDEX);
+		if (obj_is_index || is_index_stmt == obj_is_index)
+			break;
+
+		/* wrong guess: drop the lock and loop with the corrected mode */
+		UnlockRelationOid(relid, lockmode);
+		is_index_stmt = obj_is_index;
+	}
+
+	/* Do the work */
+	RenameRelationInternal(relid, stmt->newname, false, is_index_stmt);
+
+	ObjectAddressSet(address, RelationRelationId, relid);
+
+	return address;
+}
+
+/*
+ * RenameRelationInternal - change the name of a relation
+ *
+ * myrelid: relation to rename; newrelname: the new name.
+ * is_internal is passed through to the object-access hook.
+ * is_index may be given as true only if the relation really is an index
+ * (or partitioned index); it selects a weaker lock level.
+ *
+ * Also renames the relation's rowtype and, for an index backing a
+ * constraint, the constraint itself.
+ */
+void
+RenameRelationInternal(Oid myrelid, const char *newrelname, bool is_internal, bool is_index)
+{
+	Relation	targetrelation;
+	Relation	relrelation;	/* for RELATION relation */
+	HeapTuple	reltup;
+	Form_pg_class relform;
+	Oid			namespaceId;
+
+	/*
+	 * Grab a lock on the target relation, which we will NOT release until end
+	 * of transaction.  We need at least a self-exclusive lock so that
+	 * concurrent DDL doesn't overwrite the rename if they start updating
+	 * while still seeing the old version.  The lock also guards against
+	 * triggering relcache reloads in concurrent sessions, which might not
+	 * handle this information changing under them.  For indexes, we can use a
+	 * reduced lock level because RelationReloadIndexInfo() handles indexes
+	 * specially.
+	 */
+	targetrelation = relation_open(myrelid, is_index ? ShareUpdateExclusiveLock : AccessExclusiveLock);
+	namespaceId = RelationGetNamespace(targetrelation);
+
+	/*
+	 * Find relation's pg_class tuple, and make sure newrelname isn't in use.
+	 */
+	relrelation = table_open(RelationRelationId, RowExclusiveLock);
+
+	reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(myrelid));
+	if (!HeapTupleIsValid(reltup))	/* shouldn't happen */
+		elog(ERROR, "cache lookup failed for relation %u", myrelid);
+	relform = (Form_pg_class) GETSTRUCT(reltup);
+
+	/* name collisions are checked within the relation's own namespace */
+	if (get_relname_relid(newrelname, namespaceId) != InvalidOid)
+		ereport(ERROR,
+				(errcode(ERRCODE_DUPLICATE_TABLE),
+				 errmsg("relation \"%s\" already exists",
+						newrelname)));
+
+	/*
+	 * RenameRelation is careful not to believe the caller's idea of the
+	 * relation kind being handled.  We don't have to worry about this, but
+	 * let's not be totally oblivious to it.  We can process an index as
+	 * not-an-index, but not the other way around.
+	 */
+	Assert(!is_index ||
+		   is_index == (targetrelation->rd_rel->relkind == RELKIND_INDEX ||
+						targetrelation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX));
+
+	/*
+	 * Update pg_class tuple with new relname.  (Scribbling on reltup is OK
+	 * because it's a copy...)
+	 */
+	namestrcpy(&(relform->relname), newrelname);
+
+	CatalogTupleUpdate(relrelation, &reltup->t_self, reltup);
+
+	InvokeObjectPostAlterHookArg(RelationRelationId, myrelid, 0,
+								 InvalidOid, is_internal);
+
+	heap_freetuple(reltup);
+	table_close(relrelation, RowExclusiveLock);
+
+	/*
+	 * Also rename the associated type, if any.
+	 */
+	if (OidIsValid(targetrelation->rd_rel->reltype))
+		RenameTypeInternal(targetrelation->rd_rel->reltype,
+						   newrelname, namespaceId);
+
+	/*
+	 * Also rename the associated constraint, if any.
+	 */
+	if (targetrelation->rd_rel->relkind == RELKIND_INDEX ||
+		targetrelation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX)
+	{
+		Oid			constraintId = get_index_constraint(myrelid);
+
+		if (OidIsValid(constraintId))
+			RenameConstraintById(constraintId, newrelname);
+	}
+
+	/*
+	 * Close rel, but keep lock!
+	 */
+	relation_close(targetrelation, NoLock);
+}
+
+/*
+ * ResetRelRewrite - clear the relrewrite field in a relation's pg_class row
+ */
+void
+ResetRelRewrite(Oid myrelid)
+{
+	Relation	classRel;
+	HeapTuple	classtup;
+
+	/* Fetch a modifiable copy of the relation's pg_class tuple. */
+	classRel = table_open(RelationRelationId, RowExclusiveLock);
+
+	classtup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(myrelid));
+	if (!HeapTupleIsValid(classtup))	/* shouldn't happen */
+		elog(ERROR, "cache lookup failed for relation %u", myrelid);
+
+	/* Mark the relation as no longer being the target of a rewrite. */
+	((Form_pg_class) GETSTRUCT(classtup))->relrewrite = InvalidOid;
+
+	CatalogTupleUpdate(classRel, &classtup->t_self, classtup);
+
+	heap_freetuple(classtup);
+	table_close(classRel, RowExclusiveLock);
+}
+
+/*
+ * Disallow ALTER TABLE (and similar commands) when the current backend has
+ * any open reference to the target table besides the one just acquired by
+ * the calling command; this implies there's an open cursor or active plan.
+ * We need this check because our lock doesn't protect us against stomping
+ * on our own foot, only other people's feet!
+ *
+ * For ALTER TABLE, the only case known to cause serious trouble is ALTER
+ * COLUMN TYPE, and some changes are obviously pretty benign, so this could
+ * possibly be relaxed to only error out for certain types of alterations.
+ * But the use-case for allowing any of these things is not obvious, so we
+ * won't work hard at it for now.
+ *
+ * We also reject these commands if there are any pending AFTER trigger events
+ * for the rel. This is certainly necessary for the rewriting variants of
+ * ALTER TABLE, because they don't preserve tuple TIDs and so the pending
+ * events would try to fetch the wrong tuples. It might be overly cautious
+ * in other cases, but again it seems better to err on the side of paranoia.
+ *
+ * REINDEX calls this with "rel" referencing the index to be rebuilt; here
+ * we are worried about active indexscans on the index. The trigger-event
+ * check can be skipped, since we are doing no damage to the parent table.
+ *
+ * The statement name (eg, "ALTER TABLE") is passed for use in error messages.
+ */
+void
+CheckTableNotInUse(Relation rel, const char *stmt)
+{
+	int			allowed_refcnt;
+
+	/* Nailed relations carry one extra, permanent reference. */
+	if (rel->rd_isnailed)
+		allowed_refcnt = 2;
+	else
+		allowed_refcnt = 1;
+
+	if (rel->rd_refcnt != allowed_refcnt)
+		ereport(ERROR,
+				(errcode(ERRCODE_OBJECT_IN_USE),
+		/* translator: first %s is a SQL command, eg ALTER TABLE */
+				 errmsg("cannot %s \"%s\" because it is being used by active queries in this session",
+						stmt, RelationGetRelationName(rel))));
+
+	/* Indexes are exempt from the pending-trigger-event check. */
+	if (rel->rd_rel->relkind != RELKIND_INDEX &&
+		rel->rd_rel->relkind != RELKIND_PARTITIONED_INDEX &&
+		AfterTriggerPendingOnRel(RelationGetRelid(rel)))
+		ereport(ERROR,
+				(errcode(ERRCODE_OBJECT_IN_USE),
+		/* translator: first %s is a SQL command, eg ALTER TABLE */
+				 errmsg("cannot %s \"%s\" because it has pending trigger events",
+						stmt, RelationGetRelationName(rel))));
+}
+
+/*
+ * AlterTableLookupRelation
+ *		Resolve the relation named by an ALTER TABLE statement to an OID,
+ *		acquiring the requested lock on it.
+ */
+Oid
+AlterTableLookupRelation(AlterTableStmt *stmt, LOCKMODE lockmode)
+{
+	uint32		rvr_flags;
+
+	/* tolerate a missing relation only when IF EXISTS was given */
+	rvr_flags = stmt->missing_ok ? RVR_MISSING_OK : 0;
+
+	return RangeVarGetRelidExtended(stmt->relation, lockmode, rvr_flags,
+									RangeVarCallbackForAlterRelation,
+									(void *) stmt);
+}
+
+/*
+ * AlterTable
+ * Execute ALTER TABLE, which can be a list of subcommands
+ *
+ * ALTER TABLE is performed in three phases:
+ * 1. Examine subcommands and perform pre-transformation checking.
+ * 2. Validate and transform subcommands, and update system catalogs.
+ * 3. Scan table(s) to check new constraints, and optionally recopy
+ * the data into new table(s).
+ * Phase 3 is not performed unless one or more of the subcommands requires
+ * it. The intention of this design is to allow multiple independent
+ * updates of the table schema to be performed with only one pass over the
+ * data.
+ *
+ * ATPrepCmd performs phase 1. A "work queue" entry is created for
+ * each table to be affected (there may be multiple affected tables if the
+ * commands traverse a table inheritance hierarchy). Also we do preliminary
+ * validation of the subcommands. Because earlier subcommands may change
+ * the catalog state seen by later commands, there are limits to what can
+ * be done in this phase. Generally, this phase acquires table locks,
+ * checks permissions and relkind, and recurses to find child tables.
+ *
+ * ATRewriteCatalogs performs phase 2 for each affected table.
+ * Certain subcommands need to be performed before others to avoid
+ * unnecessary conflicts; for example, DROP COLUMN should come before
+ * ADD COLUMN. Therefore phase 1 divides the subcommands into multiple
+ * lists, one for each logical "pass" of phase 2.
+ *
+ * ATRewriteTables performs phase 3 for those tables that need it.
+ *
+ * For most subcommand types, phases 2 and 3 do no explicit recursion,
+ * since phase 1 already does it. However, for certain subcommand types
+ * it is only possible to determine how to recurse at phase 2 time; for
+ * those cases, phase 1 sets the cmd->recurse flag (or, in some older coding,
+ * changes the command subtype of a "Recurse" variant XXX to be cleaned up.)
+ *
+ * Thanks to the magic of MVCC, an error anywhere along the way rolls back
+ * the whole operation; we don't have to do anything special to clean up.
+ *
+ * The caller must lock the relation, with an appropriate lock level
+ * for the subcommands requested, using AlterTableGetLockLevel(stmt->cmds)
+ * or higher. We pass the lock level down
+ * so that we can apply it recursively to inherited tables. Note that the
+ * lock level we want as we recurse might well be higher than required for
+ * that specific subcommand. So we pass down the overall lock requirement,
+ * rather than reassess it at lower levels.
+ *
+ * The caller also provides a "context" which is to be passed back to
+ * utility.c when we need to execute a subcommand such as CREATE INDEX.
+ * Some of the fields therein, such as the relid, are used here as well.
+ */
+void
+AlterTable(AlterTableStmt *stmt, LOCKMODE lockmode,
+		   AlterTableUtilityContext *context)
+{
+	Relation	targetrel;
+
+	/*
+	 * The caller already holds an adequate lock, so open the relation
+	 * without taking another one.
+	 */
+	targetrel = relation_open(context->relid, NoLock);
+
+	/* Refuse to operate on a rel this backend has otherwise open. */
+	CheckTableNotInUse(targetrel, "ALTER TABLE");
+
+	ATController(stmt, targetrel, stmt->cmds, stmt->relation->inh, lockmode,
+				 context);
+}
+
+/*
+ * AlterTableInternal
+ *
+ * ALTER TABLE with the target relation identified by OID rather than name.
+ *
+ * Callers commonly already hold the relation open, so we deliberately skip
+ * the in-use check; consequently this entry point must not be used for
+ * alterations that could break existing query plans.  On the assumption
+ * it's not used for such, we don't have to reject pending AFTER triggers,
+ * either.
+ *
+ * Also, since no AlterTableUtilityContext is available here, this cannot
+ * be used for subcommand types that require parse transformation or that
+ * could generate subcommands needing ProcessUtility.
+ */
+void
+AlterTableInternal(Oid relid, List *cmds, bool recurse)
+{
+	LOCKMODE	lockmode = AlterTableGetLockLevel(cmds);
+	Relation	rel;
+
+	rel = relation_open(relid, lockmode);
+
+	/* Report the target relation to any event triggers. */
+	EventTriggerAlterTableRelid(relid);
+
+	ATController(NULL, rel, cmds, recurse, lockmode, NULL);
+}
+
+/*
+ * AlterTableGetLockLevel
+ *
+ * Sets the overall lock level required for the supplied list of subcommands.
+ * Policy for doing this set according to needs of AlterTable(), see
+ * comments there for overall explanation.
+ *
+ * Function is called before and after parsing, so it must give same
+ * answer each time it is called. Some subcommands are transformed
+ * into other subcommand types, so the transform must never be made to a
+ * lower lock level than previously assigned. All transforms are noted below.
+ *
+ * Since this is called before we lock the table we cannot use table metadata
+ * to influence the type of lock we acquire.
+ *
+ * There should be no lockmodes hardcoded into the subcommand functions. All
+ * lockmode decisions for ALTER TABLE are made here only. The one exception is
+ * ALTER TABLE RENAME which is treated as a different statement type T_RenameStmt
+ * and does not travel through this section of code and cannot be combined with
+ * any of the subcommands given here.
+ *
+ * Note that Hot Standby only knows about AccessExclusiveLocks on the primary
+ * so any changes that might affect SELECTs running on standbys need to use
+ * AccessExclusiveLocks even if you think a lesser lock would do, unless you
+ * have a solution for that also.
+ *
+ * Also note that pg_dump uses only an AccessShareLock, meaning that anything
+ * that takes a lock less than AccessExclusiveLock can change object definitions
+ * while pg_dump is running. Be careful to check that the appropriate data is
+ * derived by pg_dump using an MVCC snapshot, rather than syscache lookups,
+ * otherwise we might end up with an inconsistent dump that can't restore.
+ */
+LOCKMODE
+AlterTableGetLockLevel(List *cmds)
+{
+	/*
+	 * This only works if we read catalog tables using MVCC snapshots.
+	 */
+	ListCell   *lcmd;
+	/* start from the weakest lock this function ever returns */
+	LOCKMODE	lockmode = ShareUpdateExclusiveLock;
+
+	foreach(lcmd, cmds)
+	{
+		AlterTableCmd *cmd = (AlterTableCmd *) lfirst(lcmd);
+		LOCKMODE	cmd_lockmode = AccessExclusiveLock; /* default for compiler */
+
+		switch (cmd->subtype)
+		{
+				/*
+				 * These subcommands rewrite the heap, so require full locks.
+				 */
+			case AT_AddColumn:	/* may rewrite heap, in some cases and visible
+								 * to SELECT */
+			case AT_SetAccessMethod:	/* must rewrite heap */
+			case AT_SetTableSpace:	/* must rewrite heap */
+			case AT_AlterColumnType:	/* must rewrite heap */
+				cmd_lockmode = AccessExclusiveLock;
+				break;
+
+				/*
+				 * These subcommands may require addition of toast tables. If
+				 * we add a toast table to a table currently being scanned, we
+				 * might miss data added to the new toast table by concurrent
+				 * insert transactions.
+				 */
+			case AT_SetStorage: /* may add toast tables, see
+								 * ATRewriteCatalogs() */
+				cmd_lockmode = AccessExclusiveLock;
+				break;
+
+				/*
+				 * Removing constraints can affect SELECTs that have been
+				 * optimized assuming the constraint holds true. See also
+				 * CloneFkReferenced.
+				 */
+			case AT_DropConstraint: /* as DROP INDEX */
+			case AT_DropNotNull:	/* may change some SQL plans */
+				cmd_lockmode = AccessExclusiveLock;
+				break;
+
+				/*
+				 * Subcommands that may be visible to concurrent SELECTs
+				 */
+			case AT_DropColumn: /* change visible to SELECT */
+			case AT_AddColumnToView:	/* CREATE VIEW */
+			case AT_DropOids:	/* used to equiv to DropColumn */
+			case AT_EnableAlwaysRule:	/* may change SELECT rules */
+			case AT_EnableReplicaRule:	/* may change SELECT rules */
+			case AT_EnableRule: /* may change SELECT rules */
+			case AT_DisableRule:	/* may change SELECT rules */
+				cmd_lockmode = AccessExclusiveLock;
+				break;
+
+				/*
+				 * Changing owner may remove implicit SELECT privileges
+				 */
+			case AT_ChangeOwner:	/* change visible to SELECT */
+				cmd_lockmode = AccessExclusiveLock;
+				break;
+
+				/*
+				 * Changing foreign table options may affect optimization.
+				 */
+			case AT_GenericOptions:
+			case AT_AlterColumnGenericOptions:
+				cmd_lockmode = AccessExclusiveLock;
+				break;
+
+				/*
+				 * These subcommands affect write operations only.
+				 */
+			case AT_EnableTrig:
+			case AT_EnableAlwaysTrig:
+			case AT_EnableReplicaTrig:
+			case AT_EnableTrigAll:
+			case AT_EnableTrigUser:
+			case AT_DisableTrig:
+			case AT_DisableTrigAll:
+			case AT_DisableTrigUser:
+				cmd_lockmode = ShareRowExclusiveLock;
+				break;
+
+				/*
+				 * These subcommands affect write operations only. XXX
+				 * Theoretically, these could be ShareRowExclusiveLock.
+				 */
+			case AT_ColumnDefault:
+			case AT_CookedColumnDefault:
+			case AT_AlterConstraint:
+			case AT_AddIndex:	/* from ADD CONSTRAINT */
+			case AT_AddIndexConstraint:
+			case AT_ReplicaIdentity:
+			case AT_SetNotNull:
+			case AT_EnableRowSecurity:
+			case AT_DisableRowSecurity:
+			case AT_ForceRowSecurity:
+			case AT_NoForceRowSecurity:
+			case AT_AddIdentity:
+			case AT_DropIdentity:
+			case AT_SetIdentity:
+			case AT_DropExpression:
+			case AT_SetCompression:
+				cmd_lockmode = AccessExclusiveLock;
+				break;
+
+			case AT_AddConstraint:
+			case AT_AddConstraintRecurse:	/* becomes AT_AddConstraint */
+			case AT_ReAddConstraint:	/* becomes AT_AddConstraint */
+			case AT_ReAddDomainConstraint:	/* becomes AT_AddConstraint */
+				/* lock strength depends on the constraint type being added */
+				if (IsA(cmd->def, Constraint))
+				{
+					Constraint *con = (Constraint *) cmd->def;
+
+					switch (con->contype)
+					{
+						case CONSTR_EXCLUSION:
+						case CONSTR_PRIMARY:
+						case CONSTR_UNIQUE:
+
+							/*
+							 * Cases essentially the same as CREATE INDEX. We
+							 * could reduce the lock strength to ShareLock if
+							 * we can work out how to allow concurrent catalog
+							 * updates. XXX Might be set down to
+							 * ShareRowExclusiveLock but requires further
+							 * analysis.
+							 */
+							cmd_lockmode = AccessExclusiveLock;
+							break;
+						case CONSTR_FOREIGN:
+
+							/*
+							 * We add triggers to both tables when we add a
+							 * Foreign Key, so the lock level must be at least
+							 * as strong as CREATE TRIGGER.
+							 */
+							cmd_lockmode = ShareRowExclusiveLock;
+							break;
+
+						default:
+							cmd_lockmode = AccessExclusiveLock;
+					}
+				}
+				break;
+
+				/*
+				 * These subcommands affect inheritance behaviour. Queries
+				 * started before us will continue to see the old inheritance
+				 * behaviour, while queries started after we commit will see
+				 * new behaviour. No need to prevent reads or writes to the
+				 * subtable while we hook it up though. Changing the TupDesc
+				 * may be a problem, so keep highest lock.
+				 */
+			case AT_AddInherit:
+			case AT_DropInherit:
+				cmd_lockmode = AccessExclusiveLock;
+				break;
+
+				/*
+				 * These subcommands affect implicit row type conversion. They
+				 * have effects similar to CREATE/DROP CAST on queries. We
+				 * don't provide for invalidating parse trees as a result of
+				 * such changes, so we keep these at AccessExclusiveLock.
+				 */
+			case AT_AddOf:
+			case AT_DropOf:
+				cmd_lockmode = AccessExclusiveLock;
+				break;
+
+				/*
+				 * Only used by CREATE OR REPLACE VIEW which must conflict
+				 * with any SELECTs currently using the view.
+				 */
+			case AT_ReplaceRelOptions:
+				cmd_lockmode = AccessExclusiveLock;
+				break;
+
+				/*
+				 * These subcommands affect general strategies for performance
+				 * and maintenance, though don't change the semantic results
+				 * from normal data reads and writes. Delaying an ALTER TABLE
+				 * behind currently active writes only delays the point where
+				 * the new strategy begins to take effect, so there is no
+				 * benefit in waiting. In this case the minimum restriction
+				 * applies: we don't currently allow concurrent catalog
+				 * updates.
+				 */
+			case AT_SetStatistics:	/* Uses MVCC in getTableAttrs() */
+			case AT_ClusterOn:	/* Uses MVCC in getIndexes() */
+			case AT_DropCluster:	/* Uses MVCC in getIndexes() */
+			case AT_SetOptions: /* Uses MVCC in getTableAttrs() */
+			case AT_ResetOptions:	/* Uses MVCC in getTableAttrs() */
+				cmd_lockmode = ShareUpdateExclusiveLock;
+				break;
+
+			case AT_SetLogged:
+			case AT_SetUnLogged:
+				cmd_lockmode = AccessExclusiveLock;
+				break;
+
+			case AT_ValidateConstraint: /* Uses MVCC in getConstraints() */
+				cmd_lockmode = ShareUpdateExclusiveLock;
+				break;
+
+				/*
+				 * Rel options are more complex than first appears. Options
+				 * are set here for tables, views and indexes; for historical
+				 * reasons these can all be used with ALTER TABLE, so we can't
+				 * decide between them using the basic grammar.
+				 */
+			case AT_SetRelOptions:	/* Uses MVCC in getIndexes() and
+									 * getTables() */
+			case AT_ResetRelOptions:	/* Uses MVCC in getIndexes() and
+										 * getTables() */
+				cmd_lockmode = AlterTableGetRelOptionsLockLevel((List *) cmd->def);
+				break;
+
+			case AT_AttachPartition:
+				cmd_lockmode = ShareUpdateExclusiveLock;
+				break;
+
+			case AT_DetachPartition:
+				/* CONCURRENTLY detach can use a weaker lock */
+				if (((PartitionCmd *) cmd->def)->concurrent)
+					cmd_lockmode = ShareUpdateExclusiveLock;
+				else
+					cmd_lockmode = AccessExclusiveLock;
+				break;
+
+			case AT_DetachPartitionFinalize:
+				cmd_lockmode = ShareUpdateExclusiveLock;
+				break;
+
+			case AT_CheckNotNull:
+
+				/*
+				 * This only examines the table's schema; but lock must be
+				 * strong enough to prevent concurrent DROP NOT NULL.
+				 */
+				cmd_lockmode = AccessShareLock;
+				break;
+
+			default:			/* oops */
+				elog(ERROR, "unrecognized alter table type: %d",
+					 (int) cmd->subtype);
+				break;
+		}
+
+		/*
+		 * Take the greatest lockmode from any subcommand
+		 */
+		if (cmd_lockmode > lockmode)
+			lockmode = cmd_lockmode;
+	}
+
+	return lockmode;
+}
+
+/*
+ * ATController provides top level control over the phases.
+ *
+ * parsetree is passed in to allow it to be passed to event triggers
+ * when requested; it may be NULL (see AlterTableInternal).
+ *
+ * rel is the already-locked and opened target relation; cmds is the list
+ * of AlterTableCmd subcommands, recurse tells whether to process child
+ * tables, lockmode is the lock level the caller acquired, and context may
+ * be NULL when no utility-statement context is available.
+ */
+static void
+ATController(AlterTableStmt *parsetree,
+			 Relation rel, List *cmds, bool recurse, LOCKMODE lockmode,
+			 AlterTableUtilityContext *context)
+{
+	List	   *wqueue = NIL;
+	ListCell   *lcmd;
+
+	/* Phase 1: preliminary examination of commands, create work queue */
+	foreach(lcmd, cmds)
+	{
+		AlterTableCmd *cmd = (AlterTableCmd *) lfirst(lcmd);
+
+		ATPrepCmd(&wqueue, rel, cmd, recurse, false, lockmode, context);
+	}
+
+	/* Close the relation, but keep lock until commit */
+	relation_close(rel, NoLock);
+
+	/* Phase 2: update system catalogs */
+	ATRewriteCatalogs(&wqueue, lockmode, context);
+
+	/* Phase 3: scan/rewrite tables as needed, and run afterStmts */
+	ATRewriteTables(parsetree, &wqueue, lockmode, context);
+}
+
+/*
+ * ATPrepCmd
+ *
+ * Traffic cop for ALTER TABLE Phase 1 operations, including simple
+ * recursion and permission checks.
+ *
+ * Caller must have acquired appropriate lock type on relation already.
+ * This lock should be held until commit.
+ */
+static void
+ATPrepCmd(List **wqueue, Relation rel, AlterTableCmd *cmd,
+ bool recurse, bool recursing, LOCKMODE lockmode,
+ AlterTableUtilityContext *context)
+{
+ AlteredTableInfo *tab;
+ int pass = AT_PASS_UNSET;
+
+ /* Find or create work queue entry for this table */
+ tab = ATGetQueueEntry(wqueue, rel);
+
+ /*
+ * Disallow any ALTER TABLE other than ALTER TABLE DETACH FINALIZE on
+ * partitions that are pending detach.
+ */
+ if (rel->rd_rel->relispartition &&
+ cmd->subtype != AT_DetachPartitionFinalize &&
+ PartitionHasPendingDetach(RelationGetRelid(rel)))
+ ereport(ERROR,
+ errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("cannot alter partition \"%s\" with an incomplete detach",
+ RelationGetRelationName(rel)),
+ errhint("Use ALTER TABLE ... DETACH PARTITION ... FINALIZE to complete the pending detach operation."));
+
+ /*
+ * Copy the original subcommand for each table, so we can scribble on it.
+ * This avoids conflicts when different child tables need to make
+ * different parse transformations (for example, the same column may have
+ * different column numbers in different children).
+ */
+ cmd = copyObject(cmd);
+
+ /*
+ * Do permissions and relkind checking, recursion to child tables if
+ * needed, and any additional phase-1 processing needed. (But beware of
+ * adding any processing that looks at table details that another
+ * subcommand could change. In some cases we reject multiple subcommands
+ * that could try to change the same state in contrary ways.)
+ */
+ switch (cmd->subtype)
+ {
+ case AT_AddColumn: /* ADD COLUMN */
+ ATSimplePermissions(cmd->subtype, rel,
+ ATT_TABLE | ATT_COMPOSITE_TYPE | ATT_FOREIGN_TABLE);
+ ATPrepAddColumn(wqueue, rel, recurse, recursing, false, cmd,
+ lockmode, context);
+ /* Recursion occurs during execution phase */
+ pass = AT_PASS_ADD_COL;
+ break;
+ case AT_AddColumnToView: /* add column via CREATE OR REPLACE VIEW */
+ ATSimplePermissions(cmd->subtype, rel, ATT_VIEW);
+ ATPrepAddColumn(wqueue, rel, recurse, recursing, true, cmd,
+ lockmode, context);
+ /* Recursion occurs during execution phase */
+ pass = AT_PASS_ADD_COL;
+ break;
+ case AT_ColumnDefault: /* ALTER COLUMN DEFAULT */
+
+ /*
+ * We allow defaults on views so that INSERT into a view can have
+ * default-ish behavior. This works because the rewriter
+ * substitutes default values into INSERTs before it expands
+ * rules.
+ */
+ ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_VIEW | ATT_FOREIGN_TABLE);
+ ATSimpleRecursion(wqueue, rel, cmd, recurse, lockmode, context);
+ /* No command-specific prep needed */
+ pass = cmd->def ? AT_PASS_ADD_OTHERCONSTR : AT_PASS_DROP;
+ break;
+ case AT_CookedColumnDefault: /* add a pre-cooked default */
+ /* This is currently used only in CREATE TABLE */
+ /* (so the permission check really isn't necessary) */
+ ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_FOREIGN_TABLE);
+ /* This command never recurses */
+ pass = AT_PASS_ADD_OTHERCONSTR;
+ break;
+ case AT_AddIdentity:
+ ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_VIEW | ATT_FOREIGN_TABLE);
+ /* This command never recurses */
+ pass = AT_PASS_ADD_OTHERCONSTR;
+ break;
+ case AT_SetIdentity:
+ ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_VIEW | ATT_FOREIGN_TABLE);
+ /* This command never recurses */
+ /* This should run after AddIdentity, so do it in MISC pass */
+ pass = AT_PASS_MISC;
+ break;
+ case AT_DropIdentity:
+ ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_VIEW | ATT_FOREIGN_TABLE);
+ /* This command never recurses */
+ pass = AT_PASS_DROP;
+ break;
+ case AT_DropNotNull: /* ALTER COLUMN DROP NOT NULL */
+ ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_FOREIGN_TABLE);
+ ATPrepDropNotNull(rel, recurse, recursing);
+ ATSimpleRecursion(wqueue, rel, cmd, recurse, lockmode, context);
+ pass = AT_PASS_DROP;
+ break;
+ case AT_SetNotNull: /* ALTER COLUMN SET NOT NULL */
+ ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_FOREIGN_TABLE);
+ /* Need command-specific recursion decision */
+ ATPrepSetNotNull(wqueue, rel, cmd, recurse, recursing,
+ lockmode, context);
+ pass = AT_PASS_COL_ATTRS;
+ break;
+ case AT_CheckNotNull: /* check column is already marked NOT NULL */
+ ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_FOREIGN_TABLE);
+ ATSimpleRecursion(wqueue, rel, cmd, recurse, lockmode, context);
+ /* No command-specific prep needed */
+ pass = AT_PASS_COL_ATTRS;
+ break;
+ case AT_DropExpression: /* ALTER COLUMN DROP EXPRESSION */
+ ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_FOREIGN_TABLE);
+ ATSimpleRecursion(wqueue, rel, cmd, recurse, lockmode, context);
+ ATPrepDropExpression(rel, cmd, recurse, recursing, lockmode);
+ pass = AT_PASS_DROP;
+ break;
+ case AT_SetStatistics: /* ALTER COLUMN SET STATISTICS */
+ ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_MATVIEW | ATT_INDEX | ATT_PARTITIONED_INDEX | ATT_FOREIGN_TABLE);
+ ATSimpleRecursion(wqueue, rel, cmd, recurse, lockmode, context);
+ /* No command-specific prep needed */
+ pass = AT_PASS_MISC;
+ break;
+ case AT_SetOptions: /* ALTER COLUMN SET ( options ) */
+ case AT_ResetOptions: /* ALTER COLUMN RESET ( options ) */
+ ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_MATVIEW | ATT_FOREIGN_TABLE);
+ /* This command never recurses */
+ pass = AT_PASS_MISC;
+ break;
+ case AT_SetStorage: /* ALTER COLUMN SET STORAGE */
+ ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_MATVIEW | ATT_FOREIGN_TABLE);
+ ATSimpleRecursion(wqueue, rel, cmd, recurse, lockmode, context);
+ /* No command-specific prep needed */
+ pass = AT_PASS_MISC;
+ break;
+ case AT_SetCompression: /* ALTER COLUMN SET COMPRESSION */
+ ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_MATVIEW);
+ /* This command never recurses */
+ /* No command-specific prep needed */
+ pass = AT_PASS_MISC;
+ break;
+ case AT_DropColumn: /* DROP COLUMN */
+ ATSimplePermissions(cmd->subtype, rel,
+ ATT_TABLE | ATT_COMPOSITE_TYPE | ATT_FOREIGN_TABLE);
+ ATPrepDropColumn(wqueue, rel, recurse, recursing, cmd,
+ lockmode, context);
+ /* Recursion occurs during execution phase */
+ pass = AT_PASS_DROP;
+ break;
+ case AT_AddIndex: /* ADD INDEX */
+ ATSimplePermissions(cmd->subtype, rel, ATT_TABLE);
+ /* This command never recurses */
+ /* No command-specific prep needed */
+ pass = AT_PASS_ADD_INDEX;
+ break;
+ case AT_AddConstraint: /* ADD CONSTRAINT */
+ ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_FOREIGN_TABLE);
+ /* Recursion occurs during execution phase */
+ /* No command-specific prep needed except saving recurse flag */
+ if (recurse)
+ cmd->subtype = AT_AddConstraintRecurse;
+ pass = AT_PASS_ADD_CONSTR;
+ break;
+ case AT_AddIndexConstraint: /* ADD CONSTRAINT USING INDEX */
+ ATSimplePermissions(cmd->subtype, rel, ATT_TABLE);
+ /* This command never recurses */
+ /* No command-specific prep needed */
+ pass = AT_PASS_ADD_INDEXCONSTR;
+ break;
+ case AT_DropConstraint: /* DROP CONSTRAINT */
+ ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_FOREIGN_TABLE);
+ ATCheckPartitionsNotInUse(rel, lockmode);
+ /* Other recursion occurs during execution phase */
+ /* No command-specific prep needed except saving recurse flag */
+ if (recurse)
+ cmd->subtype = AT_DropConstraintRecurse;
+ pass = AT_PASS_DROP;
+ break;
+ case AT_AlterColumnType: /* ALTER COLUMN TYPE */
+ ATSimplePermissions(cmd->subtype, rel,
+ ATT_TABLE | ATT_COMPOSITE_TYPE | ATT_FOREIGN_TABLE);
+ /* See comments for ATPrepAlterColumnType */
+ cmd = ATParseTransformCmd(wqueue, tab, rel, cmd, recurse, lockmode,
+ AT_PASS_UNSET, context);
+ Assert(cmd != NULL);
+ /* Performs own recursion */
+ ATPrepAlterColumnType(wqueue, tab, rel, recurse, recursing, cmd,
+ lockmode, context);
+ pass = AT_PASS_ALTER_TYPE;
+ break;
+ case AT_AlterColumnGenericOptions:
+ ATSimplePermissions(cmd->subtype, rel, ATT_FOREIGN_TABLE);
+ /* This command never recurses */
+ /* No command-specific prep needed */
+ pass = AT_PASS_MISC;
+ break;
+ case AT_ChangeOwner: /* ALTER OWNER */
+ /* This command never recurses */
+ /* No command-specific prep needed */
+ pass = AT_PASS_MISC;
+ break;
+ case AT_ClusterOn: /* CLUSTER ON */
+ case AT_DropCluster: /* SET WITHOUT CLUSTER */
+ ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_MATVIEW);
+ /* These commands never recurse */
+ /* No command-specific prep needed */
+ pass = AT_PASS_MISC;
+ break;
+ case AT_SetLogged: /* SET LOGGED */
+ ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_SEQUENCE);
+ if (tab->chgPersistence)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot change persistence setting twice")));
+ tab->chgPersistence = ATPrepChangePersistence(rel, true);
+ /* force rewrite if necessary; see comment in ATRewriteTables */
+ if (tab->chgPersistence)
+ {
+ tab->rewrite |= AT_REWRITE_ALTER_PERSISTENCE;
+ tab->newrelpersistence = RELPERSISTENCE_PERMANENT;
+ }
+ pass = AT_PASS_MISC;
+ break;
+ case AT_SetUnLogged: /* SET UNLOGGED */
+ ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_SEQUENCE);
+ if (tab->chgPersistence)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot change persistence setting twice")));
+ tab->chgPersistence = ATPrepChangePersistence(rel, false);
+ /* force rewrite if necessary; see comment in ATRewriteTables */
+ if (tab->chgPersistence)
+ {
+ tab->rewrite |= AT_REWRITE_ALTER_PERSISTENCE;
+ tab->newrelpersistence = RELPERSISTENCE_UNLOGGED;
+ }
+ pass = AT_PASS_MISC;
+ break;
+ case AT_DropOids: /* SET WITHOUT OIDS */
+ ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_FOREIGN_TABLE);
+ pass = AT_PASS_DROP;
+ break;
+ case AT_SetAccessMethod: /* SET ACCESS METHOD */
+ ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_MATVIEW);
+
+ /* partitioned tables don't have an access method */
+ if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot change access method of a partitioned table")));
+
+ /* check if another access method change was already requested */
+ if (OidIsValid(tab->newAccessMethod))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot have multiple SET ACCESS METHOD subcommands")));
+
+ ATPrepSetAccessMethod(tab, rel, cmd->name);
+ pass = AT_PASS_MISC; /* does not matter; no work in Phase 2 */
+ break;
+ case AT_SetTableSpace: /* SET TABLESPACE */
+ ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_MATVIEW | ATT_INDEX |
+ ATT_PARTITIONED_INDEX);
+ /* This command never recurses */
+ ATPrepSetTableSpace(tab, rel, cmd->name, lockmode);
+ pass = AT_PASS_MISC; /* doesn't actually matter */
+ break;
+ case AT_SetRelOptions: /* SET (...) */
+ case AT_ResetRelOptions: /* RESET (...) */
+ case AT_ReplaceRelOptions: /* reset them all, then set just these */
+ ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_VIEW | ATT_MATVIEW | ATT_INDEX);
+ /* This command never recurses */
+ /* No command-specific prep needed */
+ pass = AT_PASS_MISC;
+ break;
+ case AT_AddInherit: /* INHERIT */
+ ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_FOREIGN_TABLE);
+ /* This command never recurses */
+ ATPrepAddInherit(rel);
+ pass = AT_PASS_MISC;
+ break;
+ case AT_DropInherit: /* NO INHERIT */
+ ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_FOREIGN_TABLE);
+ /* This command never recurses */
+ /* No command-specific prep needed */
+ pass = AT_PASS_MISC;
+ break;
+ case AT_AlterConstraint: /* ALTER CONSTRAINT */
+ ATSimplePermissions(cmd->subtype, rel, ATT_TABLE);
+ /* Recursion occurs during execution phase */
+ pass = AT_PASS_MISC;
+ break;
+ case AT_ValidateConstraint: /* VALIDATE CONSTRAINT */
+ ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_FOREIGN_TABLE);
+ /* Recursion occurs during execution phase */
+ /* No command-specific prep needed except saving recurse flag */
+ if (recurse)
+ cmd->subtype = AT_ValidateConstraintRecurse;
+ pass = AT_PASS_MISC;
+ break;
+ case AT_ReplicaIdentity: /* REPLICA IDENTITY ... */
+ ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_MATVIEW);
+ pass = AT_PASS_MISC;
+ /* This command never recurses */
+ /* No command-specific prep needed */
+ break;
+ case AT_EnableTrig: /* ENABLE TRIGGER variants */
+ case AT_EnableAlwaysTrig:
+ case AT_EnableReplicaTrig:
+ case AT_EnableTrigAll:
+ case AT_EnableTrigUser:
+ case AT_DisableTrig: /* DISABLE TRIGGER variants */
+ case AT_DisableTrigAll:
+ case AT_DisableTrigUser:
+ ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_FOREIGN_TABLE);
+ /* Set up recursion for phase 2; no other prep needed */
+ if (recurse)
+ cmd->recurse = true;
+ pass = AT_PASS_MISC;
+ break;
+ case AT_EnableRule: /* ENABLE/DISABLE RULE variants */
+ case AT_EnableAlwaysRule:
+ case AT_EnableReplicaRule:
+ case AT_DisableRule:
+ case AT_AddOf: /* OF */
+ case AT_DropOf: /* NOT OF */
+ case AT_EnableRowSecurity:
+ case AT_DisableRowSecurity:
+ case AT_ForceRowSecurity:
+ case AT_NoForceRowSecurity:
+ ATSimplePermissions(cmd->subtype, rel, ATT_TABLE);
+ /* These commands never recurse */
+ /* No command-specific prep needed */
+ pass = AT_PASS_MISC;
+ break;
+ case AT_GenericOptions:
+ ATSimplePermissions(cmd->subtype, rel, ATT_FOREIGN_TABLE);
+ /* No command-specific prep needed */
+ pass = AT_PASS_MISC;
+ break;
+ case AT_AttachPartition:
+ ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_PARTITIONED_INDEX);
+ /* No command-specific prep needed */
+ pass = AT_PASS_MISC;
+ break;
+ case AT_DetachPartition:
+ ATSimplePermissions(cmd->subtype, rel, ATT_TABLE);
+ /* No command-specific prep needed */
+ pass = AT_PASS_MISC;
+ break;
+ case AT_DetachPartitionFinalize:
+ ATSimplePermissions(cmd->subtype, rel, ATT_TABLE);
+ /* No command-specific prep needed */
+ pass = AT_PASS_MISC;
+ break;
+ default: /* oops */
+ elog(ERROR, "unrecognized alter table type: %d",
+ (int) cmd->subtype);
+ pass = AT_PASS_UNSET; /* keep compiler quiet */
+ break;
+ }
+ Assert(pass > AT_PASS_UNSET);
+
+ /* Add the subcommand to the appropriate list for phase 2 */
+ tab->subcmds[pass] = lappend(tab->subcmds[pass], cmd);
+}
+
+/*
+ * ATRewriteCatalogs
+ *
+ * Traffic cop for ALTER TABLE Phase 2 operations. Subcommands are
+ * dispatched in a "safe" execution order (designed to avoid unnecessary
+ * conflicts).
+ *
+ * Outer loop is over passes (AT_PASS_*), inner loop over the work-queue
+ * tables, so all tables advance through a given pass before any table
+ * starts the next one. After everything runs, each affected table is
+ * re-checked for a needed TOAST table.
+ */
+static void
+ATRewriteCatalogs(List **wqueue, LOCKMODE lockmode,
+ AlterTableUtilityContext *context)
+{
+ int pass;
+ ListCell *ltab;
+
+ /*
+ * We process all the tables "in parallel", one pass at a time. This is
+ * needed because we may have to propagate work from one table to another
+ * (specifically, ALTER TYPE on a foreign key's PK has to dispatch the
+ * re-adding of the foreign key constraint to the other table). Work can
+ * only be propagated into later passes, however.
+ */
+ for (pass = 0; pass < AT_NUM_PASSES; pass++)
+ {
+ /* Go through each table that needs to be processed */
+ foreach(ltab, *wqueue)
+ {
+ AlteredTableInfo *tab = (AlteredTableInfo *) lfirst(ltab);
+ List *subcmds = tab->subcmds[pass];
+ ListCell *lcmd;
+
+ /* Skip tables with no work scheduled in this pass */
+ if (subcmds == NIL)
+ continue;
+
+ /*
+ * Open the relation and store it in tab. This allows subroutines
+ * close and reopen, if necessary. Appropriate lock was obtained
+ * by phase 1, needn't get it again.
+ */
+ tab->rel = relation_open(tab->relid, NoLock);
+
+ foreach(lcmd, subcmds)
+ ATExecCmd(wqueue, tab,
+ lfirst_node(AlterTableCmd, lcmd),
+ lockmode, pass, context);
+
+ /*
+ * After the ALTER TYPE pass, do cleanup work (this is not done in
+ * ATExecAlterColumnType since it should be done only once if
+ * multiple columns of a table are altered).
+ */
+ if (pass == AT_PASS_ALTER_TYPE)
+ ATPostAlterTypeCleanup(wqueue, tab, lockmode);
+
+ /*
+ * A subroutine may have closed the relation and reset tab->rel,
+ * so only close it ourselves if it is still open.
+ */
+ if (tab->rel)
+ {
+ relation_close(tab->rel, NoLock);
+ tab->rel = NULL;
+ }
+ }
+ }
+
+ /* Check to see if a toast table must be added. */
+ foreach(ltab, *wqueue)
+ {
+ AlteredTableInfo *tab = (AlteredTableInfo *) lfirst(ltab);
+
+ /*
+ * If the table is source table of ATTACH PARTITION command, we did
+ * not modify anything about it that will change its toasting
+ * requirement, so no need to check.
+ */
+ if (((tab->relkind == RELKIND_RELATION ||
+ tab->relkind == RELKIND_PARTITIONED_TABLE) &&
+ tab->partition_constraint == NULL) ||
+ tab->relkind == RELKIND_MATVIEW)
+ AlterTableCreateToastTable(tab->relid, (Datum) 0, lockmode);
+ }
+}
+
+/*
+ * ATExecCmd: dispatch a subcommand to appropriate execution routine
+ *
+ * Some subcommands (identity, constraints, partition attach/detach) are
+ * parse-transformed here first via ATParseTransformCmd, which may replace
+ * cmd or even return NULL when all resulting work was rescheduled into a
+ * later pass. The resulting object address is reported to event triggers,
+ * and the command counter is bumped so the next subcommand sees this
+ * one's catalog changes.
+ */
+static void
+ATExecCmd(List **wqueue, AlteredTableInfo *tab,
+ AlterTableCmd *cmd, LOCKMODE lockmode, int cur_pass,
+ AlterTableUtilityContext *context)
+{
+ ObjectAddress address = InvalidObjectAddress;
+ Relation rel = tab->rel;
+
+ switch (cmd->subtype)
+ {
+ case AT_AddColumn: /* ADD COLUMN */
+ case AT_AddColumnToView: /* add column via CREATE OR REPLACE VIEW */
+ address = ATExecAddColumn(wqueue, tab, rel, &cmd,
+ false, false,
+ lockmode, cur_pass, context);
+ break;
+ case AT_AddColumnRecurse:
+ address = ATExecAddColumn(wqueue, tab, rel, &cmd,
+ true, false,
+ lockmode, cur_pass, context);
+ break;
+ case AT_ColumnDefault: /* ALTER COLUMN DEFAULT */
+ address = ATExecColumnDefault(rel, cmd->name, cmd->def, lockmode);
+ break;
+ case AT_CookedColumnDefault: /* add a pre-cooked default */
+ address = ATExecCookedColumnDefault(rel, cmd->num, cmd->def);
+ break;
+ case AT_AddIdentity:
+ cmd = ATParseTransformCmd(wqueue, tab, rel, cmd, false, lockmode,
+ cur_pass, context);
+ Assert(cmd != NULL);
+ address = ATExecAddIdentity(rel, cmd->name, cmd->def, lockmode);
+ break;
+ case AT_SetIdentity:
+ cmd = ATParseTransformCmd(wqueue, tab, rel, cmd, false, lockmode,
+ cur_pass, context);
+ Assert(cmd != NULL);
+ address = ATExecSetIdentity(rel, cmd->name, cmd->def, lockmode);
+ break;
+ case AT_DropIdentity:
+ address = ATExecDropIdentity(rel, cmd->name, cmd->missing_ok, lockmode);
+ break;
+ case AT_DropNotNull: /* ALTER COLUMN DROP NOT NULL */
+ address = ATExecDropNotNull(rel, cmd->name, lockmode);
+ break;
+ case AT_SetNotNull: /* ALTER COLUMN SET NOT NULL */
+ address = ATExecSetNotNull(tab, rel, cmd->name, lockmode);
+ break;
+ case AT_CheckNotNull: /* check column is already marked NOT NULL */
+ ATExecCheckNotNull(tab, rel, cmd->name, lockmode);
+ break;
+ case AT_DropExpression:
+ address = ATExecDropExpression(rel, cmd->name, cmd->missing_ok, lockmode);
+ break;
+ case AT_SetStatistics: /* ALTER COLUMN SET STATISTICS */
+ address = ATExecSetStatistics(rel, cmd->name, cmd->num, cmd->def, lockmode);
+ break;
+ case AT_SetOptions: /* ALTER COLUMN SET ( options ) */
+ address = ATExecSetOptions(rel, cmd->name, cmd->def, false, lockmode);
+ break;
+ case AT_ResetOptions: /* ALTER COLUMN RESET ( options ) */
+ address = ATExecSetOptions(rel, cmd->name, cmd->def, true, lockmode);
+ break;
+ case AT_SetStorage: /* ALTER COLUMN SET STORAGE */
+ address = ATExecSetStorage(rel, cmd->name, cmd->def, lockmode);
+ break;
+ case AT_SetCompression:
+ address = ATExecSetCompression(tab, rel, cmd->name, cmd->def,
+ lockmode);
+ break;
+ case AT_DropColumn: /* DROP COLUMN */
+ address = ATExecDropColumn(wqueue, rel, cmd->name,
+ cmd->behavior, false, false,
+ cmd->missing_ok, lockmode,
+ NULL);
+ break;
+ case AT_DropColumnRecurse: /* DROP COLUMN with recursion */
+ address = ATExecDropColumn(wqueue, rel, cmd->name,
+ cmd->behavior, true, false,
+ cmd->missing_ok, lockmode,
+ NULL);
+ break;
+ case AT_AddIndex: /* ADD INDEX */
+ address = ATExecAddIndex(tab, rel, (IndexStmt *) cmd->def, false,
+ lockmode);
+ break;
+ case AT_ReAddIndex: /* ADD INDEX */
+ address = ATExecAddIndex(tab, rel, (IndexStmt *) cmd->def, true,
+ lockmode);
+ break;
+ case AT_ReAddStatistics: /* ADD STATISTICS */
+ address = ATExecAddStatistics(tab, rel, (CreateStatsStmt *) cmd->def,
+ true, lockmode);
+ break;
+ case AT_AddConstraint: /* ADD CONSTRAINT */
+ /* Transform the command only during initial examination */
+ if (cur_pass == AT_PASS_ADD_CONSTR)
+ cmd = ATParseTransformCmd(wqueue, tab, rel, cmd,
+ false, lockmode,
+ cur_pass, context);
+ /* Depending on constraint type, might be no more work to do now */
+ if (cmd != NULL)
+ address =
+ ATExecAddConstraint(wqueue, tab, rel,
+ (Constraint *) cmd->def,
+ false, false, lockmode);
+ break;
+ case AT_AddConstraintRecurse: /* ADD CONSTRAINT with recursion */
+ /* Transform the command only during initial examination */
+ if (cur_pass == AT_PASS_ADD_CONSTR)
+ cmd = ATParseTransformCmd(wqueue, tab, rel, cmd,
+ true, lockmode,
+ cur_pass, context);
+ /* Depending on constraint type, might be no more work to do now */
+ if (cmd != NULL)
+ address =
+ ATExecAddConstraint(wqueue, tab, rel,
+ (Constraint *) cmd->def,
+ true, false, lockmode);
+ break;
+ case AT_ReAddConstraint: /* Re-add pre-existing check constraint */
+ address =
+ ATExecAddConstraint(wqueue, tab, rel, (Constraint *) cmd->def,
+ true, true, lockmode);
+ break;
+ case AT_ReAddDomainConstraint: /* Re-add pre-existing domain check
+ * constraint */
+ address =
+ AlterDomainAddConstraint(((AlterDomainStmt *) cmd->def)->typeName,
+ ((AlterDomainStmt *) cmd->def)->def,
+ NULL);
+ break;
+ case AT_ReAddComment: /* Re-add existing comment */
+ address = CommentObject((CommentStmt *) cmd->def);
+ break;
+ case AT_AddIndexConstraint: /* ADD CONSTRAINT USING INDEX */
+ address = ATExecAddIndexConstraint(tab, rel, (IndexStmt *) cmd->def,
+ lockmode);
+ break;
+ case AT_AlterConstraint: /* ALTER CONSTRAINT */
+ address = ATExecAlterConstraint(rel, cmd, false, false, lockmode);
+ break;
+ case AT_ValidateConstraint: /* VALIDATE CONSTRAINT */
+ address = ATExecValidateConstraint(wqueue, rel, cmd->name, false,
+ false, lockmode);
+ break;
+ case AT_ValidateConstraintRecurse: /* VALIDATE CONSTRAINT with
+ * recursion */
+ address = ATExecValidateConstraint(wqueue, rel, cmd->name, true,
+ false, lockmode);
+ break;
+ case AT_DropConstraint: /* DROP CONSTRAINT */
+ ATExecDropConstraint(rel, cmd->name, cmd->behavior,
+ false, false,
+ cmd->missing_ok, lockmode);
+ break;
+ case AT_DropConstraintRecurse: /* DROP CONSTRAINT with recursion */
+ ATExecDropConstraint(rel, cmd->name, cmd->behavior,
+ true, false,
+ cmd->missing_ok, lockmode);
+ break;
+ case AT_AlterColumnType: /* ALTER COLUMN TYPE */
+ /* parse transformation was done earlier */
+ address = ATExecAlterColumnType(tab, rel, cmd, lockmode);
+ break;
+ case AT_AlterColumnGenericOptions: /* ALTER COLUMN OPTIONS */
+ address =
+ ATExecAlterColumnGenericOptions(rel, cmd->name,
+ (List *) cmd->def, lockmode);
+ break;
+ case AT_ChangeOwner: /* ALTER OWNER */
+ ATExecChangeOwner(RelationGetRelid(rel),
+ get_rolespec_oid(cmd->newowner, false),
+ false, lockmode);
+ break;
+ case AT_ClusterOn: /* CLUSTER ON */
+ address = ATExecClusterOn(rel, cmd->name, lockmode);
+ break;
+ case AT_DropCluster: /* SET WITHOUT CLUSTER */
+ ATExecDropCluster(rel, lockmode);
+ break;
+ case AT_SetLogged: /* SET LOGGED */
+ case AT_SetUnLogged: /* SET UNLOGGED */
+ /* all work for these is done in Phase 1 and Phase 3 */
+ break;
+ case AT_DropOids: /* SET WITHOUT OIDS */
+ /* nothing to do here, oid columns don't exist anymore */
+ break;
+ case AT_SetAccessMethod: /* SET ACCESS METHOD */
+ /* handled specially in Phase 3 */
+ break;
+ case AT_SetTableSpace: /* SET TABLESPACE */
+
+ /*
+ * Only do this for partitioned tables and indexes, for which this
+ * is just a catalog change. Other relation types which have
+ * storage are handled by Phase 3.
+ */
+ if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE ||
+ rel->rd_rel->relkind == RELKIND_PARTITIONED_INDEX)
+ ATExecSetTableSpaceNoStorage(rel, tab->newTableSpace);
+
+ break;
+ case AT_SetRelOptions: /* SET (...) */
+ case AT_ResetRelOptions: /* RESET (...) */
+ case AT_ReplaceRelOptions: /* replace entire option list */
+ ATExecSetRelOptions(rel, (List *) cmd->def, cmd->subtype, lockmode);
+ break;
+ case AT_EnableTrig: /* ENABLE TRIGGER name */
+ ATExecEnableDisableTrigger(rel, cmd->name,
+ TRIGGER_FIRES_ON_ORIGIN, false,
+ cmd->recurse,
+ lockmode);
+ break;
+ case AT_EnableAlwaysTrig: /* ENABLE ALWAYS TRIGGER name */
+ ATExecEnableDisableTrigger(rel, cmd->name,
+ TRIGGER_FIRES_ALWAYS, false,
+ cmd->recurse,
+ lockmode);
+ break;
+ case AT_EnableReplicaTrig: /* ENABLE REPLICA TRIGGER name */
+ ATExecEnableDisableTrigger(rel, cmd->name,
+ TRIGGER_FIRES_ON_REPLICA, false,
+ cmd->recurse,
+ lockmode);
+ break;
+ case AT_DisableTrig: /* DISABLE TRIGGER name */
+ ATExecEnableDisableTrigger(rel, cmd->name,
+ TRIGGER_DISABLED, false,
+ cmd->recurse,
+ lockmode);
+ break;
+ case AT_EnableTrigAll: /* ENABLE TRIGGER ALL */
+ ATExecEnableDisableTrigger(rel, NULL,
+ TRIGGER_FIRES_ON_ORIGIN, false,
+ cmd->recurse,
+ lockmode);
+ break;
+ case AT_DisableTrigAll: /* DISABLE TRIGGER ALL */
+ ATExecEnableDisableTrigger(rel, NULL,
+ TRIGGER_DISABLED, false,
+ cmd->recurse,
+ lockmode);
+ break;
+ case AT_EnableTrigUser: /* ENABLE TRIGGER USER */
+ ATExecEnableDisableTrigger(rel, NULL,
+ TRIGGER_FIRES_ON_ORIGIN, true,
+ cmd->recurse,
+ lockmode);
+ break;
+ case AT_DisableTrigUser: /* DISABLE TRIGGER USER */
+ ATExecEnableDisableTrigger(rel, NULL,
+ TRIGGER_DISABLED, true,
+ cmd->recurse,
+ lockmode);
+ break;
+
+ case AT_EnableRule: /* ENABLE RULE name */
+ ATExecEnableDisableRule(rel, cmd->name,
+ RULE_FIRES_ON_ORIGIN, lockmode);
+ break;
+ case AT_EnableAlwaysRule: /* ENABLE ALWAYS RULE name */
+ ATExecEnableDisableRule(rel, cmd->name,
+ RULE_FIRES_ALWAYS, lockmode);
+ break;
+ case AT_EnableReplicaRule: /* ENABLE REPLICA RULE name */
+ ATExecEnableDisableRule(rel, cmd->name,
+ RULE_FIRES_ON_REPLICA, lockmode);
+ break;
+ case AT_DisableRule: /* DISABLE RULE name */
+ ATExecEnableDisableRule(rel, cmd->name,
+ RULE_DISABLED, lockmode);
+ break;
+
+ case AT_AddInherit:
+ address = ATExecAddInherit(rel, (RangeVar *) cmd->def, lockmode);
+ break;
+ case AT_DropInherit:
+ address = ATExecDropInherit(rel, (RangeVar *) cmd->def, lockmode);
+ break;
+ case AT_AddOf:
+ address = ATExecAddOf(rel, (TypeName *) cmd->def, lockmode);
+ break;
+ case AT_DropOf:
+ ATExecDropOf(rel, lockmode);
+ break;
+ case AT_ReplicaIdentity:
+ ATExecReplicaIdentity(rel, (ReplicaIdentityStmt *) cmd->def, lockmode);
+ break;
+ case AT_EnableRowSecurity:
+ ATExecSetRowSecurity(rel, true);
+ break;
+ case AT_DisableRowSecurity:
+ ATExecSetRowSecurity(rel, false);
+ break;
+ case AT_ForceRowSecurity:
+ ATExecForceNoForceRowSecurity(rel, true);
+ break;
+ case AT_NoForceRowSecurity:
+ ATExecForceNoForceRowSecurity(rel, false);
+ break;
+ case AT_GenericOptions:
+ ATExecGenericOptions(rel, (List *) cmd->def);
+ break;
+ case AT_AttachPartition:
+ cmd = ATParseTransformCmd(wqueue, tab, rel, cmd, false, lockmode,
+ cur_pass, context);
+ Assert(cmd != NULL);
+ if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ ATExecAttachPartition(wqueue, rel, (PartitionCmd *) cmd->def,
+ context);
+ else
+ ATExecAttachPartitionIdx(wqueue, rel,
+ ((PartitionCmd *) cmd->def)->name);
+ break;
+ case AT_DetachPartition:
+ cmd = ATParseTransformCmd(wqueue, tab, rel, cmd, false, lockmode,
+ cur_pass, context);
+ Assert(cmd != NULL);
+ /* ATPrepCmd ensures it must be a table */
+ Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
+ ATExecDetachPartition(wqueue, tab, rel,
+ ((PartitionCmd *) cmd->def)->name,
+ ((PartitionCmd *) cmd->def)->concurrent);
+ break;
+ case AT_DetachPartitionFinalize:
+ ATExecDetachPartitionFinalize(rel, ((PartitionCmd *) cmd->def)->name);
+ break;
+ default: /* oops */
+ elog(ERROR, "unrecognized alter table type: %d",
+ (int) cmd->subtype);
+ break;
+ }
+
+ /*
+ * Report the subcommand to interested event triggers. cmd can be NULL
+ * here if ATParseTransformCmd rescheduled all resulting work into later
+ * passes (see the AT_AddConstraint cases above).
+ */
+ if (cmd)
+ EventTriggerCollectAlterTableSubcmd((Node *) cmd, address);
+
+ /*
+ * Bump the command counter to ensure the next subcommand in the sequence
+ * can see the changes so far
+ */
+ CommandCounterIncrement();
+}
+
<div>
+/*
+ * ATParseTransformCmd: perform parse transformation for one subcommand
+ *
+ * Returns the transformed subcommand tree, if there is one, else NULL.
+ *
+ * The parser may hand back additional AlterTableCmd(s) and/or other
+ * utility statements, either before or after the original subcommand.
+ * Other AlterTableCmds are scheduled into the appropriate slot of the
+ * AlteredTableInfo (they had better be for later passes than the current one).
+ * Utility statements that are supposed to happen before the AlterTableCmd
+ * are executed immediately. Those that are supposed to happen afterwards
+ * are added to the tab->afterStmts list to be done at the very end.
+ */
+static AlterTableCmd *
+ATParseTransformCmd(List **wqueue, AlteredTableInfo *tab, Relation rel,
+ AlterTableCmd *cmd, bool recurse, LOCKMODE lockmode,
+ int cur_pass, AlterTableUtilityContext *context)
+{
+ AlterTableCmd *newcmd = NULL;
+ AlterTableStmt *atstmt = makeNode(AlterTableStmt);
+ List *beforeStmts;
+ List *afterStmts;
+ ListCell *lc;
+
+ /* Gin up an AlterTableStmt with just this subcommand and this table */
+ atstmt->relation =
+ makeRangeVar(get_namespace_name(RelationGetNamespace(rel)),
+ pstrdup(RelationGetRelationName(rel)),
+ -1);
+ atstmt->relation->inh = recurse;
+ atstmt->cmds = list_make1(cmd);
+ atstmt->objtype = OBJECT_TABLE; /* needn't be picky here */
+ atstmt->missing_ok = false;
+
+ /* Transform the AlterTableStmt */
+ atstmt = transformAlterTableStmt(RelationGetRelid(rel),
+ atstmt,
+ context->queryString,
+ &beforeStmts,
+ &afterStmts);
+
+ /* Execute any statements that should happen before these subcommand(s) */
+ foreach(lc, beforeStmts)
+ {
+ Node *stmt = (Node *) lfirst(lc);
+
+ ProcessUtilityForAlterTable(stmt, context);
+ CommandCounterIncrement();
+ }
+
+ /* Examine the transformed subcommands and schedule them appropriately */
+ foreach(lc, atstmt->cmds)
+ {
+ AlterTableCmd *cmd2 = lfirst_node(AlterTableCmd, lc);
+ int pass;
+
+ /*
+ * This switch need only cover the subcommand types that can be added
+ * by parse_utilcmd.c; otherwise, we'll use the default strategy of
+ * executing the subcommand immediately, as a substitute for the
+ * original subcommand. (Note, however, that this does cause
+ * AT_AddConstraint subcommands to be rescheduled into later passes,
+ * which is important for index and foreign key constraints.)
+ *
+ * We assume we needn't do any phase-1 checks for added subcommands.
+ */
+ switch (cmd2->subtype)
+ {
+ case AT_SetNotNull:
+ /* Need command-specific recursion decision */
+ ATPrepSetNotNull(wqueue, rel, cmd2,
+ recurse, false,
+ lockmode, context);
+ pass = AT_PASS_COL_ATTRS;
+ break;
+ case AT_AddIndex:
+ /* This command never recurses */
+ /* No command-specific prep needed */
+ pass = AT_PASS_ADD_INDEX;
+ break;
+ case AT_AddIndexConstraint:
+ /* This command never recurses */
+ /* No command-specific prep needed */
+ pass = AT_PASS_ADD_INDEXCONSTR;
+ break;
+ case AT_AddConstraint:
+ /* Recursion occurs during execution phase */
+ if (recurse)
+ cmd2->subtype = AT_AddConstraintRecurse;
+ switch (castNode(Constraint, cmd2->def)->contype)
+ {
+ case CONSTR_PRIMARY:
+ case CONSTR_UNIQUE:
+ case CONSTR_EXCLUSION:
+ pass = AT_PASS_ADD_INDEXCONSTR;
+ break;
+ default:
+ pass = AT_PASS_ADD_OTHERCONSTR;
+ break;
+ }
+ break;
+ case AT_AlterColumnGenericOptions:
+ /* This command never recurses */
+ /* No command-specific prep needed */
+ pass = AT_PASS_MISC;
+ break;
+ default:
+ pass = cur_pass;
+ break;
+ }
+
+ if (pass < cur_pass)
+ {
+ /* Cannot schedule into a pass we already finished */
+ elog(ERROR, "ALTER TABLE scheduling failure: too late for pass %d",
+ pass);
+ }
+ else if (pass > cur_pass)
+ {
+ /* OK, queue it up for later */
+ tab->subcmds[pass] = lappend(tab->subcmds[pass], cmd2);
+ }
+ else
+ {
+ /*
+ * We should see at most one subcommand for the current pass,
+ * which is the transformed version of the original subcommand.
+ */
+ if (newcmd == NULL && cmd->subtype == cmd2->subtype)
+ {
+ /* Found the transformed version of our subcommand */
+ newcmd = cmd2;
+ }
+ else
+ elog(ERROR, "ALTER TABLE scheduling failure: bogus item for pass %d",
+ pass);
+ }
+ }
+
+ /*
+ * Queue up any after-statements to happen at the end. If everything the
+ * transform produced was rescheduled into later passes, newcmd is still
+ * NULL here and the caller gets NULL back.
+ */
+ tab->afterStmts = list_concat(tab->afterStmts, afterStmts);
+
+ return newcmd;
+}
</div>
+
+/*
+ * ATRewriteTables: ALTER TABLE phase 3
+ */
+static void
+ATRewriteTables(AlterTableStmt *parsetree, List **wqueue, LOCKMODE lockmode,
+ AlterTableUtilityContext *context)
+{
+ ListCell *ltab;
+
+ /* Go through each table that needs to be checked or rewritten */
+ foreach(ltab, *wqueue)
+ {
+ AlteredTableInfo *tab = (AlteredTableInfo *) lfirst(ltab);
+
+ /* Relations without storage may be ignored here */
+ if (!RELKIND_HAS_STORAGE(tab->relkind))
+ continue;
+
+ /*
+ * If we change column data types, the operation has to be propagated
+ * to tables that use this table's rowtype as a column type.
+ * tab->newvals will also be non-NULL in the case where we're adding a
+ * column with a default. We choose to forbid that case as well,
+ * since composite types might eventually support defaults.
+ *
+ * (Eventually we'll probably need to check for composite type
+ * dependencies even when we're just scanning the table without a
+ * rewrite, but at the moment a composite type does not enforce any
+ * constraints, so it's not necessary/appropriate to enforce them just
+ * during ALTER.)
+ */
+ if (tab->newvals != NIL || tab->rewrite > 0)
+ {
+ Relation rel;
+
+ rel = table_open(tab->relid, NoLock);
+ find_composite_type_dependencies(rel->rd_rel->reltype, rel, NULL);
+ table_close(rel, NoLock);
+ }
+
+ /*
+ * We only need to rewrite the table if at least one column needs to
+ * be recomputed, or we are changing its persistence or access method.
+ *
+ * There are two reasons for requiring a rewrite when changing
+ * persistence: on one hand, we need to ensure that the buffers
+ * belonging to each of the two relations are marked with or without
+ * BM_PERMANENT properly. On the other hand, since rewriting creates
+ * and assigns a new relfilenode, we automatically create or drop an
+ * init fork for the relation as appropriate.
+ */
+ if (tab->rewrite > 0 && tab->relkind != RELKIND_SEQUENCE)
+ {
+ /* Build a temporary relation and copy data */
+ Relation OldHeap;
+ Oid OIDNewHeap;
+ Oid NewAccessMethod;
+ Oid NewTableSpace;
+ char persistence;
+
+ OldHeap = table_open(tab->relid, NoLock);
+
+ /*
+ * We don't support rewriting of system catalogs; there are too
+ * many corner cases and too little benefit. In particular this
+ * is certainly not going to work for mapped catalogs.
+ */
+ if (IsSystemRelation(OldHeap))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot rewrite system relation \"%s\"",
+ RelationGetRelationName(OldHeap))));
+
+ if (RelationIsUsedAsCatalogTable(OldHeap))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot rewrite table \"%s\" used as a catalog table",
+ RelationGetRelationName(OldHeap))));
+
+ /*
+ * Don't allow rewrite on temp tables of other backends ... their
+ * local buffer manager is not going to cope.
+ */
+ if (RELATION_IS_OTHER_TEMP(OldHeap))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot rewrite temporary tables of other sessions")));
+
+ /*
+ * Select destination tablespace (same as original unless user
+ * requested a change)
+ */
+ if (tab->newTableSpace)
+ NewTableSpace = tab->newTableSpace;
+ else
+ NewTableSpace = OldHeap->rd_rel->reltablespace;
+
+ /*
+ * Select destination access method (same as original unless user
+ * requested a change)
+ */
+ if (OidIsValid(tab->newAccessMethod))
+ NewAccessMethod = tab->newAccessMethod;
+ else
+ NewAccessMethod = OldHeap->rd_rel->relam;
+
+ /*
+ * Select persistence of transient table (same as original unless
+ * user requested a change)
+ */
+ persistence = tab->chgPersistence ?
+ tab->newrelpersistence : OldHeap->rd_rel->relpersistence;
+
+ table_close(OldHeap, NoLock);
+
+ /*
+ * Fire off an Event Trigger now, before actually rewriting the
+ * table.
+ *
+ * We don't support Event Trigger for nested commands anywhere,
+ * here included, and parsetree is given NULL when coming from
+ * AlterTableInternal.
+ *
+ * And fire it only once.
+ */
+ if (parsetree)
+ EventTriggerTableRewrite((Node *) parsetree,
+ tab->relid,
+ tab->rewrite);
+
+ /*
+ * Create transient table that will receive the modified data.
+ *
+ * Ensure it is marked correctly as logged or unlogged. We have
+ * to do this here so that buffers for the new relfilenode will
+ * have the right persistence set, and at the same time ensure
+ * that the original filenode's buffers will get read in with the
+ * correct setting (i.e. the original one). Otherwise a rollback
+ * after the rewrite would possibly result with buffers for the
+ * original filenode having the wrong persistence setting.
+ *
+ * NB: This relies on swap_relation_files() also swapping the
+ * persistence. That wouldn't work for pg_class, but that can't be
+ * unlogged anyway.
+ */
+ OIDNewHeap = make_new_heap(tab->relid, NewTableSpace, NewAccessMethod,
+ persistence, lockmode);
+
+ /*
+ * Copy the heap data into the new table with the desired
+ * modifications, and test the current data within the table
+ * against new constraints generated by ALTER TABLE commands.
+ */
+ ATRewriteTable(tab, OIDNewHeap, lockmode);
+
+ /*
+ * Swap the physical files of the old and new heaps, then rebuild
+ * indexes and discard the old heap. We can use RecentXmin for
+ * the table's new relfrozenxid because we rewrote all the tuples
+ * in ATRewriteTable, so no older Xid remains in the table. Also,
+ * we never try to swap toast tables by content, since we have no
+ * interest in letting this code work on system catalogs.
+ */
+ finish_heap_swap(tab->relid, OIDNewHeap,
+ false, false, true,
+ !OidIsValid(tab->newTableSpace),
+ RecentXmin,
+ ReadNextMultiXactId(),
+ persistence);
+
+ InvokeObjectPostAlterHook(RelationRelationId, tab->relid, 0);
+ }
+ else if (tab->rewrite > 0 && tab->relkind == RELKIND_SEQUENCE)
+ {
+ if (tab->chgPersistence)
+ SequenceChangePersistence(tab->relid, tab->newrelpersistence);
+ }
+ else
+ {
+ /*
+ * If required, test the current data within the table against new
+ * constraints generated by ALTER TABLE commands, but don't
+ * rebuild data.
+ */
+ if (tab->constraints != NIL || tab->verify_new_notnull ||
+ tab->partition_constraint != NULL)
+ ATRewriteTable(tab, InvalidOid, lockmode);
+
+ /*
+ * If we had SET TABLESPACE but no reason to reconstruct tuples,
+ * just do a block-by-block copy.
+ */
+ if (tab->newTableSpace)
+ ATExecSetTableSpace(tab->relid, tab->newTableSpace, lockmode);
+ }
+
+ /*
+ * Also change persistence of owned sequences, so that it matches the
+ * table persistence.
+ */
+ if (tab->chgPersistence)
+ {
+ List *seqlist = getOwnedSequences(tab->relid);
+ ListCell *lc;
+
+ foreach(lc, seqlist)
+ {
+ Oid seq_relid = lfirst_oid(lc);
+
+ SequenceChangePersistence(seq_relid, tab->newrelpersistence);
+ }
+ }
+ }
+
+ /*
+ * Foreign key constraints are checked in a final pass, since (a) it's
+ * generally best to examine each one separately, and (b) it's at least
+ * theoretically possible that we have changed both relations of the
+ * foreign key, and we'd better have finished both rewrites before we try
+ * to read the tables.
+ */
+ foreach(ltab, *wqueue)
+ {
+ AlteredTableInfo *tab = (AlteredTableInfo *) lfirst(ltab);
+ Relation rel = NULL;
+ ListCell *lcon;
+
+ /* Relations without storage may be ignored here too */
+ if (!RELKIND_HAS_STORAGE(tab->relkind))
+ continue;
+
+ foreach(lcon, tab->constraints)
+ {
+ NewConstraint *con = lfirst(lcon);
+
+ if (con->contype == CONSTR_FOREIGN)
+ {
+ Constraint *fkconstraint = (Constraint *) con->qual;
+ Relation refrel;
+
+ if (rel == NULL)
+ {
+ /* Long since locked, no need for another */
+ rel = table_open(tab->relid, NoLock);
+ }
+
+ refrel = table_open(con->refrelid, RowShareLock);
+
+ validateForeignKeyConstraint(fkconstraint->conname, rel, refrel,
+ con->refindid,
+ con->conid);
+
+ /*
+ * No need to mark the constraint row as validated, we did
+ * that when we inserted the row earlier.
+ */
+
+ table_close(refrel, NoLock);
+ }
+ }
+
+ if (rel)
+ table_close(rel, NoLock);
+ }
+
+ /* Finally, run any afterStmts that were queued up */
+ foreach(ltab, *wqueue)
+ {
+ AlteredTableInfo *tab = (AlteredTableInfo *) lfirst(ltab);
+ ListCell *lc;
+
+ foreach(lc, tab->afterStmts)
+ {
+ Node *stmt = (Node *) lfirst(lc);
+
+ ProcessUtilityForAlterTable(stmt, context);
+ CommandCounterIncrement();
+ }
+ }
+}
+
+/*
+ * ATRewriteTable: scan or rewrite one table
+ *
+ * When rewriting, every live row of tab's relation is copied into the
+ * transient heap OIDNewHeap, with new column values computed and all new
+ * constraints checked along the way.  When only scanning, the existing
+ * data is merely verified against the new constraints.
+ *
+ * tab: work-queue entry describing the pending changes for this relation
+ * OIDNewHeap: is InvalidOid if we don't need to rewrite
+ * lockmode: lock level already held on the old table; also used for the
+ *		transient heap when rewriting
+ */
+static void
+ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
+{
+	Relation	oldrel;
+	Relation	newrel;
+	TupleDesc	oldTupDesc;
+	TupleDesc	newTupDesc;
+	bool		needscan = false;
+	List	   *notnull_attrs;
+	int			i;
+	ListCell   *l;
+	EState	   *estate;
+	CommandId	mycid;
+	BulkInsertState bistate;
+	int			ti_options;
+	ExprState  *partqualstate = NULL;
+
+	/*
+	 * Open the relation(s).  We have surely already locked the existing
+	 * table.
+	 */
+	oldrel = table_open(tab->relid, NoLock);
+	oldTupDesc = tab->oldDesc;
+	newTupDesc = RelationGetDescr(oldrel);	/* includes all mods */
+
+	if (OidIsValid(OIDNewHeap))
+		newrel = table_open(OIDNewHeap, lockmode);
+	else
+		newrel = NULL;
+
+	/*
+	 * Prepare a BulkInsertState and options for table_tuple_insert.  The FSM
+	 * is empty, so don't bother using it.
+	 */
+	if (newrel)
+	{
+		mycid = GetCurrentCommandId(true);
+		bistate = GetBulkInsertState();
+		ti_options = TABLE_INSERT_SKIP_FSM;
+	}
+	else
+	{
+		/* keep compiler quiet about using these uninitialized */
+		mycid = 0;
+		bistate = NULL;
+		ti_options = 0;
+	}
+
+	/*
+	 * Generate the constraint and default execution states
+	 */
+
+	estate = CreateExecutorState();
+
+	/* Build the needed expression execution states */
+	foreach(l, tab->constraints)
+	{
+		NewConstraint *con = lfirst(l);
+
+		switch (con->contype)
+		{
+			case CONSTR_CHECK:
+				/* any CHECK constraint forces a full-table scan */
+				needscan = true;
+				con->qualstate = ExecPrepareExpr((Expr *) con->qual, estate);
+				break;
+			case CONSTR_FOREIGN:
+				/* Nothing to do here */
+				break;
+			default:
+				elog(ERROR, "unrecognized constraint type: %d",
+					 (int) con->contype);
+		}
+	}
+
+	/* Build expression execution states for partition check quals */
+	if (tab->partition_constraint)
+	{
+		needscan = true;
+		partqualstate = ExecPrepareExpr(tab->partition_constraint, estate);
+	}
+
+	foreach(l, tab->newvals)
+	{
+		NewColumnValue *ex = lfirst(l);
+
+		/* expr already planned */
+		ex->exprstate = ExecInitExpr((Expr *) ex->expr, NULL);
+	}
+
+	notnull_attrs = NIL;
+	if (newrel || tab->verify_new_notnull)
+	{
+		/*
+		 * If we are rebuilding the tuples OR if we added any new but not
+		 * verified NOT NULL constraints, check all not-null constraints. This
+		 * is a bit of overkill but it minimizes risk of bugs, and
+		 * heap_attisnull is a pretty cheap test anyway.
+		 */
+		for (i = 0; i < newTupDesc->natts; i++)
+		{
+			Form_pg_attribute attr = TupleDescAttr(newTupDesc, i);
+
+			if (attr->attnotnull && !attr->attisdropped)
+				notnull_attrs = lappend_int(notnull_attrs, i);
+		}
+		if (notnull_attrs)
+			needscan = true;
+	}
+
+	if (newrel || needscan)
+	{
+		ExprContext *econtext;
+		TupleTableSlot *oldslot;
+		TupleTableSlot *newslot;
+		TableScanDesc scan;
+		MemoryContext oldCxt;
+		List	   *dropped_attrs = NIL;
+		ListCell   *lc;
+		Snapshot	snapshot;
+
+		if (newrel)
+			ereport(DEBUG1,
+					(errmsg_internal("rewriting table \"%s\"",
+									 RelationGetRelationName(oldrel))));
+		else
+			ereport(DEBUG1,
+					(errmsg_internal("verifying table \"%s\"",
+									 RelationGetRelationName(oldrel))));
+
+		if (newrel)
+		{
+			/*
+			 * All predicate locks on the tuples or pages are about to be made
+			 * invalid, because we move tuples around.  Promote them to
+			 * relation locks.
+			 */
+			TransferPredicateLocksToHeapRelation(oldrel);
+		}
+
+		econtext = GetPerTupleExprContext(estate);
+
+		/*
+		 * Create necessary tuple slots. When rewriting, two slots are needed,
+		 * otherwise one suffices. In the case where one slot suffices, we
+		 * need to use the new tuple descriptor, otherwise some constraints
+		 * can't be evaluated.  Note that even when the tuple layout is the
+		 * same and no rewrite is required, the tupDescs might not be
+		 * (consider ADD COLUMN without a default).
+		 */
+		if (tab->rewrite)
+		{
+			Assert(newrel != NULL);
+			oldslot = MakeSingleTupleTableSlot(oldTupDesc,
+											   table_slot_callbacks(oldrel));
+			newslot = MakeSingleTupleTableSlot(newTupDesc,
+											   table_slot_callbacks(newrel));
+
+			/*
+			 * Set all columns in the new slot to NULL initially, to ensure
+			 * columns added as part of the rewrite are initialized to NULL.
+			 * That is necessary as tab->newvals will not contain an
+			 * expression for columns with a NULL default, e.g. when adding a
+			 * column without a default together with a column with a default
+			 * requiring an actual rewrite.
+			 */
+			ExecStoreAllNullTuple(newslot);
+		}
+		else
+		{
+			oldslot = MakeSingleTupleTableSlot(newTupDesc,
+											   table_slot_callbacks(oldrel));
+			newslot = NULL;
+		}
+
+		/*
+		 * Any attributes that are dropped according to the new tuple
+		 * descriptor can be set to NULL. We precompute the list of dropped
+		 * attributes to avoid needing to do so in the per-tuple loop.
+		 */
+		for (i = 0; i < newTupDesc->natts; i++)
+		{
+			if (TupleDescAttr(newTupDesc, i)->attisdropped)
+				dropped_attrs = lappend_int(dropped_attrs, i);
+		}
+
+		/*
+		 * Scan through the rows, generating a new row if needed and then
+		 * checking all the constraints.
+		 */
+		snapshot = RegisterSnapshot(GetLatestSnapshot());
+		scan = table_beginscan(oldrel, snapshot, 0, NULL);
+
+		/*
+		 * Switch to per-tuple memory context and reset it for each tuple
+		 * produced, so we don't leak memory.
+		 */
+		oldCxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
+
+		while (table_scan_getnextslot(scan, ForwardScanDirection, oldslot))
+		{
+			TupleTableSlot *insertslot;
+
+			if (tab->rewrite > 0)
+			{
+				/* Extract data from old tuple */
+				slot_getallattrs(oldslot);
+				ExecClearTuple(newslot);
+
+				/* copy attributes */
+				memcpy(newslot->tts_values, oldslot->tts_values,
+					   sizeof(Datum) * oldslot->tts_nvalid);
+				memcpy(newslot->tts_isnull, oldslot->tts_isnull,
+					   sizeof(bool) * oldslot->tts_nvalid);
+
+				/* Set dropped attributes to null in new tuple */
+				foreach(lc, dropped_attrs)
+					newslot->tts_isnull[lfirst_int(lc)] = true;
+
+				/*
+				 * Constraints and GENERATED expressions might reference the
+				 * tableoid column, so fill tts_tableOid with the desired
+				 * value.  (We must do this each time, because it gets
+				 * overwritten with newrel's OID during storing.)
+				 */
+				newslot->tts_tableOid = RelationGetRelid(oldrel);
+
+				/*
+				 * Process supplied expressions to replace selected columns.
+				 *
+				 * First, evaluate expressions whose inputs come from the old
+				 * tuple.
+				 */
+				econtext->ecxt_scantuple = oldslot;
+
+				foreach(l, tab->newvals)
+				{
+					NewColumnValue *ex = lfirst(l);
+
+					if (ex->is_generated)
+						continue;
+
+					newslot->tts_values[ex->attnum - 1]
+						= ExecEvalExpr(ex->exprstate,
+									   econtext,
+									   &newslot->tts_isnull[ex->attnum - 1]);
+				}
+
+				ExecStoreVirtualTuple(newslot);
+
+				/*
+				 * Now, evaluate any expressions whose inputs come from the
+				 * new tuple.  We assume these columns won't reference each
+				 * other, so that there's no ordering dependency.
+				 */
+				econtext->ecxt_scantuple = newslot;
+
+				foreach(l, tab->newvals)
+				{
+					NewColumnValue *ex = lfirst(l);
+
+					if (!ex->is_generated)
+						continue;
+
+					newslot->tts_values[ex->attnum - 1]
+						= ExecEvalExpr(ex->exprstate,
+									   econtext,
+									   &newslot->tts_isnull[ex->attnum - 1]);
+				}
+
+				insertslot = newslot;
+			}
+			else
+			{
+				/*
+				 * If there's no rewrite, old and new table are guaranteed to
+				 * have the same AM, so we can just use the old slot to verify
+				 * new constraints etc.
+				 */
+				insertslot = oldslot;
+			}
+
+			/* Now check any constraints on the possibly-changed tuple */
+			econtext->ecxt_scantuple = insertslot;
+
+			foreach(l, notnull_attrs)
+			{
+				int			attn = lfirst_int(l);
+
+				if (slot_attisnull(insertslot, attn + 1))
+				{
+					Form_pg_attribute attr = TupleDescAttr(newTupDesc, attn);
+
+					ereport(ERROR,
+							(errcode(ERRCODE_NOT_NULL_VIOLATION),
+							 errmsg("column \"%s\" of relation \"%s\" contains null values",
+									NameStr(attr->attname),
+									RelationGetRelationName(oldrel)),
+							 errtablecol(oldrel, attn + 1)));
+				}
+			}
+
+			foreach(l, tab->constraints)
+			{
+				NewConstraint *con = lfirst(l);
+
+				switch (con->contype)
+				{
+					case CONSTR_CHECK:
+						if (!ExecCheck(con->qualstate, econtext))
+							ereport(ERROR,
+									(errcode(ERRCODE_CHECK_VIOLATION),
+									 errmsg("check constraint \"%s\" of relation \"%s\" is violated by some row",
+											con->name,
+											RelationGetRelationName(oldrel)),
+									 errtableconstraint(oldrel, con->name)));
+						break;
+					case CONSTR_FOREIGN:
+						/* Nothing to do here */
+						break;
+					default:
+						elog(ERROR, "unrecognized constraint type: %d",
+							 (int) con->contype);
+				}
+			}
+
+			if (partqualstate && !ExecCheck(partqualstate, econtext))
+			{
+				if (tab->validate_default)
+					ereport(ERROR,
+							(errcode(ERRCODE_CHECK_VIOLATION),
+							 errmsg("updated partition constraint for default partition \"%s\" would be violated by some row",
+									RelationGetRelationName(oldrel)),
+							 errtable(oldrel)));
+				else
+					ereport(ERROR,
+							(errcode(ERRCODE_CHECK_VIOLATION),
+							 errmsg("partition constraint of relation \"%s\" is violated by some row",
+									RelationGetRelationName(oldrel)),
+							 errtable(oldrel)));
+			}
+
+			/* Write the tuple out to the new relation */
+			if (newrel)
+				table_tuple_insert(newrel, insertslot, mycid,
+								   ti_options, bistate);
+
+			ResetExprContext(econtext);
+
+			CHECK_FOR_INTERRUPTS();
+		}
+
+		MemoryContextSwitchTo(oldCxt);
+		table_endscan(scan);
+		UnregisterSnapshot(snapshot);
+
+		ExecDropSingleTupleTableSlot(oldslot);
+		if (newslot)
+			ExecDropSingleTupleTableSlot(newslot);
+	}
+
+	FreeExecutorState(estate);
+
+	table_close(oldrel, NoLock);
+	if (newrel)
+	{
+		FreeBulkInsertState(bistate);
+
+		table_finish_bulk_insert(newrel, ti_options);
+
+		table_close(newrel, NoLock);
+	}
+}
+
+/*
+ * ATGetQueueEntry: find or create an entry in the ALTER TABLE work queue
+ *
+ * The queue holds one AlteredTableInfo per relation touched by the current
+ * ALTER TABLE; repeated lookups for the same relation return the same entry.
+ */
+static AlteredTableInfo *
+ATGetQueueEntry(List **wqueue, Relation rel)
+{
+	Oid			relid = RelationGetRelid(rel);
+	AlteredTableInfo *entry;
+	ListCell   *cell;
+
+	/* If we've already queued this relation, hand back that entry. */
+	foreach(cell, *wqueue)
+	{
+		entry = (AlteredTableInfo *) lfirst(cell);
+		if (entry->relid == relid)
+			return entry;
+	}
+
+	/*
+	 * No existing entry; build a fresh one.  We snapshot a copy of the
+	 * relation's current tuple descriptor right away, before any of the
+	 * ALTER work gets a chance to change it.
+	 */
+	entry = (AlteredTableInfo *) palloc0(sizeof(AlteredTableInfo));
+	entry->relid = relid;
+	entry->rel = NULL;			/* set later */
+	entry->relkind = rel->rd_rel->relkind;
+	entry->oldDesc = CreateTupleDescCopyConstr(RelationGetDescr(rel));
+	entry->newAccessMethod = InvalidOid;
+	entry->newTableSpace = InvalidOid;
+	entry->newrelpersistence = RELPERSISTENCE_PERMANENT;
+	entry->chgPersistence = false;
+
+	*wqueue = lappend(*wqueue, entry);
+
+	return entry;
+}
+
+/*
+ * alter_table_type_to_string
+ *
+ * Map an ALTER TABLE subcommand type to the SQL keywords a user would have
+ * typed, for use in error messages.  Returns NULL for subcommand types that
+ * are generated internally and have no user-visible grammar; callers must
+ * be prepared for that.
+ *
+ * There is deliberately no default: case, so that adding a new
+ * AlterTableType member should provoke a compiler warning here until the
+ * new member is handled.
+ */
+static const char *
+alter_table_type_to_string(AlterTableType cmdtype)
+{
+	switch (cmdtype)
+	{
+		case AT_AddColumn:
+		case AT_AddColumnRecurse:
+		case AT_AddColumnToView:
+			return "ADD COLUMN";
+		case AT_ColumnDefault:
+		case AT_CookedColumnDefault:
+			return "ALTER COLUMN ... SET DEFAULT";
+		case AT_DropNotNull:
+			return "ALTER COLUMN ... DROP NOT NULL";
+		case AT_SetNotNull:
+			return "ALTER COLUMN ... SET NOT NULL";
+		case AT_DropExpression:
+			return "ALTER COLUMN ... DROP EXPRESSION";
+		case AT_CheckNotNull:
+			return NULL;		/* not real grammar */
+		case AT_SetStatistics:
+			return "ALTER COLUMN ... SET STATISTICS";
+		case AT_SetOptions:
+			return "ALTER COLUMN ... SET";
+		case AT_ResetOptions:
+			return "ALTER COLUMN ... RESET";
+		case AT_SetStorage:
+			return "ALTER COLUMN ... SET STORAGE";
+		case AT_SetCompression:
+			return "ALTER COLUMN ... SET COMPRESSION";
+		case AT_DropColumn:
+		case AT_DropColumnRecurse:
+			return "DROP COLUMN";
+		case AT_AddIndex:
+		case AT_ReAddIndex:
+			return NULL;		/* not real grammar */
+		case AT_AddConstraint:
+		case AT_AddConstraintRecurse:
+		case AT_ReAddConstraint:
+		case AT_ReAddDomainConstraint:
+		case AT_AddIndexConstraint:
+			return "ADD CONSTRAINT";
+		case AT_AlterConstraint:
+			return "ALTER CONSTRAINT";
+		case AT_ValidateConstraint:
+		case AT_ValidateConstraintRecurse:
+			return "VALIDATE CONSTRAINT";
+		case AT_DropConstraint:
+		case AT_DropConstraintRecurse:
+			return "DROP CONSTRAINT";
+		case AT_ReAddComment:
+			return NULL;		/* not real grammar */
+		case AT_AlterColumnType:
+			return "ALTER COLUMN ... SET DATA TYPE";
+		case AT_AlterColumnGenericOptions:
+			return "ALTER COLUMN ... OPTIONS";
+		case AT_ChangeOwner:
+			return "OWNER TO";
+		case AT_ClusterOn:
+			return "CLUSTER ON";
+		case AT_DropCluster:
+			return "SET WITHOUT CLUSTER";
+		case AT_SetAccessMethod:
+			return "SET ACCESS METHOD";
+		case AT_SetLogged:
+			return "SET LOGGED";
+		case AT_SetUnLogged:
+			return "SET UNLOGGED";
+		case AT_DropOids:
+			return "SET WITHOUT OIDS";
+		case AT_SetTableSpace:
+			return "SET TABLESPACE";
+		case AT_SetRelOptions:
+			return "SET";
+		case AT_ResetRelOptions:
+			return "RESET";
+		case AT_ReplaceRelOptions:
+			return NULL;		/* not real grammar */
+		case AT_EnableTrig:
+			return "ENABLE TRIGGER";
+		case AT_EnableAlwaysTrig:
+			return "ENABLE ALWAYS TRIGGER";
+		case AT_EnableReplicaTrig:
+			return "ENABLE REPLICA TRIGGER";
+		case AT_DisableTrig:
+			return "DISABLE TRIGGER";
+		case AT_EnableTrigAll:
+			return "ENABLE TRIGGER ALL";
+		case AT_DisableTrigAll:
+			return "DISABLE TRIGGER ALL";
+		case AT_EnableTrigUser:
+			return "ENABLE TRIGGER USER";
+		case AT_DisableTrigUser:
+			return "DISABLE TRIGGER USER";
+		case AT_EnableRule:
+			return "ENABLE RULE";
+		case AT_EnableAlwaysRule:
+			return "ENABLE ALWAYS RULE";
+		case AT_EnableReplicaRule:
+			return "ENABLE REPLICA RULE";
+		case AT_DisableRule:
+			return "DISABLE RULE";
+		case AT_AddInherit:
+			return "INHERIT";
+		case AT_DropInherit:
+			return "NO INHERIT";
+		case AT_AddOf:
+			return "OF";
+		case AT_DropOf:
+			return "NOT OF";
+		case AT_ReplicaIdentity:
+			return "REPLICA IDENTITY";
+		case AT_EnableRowSecurity:
+			return "ENABLE ROW SECURITY";
+		case AT_DisableRowSecurity:
+			return "DISABLE ROW SECURITY";
+		case AT_ForceRowSecurity:
+			return "FORCE ROW SECURITY";
+		case AT_NoForceRowSecurity:
+			return "NO FORCE ROW SECURITY";
+		case AT_GenericOptions:
+			return "OPTIONS";
+		case AT_AttachPartition:
+			return "ATTACH PARTITION";
+		case AT_DetachPartition:
+			return "DETACH PARTITION";
+		case AT_DetachPartitionFinalize:
+			return "DETACH PARTITION ... FINALIZE";
+		case AT_AddIdentity:
+			return "ALTER COLUMN ... ADD IDENTITY";
+		case AT_SetIdentity:
+			return "ALTER COLUMN ... SET";
+		case AT_DropIdentity:
+			return "ALTER COLUMN ... DROP IDENTITY";
+		case AT_ReAddStatistics:
+			return NULL;		/* not real grammar */
+	}
+
+	/* placate compilers that don't know the switch above is exhaustive */
+	return NULL;
+}
+
+/*
+ * ATSimplePermissions
+ *
+ * Common validation for ALTER TABLE subcommands:
+ * - the relation's kind must be among the allowed_targets (ATT_* bits)
+ * - the invoking user must own the relation
+ * - system catalogs are rejected unless allowSystemTableMods is set
+ */
+static void
+ATSimplePermissions(AlterTableType cmdtype, Relation rel, int allowed_targets)
+{
+	char		relkind = rel->rd_rel->relkind;
+	int			my_target;
+
+	/* Translate this relation's relkind into its ATT_* target bit */
+	if (relkind == RELKIND_RELATION || relkind == RELKIND_PARTITIONED_TABLE)
+		my_target = ATT_TABLE;
+	else if (relkind == RELKIND_VIEW)
+		my_target = ATT_VIEW;
+	else if (relkind == RELKIND_MATVIEW)
+		my_target = ATT_MATVIEW;
+	else if (relkind == RELKIND_INDEX)
+		my_target = ATT_INDEX;
+	else if (relkind == RELKIND_PARTITIONED_INDEX)
+		my_target = ATT_PARTITIONED_INDEX;
+	else if (relkind == RELKIND_COMPOSITE_TYPE)
+		my_target = ATT_COMPOSITE_TYPE;
+	else if (relkind == RELKIND_FOREIGN_TABLE)
+		my_target = ATT_FOREIGN_TABLE;
+	else if (relkind == RELKIND_SEQUENCE)
+		my_target = ATT_SEQUENCE;
+	else
+		my_target = 0;
+
+	/* Complain if this subcommand doesn't apply to this kind of relation */
+	if ((my_target & allowed_targets) == 0)
+	{
+		const char *action_str = alter_table_type_to_string(cmdtype);
+
+		if (action_str)
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+			/* translator: %s is a group of some SQL keywords */
+					 errmsg("ALTER action %s cannot be performed on relation \"%s\"",
+							action_str, RelationGetRelationName(rel)),
+					 errdetail_relkind_not_supported(rel->rd_rel->relkind)));
+		else
+			/* internal error? */
+			elog(ERROR, "invalid ALTER action attempted on relation \"%s\"",
+				 RelationGetRelationName(rel));
+	}
+
+	/* The caller must own the relation */
+	if (!pg_class_ownercheck(RelationGetRelid(rel), GetUserId()))
+		aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(rel->rd_rel->relkind),
+					   RelationGetRelationName(rel));
+
+	/* System catalogs may not be altered without allowSystemTableMods */
+	if (!allowSystemTableMods && IsSystemRelation(rel))
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 errmsg("permission denied: \"%s\" is a system catalog",
+						RelationGetRelationName(rel))));
+}
+
+/*
+ * ATSimpleRecursion
+ *
+ * Simple table recursion sufficient for most ALTER TABLE operations.
+ * All direct and indirect children are processed in an unspecified order;
+ * a child reachable via several inheritance paths is still visited only
+ * once, because find_all_inheritors de-duplicates its result.
+ */
+static void
+ATSimpleRecursion(List **wqueue, Relation rel,
+				  AlterTableCmd *cmd, bool recurse, LOCKMODE lockmode,
+				  AlterTableUtilityContext *context)
+{
+	Oid			parentrelid;
+	List	   *inheritors;
+	ListCell   *cell;
+
+	/* Nothing to do unless recursion was requested and children may exist */
+	if (!recurse || !rel->rd_rel->relhassubclass)
+		return;
+
+	parentrelid = RelationGetRelid(rel);
+
+	/*
+	 * find_all_inheritors performs the recursive walk of the inheritance
+	 * hierarchy (taking lockmode on each member), so a flat scan over its
+	 * result is all that's needed here.
+	 */
+	inheritors = find_all_inheritors(parentrelid, lockmode, NULL);
+
+	foreach(cell, inheritors)
+	{
+		Oid			childrelid = lfirst_oid(cell);
+		Relation	childrel;
+
+		/* Skip the parent itself; it's handled by our caller */
+		if (childrelid == parentrelid)
+			continue;
+
+		/* find_all_inheritors already got lock */
+		childrel = relation_open(childrelid, NoLock);
+		CheckTableNotInUse(childrel, "ALTER TABLE");
+		ATPrepCmd(wqueue, childrel, cmd, false, true, lockmode, context);
+		relation_close(childrel, NoLock);
+	}
+}
+
+/*
+ * Obtain list of partitions of the given table, locking them all at the given
+ * lockmode and ensuring that they all pass CheckTableNotInUse.
+ *
+ * This function is a no-op if the given relation is not a partitioned table;
+ * in particular, nothing is done if it's a legacy inheritance parent.
+ */
+static void
+ATCheckPartitionsNotInUse(Relation rel, LOCKMODE lockmode)
+{
+	List	   *partitions;
+	ListCell   *cell;
+
+	/* Only partitioned tables have partitions to check */
+	if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+		return;
+
+	partitions = find_all_inheritors(RelationGetRelid(rel), lockmode, NULL);
+
+	/* The first list element is the partitioned table itself; skip it */
+	for_each_from(cell, partitions, 1)
+	{
+		Relation	part;
+
+		/* find_all_inheritors already got lock */
+		part = table_open(lfirst_oid(cell), NoLock);
+		CheckTableNotInUse(part, "ALTER TABLE");
+		table_close(part, NoLock);
+	}
+	list_free(partitions);
+}
+
+/*
+ * ATTypedTableRecursion
+ *
+ * Propagate an ALTER TYPE operation to all typed tables of that type,
+ * honoring the command's RESTRICT/CASCADE behavior.  Under CASCADE, the
+ * recursion also reaches inheritance children of the typed tables.
+ */
+static void
+ATTypedTableRecursion(List **wqueue, Relation rel, AlterTableCmd *cmd,
+					  LOCKMODE lockmode, AlterTableUtilityContext *context)
+{
+	List	   *typed_tables;
+	ListCell   *cell;
+
+	Assert(rel->rd_rel->relkind == RELKIND_COMPOSITE_TYPE);
+
+	/* Collect the typed tables; errors out under RESTRICT if any exist */
+	typed_tables = find_typed_table_dependencies(rel->rd_rel->reltype,
+												 RelationGetRelationName(rel),
+												 cmd->behavior);
+
+	foreach(cell, typed_tables)
+	{
+		Relation	typedrel;
+
+		typedrel = relation_open(lfirst_oid(cell), lockmode);
+		CheckTableNotInUse(typedrel, "ALTER TABLE");
+		ATPrepCmd(wqueue, typedrel, cmd, true, true, lockmode, context);
+		relation_close(typedrel, NoLock);
+	}
+}
+
+
+/*
+ * find_composite_type_dependencies
+ *
+ * Check to see if the type "typeOid" is being used as a column in some table
+ * (possibly nested several levels deep in composite types, arrays, etc!).
+ * Eventually, we'd like to propagate the check or rewrite operation
+ * into such tables, but for now, just error out if we find any.
+ *
+ * Caller should provide either the associated relation of a rowtype,
+ * or a type name (not both) for use in the error message, if any.
+ * Which one was supplied determines the wording of the error raised.
+ *
+ * Note that "typeOid" is not necessarily a composite type; it could also be
+ * another container type such as an array or range, or a domain over one of
+ * these things.  The name of this function is therefore somewhat historical,
+ * but it's not worth changing.
+ *
+ * We assume that functions and views depending on the type are not reasons
+ * to reject the ALTER.  (How safe is this really?)
+ */
+void
+find_composite_type_dependencies(Oid typeOid, Relation origRelation,
+								 const char *origTypeName)
+{
+	Relation	depRel;
+	ScanKeyData key[2];
+	SysScanDesc depScan;
+	HeapTuple	depTup;
+
+	/* since this function recurses, it could be driven to stack overflow */
+	check_stack_depth();
+
+	/*
+	 * We scan pg_depend to find those things that depend on the given type.
+	 * (We assume we can ignore refobjsubid for a type.)
+	 */
+	depRel = table_open(DependRelationId, AccessShareLock);
+
+	ScanKeyInit(&key[0],
+				Anum_pg_depend_refclassid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(TypeRelationId));
+	ScanKeyInit(&key[1],
+				Anum_pg_depend_refobjid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(typeOid));
+
+	depScan = systable_beginscan(depRel, DependReferenceIndexId, true,
+								 NULL, 2, key);
+
+	while (HeapTupleIsValid(depTup = systable_getnext(depScan)))
+	{
+		Form_pg_depend pg_depend = (Form_pg_depend) GETSTRUCT(depTup);
+		Relation	rel;
+		TupleDesc	tupleDesc;
+		Form_pg_attribute att;
+
+		/* Check for directly dependent types */
+		if (pg_depend->classid == TypeRelationId)
+		{
+			/*
+			 * This must be an array, domain, or range containing the given
+			 * type, so recursively check for uses of this type.  Note that
+			 * any error message will mention the original type not the
+			 * container; this is intentional.
+			 */
+			find_composite_type_dependencies(pg_depend->objid,
+											 origRelation, origTypeName);
+			continue;
+		}
+
+		/* Else, ignore dependees that aren't relations */
+		if (pg_depend->classid != RelationRelationId)
+			continue;
+
+		/* Lock the dependent relation only briefly while we inspect it */
+		rel = relation_open(pg_depend->objid, AccessShareLock);
+		tupleDesc = RelationGetDescr(rel);
+
+		/*
+		 * If objsubid identifies a specific column, refer to that in error
+		 * messages.  Otherwise, search to see if there's a user column of the
+		 * type.  (We assume system columns are never of interesting types.)
+		 * The search is needed because an index containing an expression
+		 * column of the target type will just be recorded as a whole-relation
+		 * dependency.  If we do not find a column of the type, the dependency
+		 * must indicate that the type is transiently referenced in an index
+		 * expression but not stored on disk, which we assume is OK, just as
+		 * we do for references in views.  (It could also be that the target
+		 * type is embedded in some container type that is stored in an index
+		 * column, but the previous recursion should catch such cases.)
+		 */
+		if (pg_depend->objsubid > 0 && pg_depend->objsubid <= tupleDesc->natts)
+			att = TupleDescAttr(tupleDesc, pg_depend->objsubid - 1);
+		else
+		{
+			att = NULL;
+			for (int attno = 1; attno <= tupleDesc->natts; attno++)
+			{
+				att = TupleDescAttr(tupleDesc, attno - 1);
+				if (att->atttypid == typeOid && !att->attisdropped)
+					break;
+				att = NULL;
+			}
+			if (att == NULL)
+			{
+				/* No such column, so assume OK */
+				relation_close(rel, AccessShareLock);
+				continue;
+			}
+		}
+
+		/*
+		 * We definitely should reject if the relation has storage.  If it's
+		 * partitioned, then perhaps we don't have to reject: if there are
+		 * partitions then we'll fail when we find one, else there is no
+		 * stored data to worry about.  However, it's possible that the type
+		 * change would affect conclusions about whether the type is sortable
+		 * or hashable and thus (if it's a partitioning column) break the
+		 * partitioning rule.  For now, reject for partitioned rels too.
+		 */
+		if (RELKIND_HAS_STORAGE(rel->rd_rel->relkind) ||
+			RELKIND_HAS_PARTITIONS(rel->rd_rel->relkind))
+		{
+			/* Pick the error wording that matches what the caller gave us */
+			if (origTypeName)
+				ereport(ERROR,
+						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+						 errmsg("cannot alter type \"%s\" because column \"%s.%s\" uses it",
+								origTypeName,
+								RelationGetRelationName(rel),
+								NameStr(att->attname))));
+			else if (origRelation->rd_rel->relkind == RELKIND_COMPOSITE_TYPE)
+				ereport(ERROR,
+						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+						 errmsg("cannot alter type \"%s\" because column \"%s.%s\" uses it",
+								RelationGetRelationName(origRelation),
+								RelationGetRelationName(rel),
+								NameStr(att->attname))));
+			else if (origRelation->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+				ereport(ERROR,
+						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+						 errmsg("cannot alter foreign table \"%s\" because column \"%s.%s\" uses its row type",
+								RelationGetRelationName(origRelation),
+								RelationGetRelationName(rel),
+								NameStr(att->attname))));
+			else
+				ereport(ERROR,
+						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+						 errmsg("cannot alter table \"%s\" because column \"%s.%s\" uses its row type",
+								RelationGetRelationName(origRelation),
+								RelationGetRelationName(rel),
+								NameStr(att->attname))));
+		}
+		else if (OidIsValid(rel->rd_rel->reltype))
+		{
+			/*
+			 * A view or composite type itself isn't a problem, but we must
+			 * recursively check for indirect dependencies via its rowtype.
+			 */
+			find_composite_type_dependencies(rel->rd_rel->reltype,
+											 origRelation, origTypeName);
+		}
+
+		relation_close(rel, AccessShareLock);
+	}
+
+	systable_endscan(depScan);
+
+	relation_close(depRel, AccessShareLock);
+}
+
+
+/*
+ * find_typed_table_dependencies
+ *
+ * Look up all typed tables whose declared type is the given composite type.
+ * Under RESTRICT behavior, finding any such table is an error; otherwise
+ * the OIDs of all of them are returned as a list.
+ */
+static List *
+find_typed_table_dependencies(Oid typeOid, const char *typeName, DropBehavior behavior)
+{
+	Relation	pg_class_rel;
+	ScanKeyData skey[1];
+	TableScanDesc scandesc;
+	HeapTuple	tup;
+	List	   *tables = NIL;
+
+	pg_class_rel = table_open(RelationRelationId, AccessShareLock);
+
+	/* Scan pg_class for rows whose reloftype matches the given type */
+	ScanKeyInit(&skey[0],
+				Anum_pg_class_reloftype,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(typeOid));
+
+	scandesc = table_beginscan_catalog(pg_class_rel, 1, skey);
+
+	while ((tup = heap_getnext(scandesc, ForwardScanDirection)) != NULL)
+	{
+		Form_pg_class classform = (Form_pg_class) GETSTRUCT(tup);
+
+		/* Any dependent typed table at all is fatal under RESTRICT */
+		if (behavior == DROP_RESTRICT)
+			ereport(ERROR,
+					(errcode(ERRCODE_DEPENDENT_OBJECTS_STILL_EXIST),
+					 errmsg("cannot alter type \"%s\" because it is the type of a typed table",
+							typeName),
+					 errhint("Use ALTER ... CASCADE to alter the typed tables too.")));
+		else
+			tables = lappend_oid(tables, classform->oid);
+	}
+
+	table_endscan(scandesc);
+	table_close(pg_class_rel, AccessShareLock);
+
+	return tables;
+}
+
+
+/*
+ * check_of_type
+ *
+ * Check whether a type is suitable for CREATE TABLE OF/ALTER TABLE OF.  If it
+ * isn't suitable, throw an error.  Currently, we require that the type
+ * originated with CREATE TYPE AS.  We could support any row type, but doing so
+ * would require handling a number of extra corner cases in the DDL commands.
+ * (Also, allowing domain-over-composite would open up a can of worms about
+ * whether and how the domain's constraints should apply to derived tables.)
+ */
+void
+check_of_type(HeapTuple typetuple)
+{
+	Form_pg_type typ = (Form_pg_type) GETSTRUCT(typetuple);
+	bool		is_acceptable = false;
+
+	if (typ->typtype == TYPTYPE_COMPOSITE)
+	{
+		Relation	typrel;
+
+		Assert(OidIsValid(typ->typrelid));
+		typrel = relation_open(typ->typrelid, AccessShareLock);
+		is_acceptable = (typrel->rd_rel->relkind == RELKIND_COMPOSITE_TYPE);
+
+		/*
+		 * Release the relation but keep our AccessShareLock until xact
+		 * commit, so that nobody can drop or ALTER the type before the
+		 * typed-table creation/conversion commits.
+		 */
+		relation_close(typrel, NoLock);
+	}
+
+	if (!is_acceptable)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("type %s is not a composite type",
+						format_type_be(typ->oid))));
+}
+
+
+/*
+ * ALTER TABLE ADD COLUMN
+ *
+ * Adds an additional attribute to a relation making the assumption that
+ * CHECK, NOT NULL, and FOREIGN KEY constraints will be removed from the
+ * AT_AddColumn AlterTableCmd by parse_utilcmd.c and added as independent
+ * AlterTableCmd's.
+ *
+ * ADD COLUMN cannot use the normal ALTER TABLE recursion mechanism, because we
+ * have to decide at runtime whether to recurse or not depending on whether we
+ * actually add a column or merely merge with an existing column. (We can't
+ * check this in a static pre-pass because it won't handle multiple inheritance
+ * situations correctly.)
+ */
+static void
+ATPrepAddColumn(List **wqueue, Relation rel, bool recurse, bool recursing,
+ bool is_view, AlterTableCmd *cmd, LOCKMODE lockmode,
+ AlterTableUtilityContext *context)
+{
+ if (rel->rd_rel->reloftype && !recursing)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot add column to typed table")));
+
+ if (rel->rd_rel->relkind == RELKIND_COMPOSITE_TYPE)
+ ATTypedTableRecursion(wqueue, rel, cmd, lockmode, context);
+
+ if (recurse && !is_view)
+ cmd->subtype = AT_AddColumnRecurse;
+}
+
/*
 * Add a column to a table.  The return value is the address of the
 * new column in the parent relation, or InvalidObjectAddress if the
 * column was merged with an inherited definition or skipped via
 * IF NOT EXISTS.
 *
 * cmd is pass-by-ref so that we can replace it with the parse-transformed
 * copy (but that happens only after we check for IF NOT EXISTS).
 *
 * Recurses to child tables one inheritance level at a time (see below).
 */
static ObjectAddress
ATExecAddColumn(List **wqueue, AlteredTableInfo *tab, Relation rel,
				AlterTableCmd **cmd,
				bool recurse, bool recursing,
				LOCKMODE lockmode, int cur_pass,
				AlterTableUtilityContext *context)
{
	Oid			myrelid = RelationGetRelid(rel);
	ColumnDef  *colDef = castNode(ColumnDef, (*cmd)->def);
	bool		if_not_exists = (*cmd)->missing_ok;
	Relation	pgclass,
				attrdesc;
	HeapTuple	reltup;
	FormData_pg_attribute attribute;
	int			newattnum;
	char		relkind;
	HeapTuple	typeTuple;
	Oid			typeOid;
	int32		typmod;
	Oid			collOid;
	Form_pg_type tform;
	Expr	   *defval;
	List	   *children;
	ListCell   *child;
	AlterTableCmd *childcmd;
	AclResult	aclresult;
	ObjectAddress address;
	TupleDesc	tupdesc;
	FormData_pg_attribute *aattr[] = {&attribute};

	/* At top level, permission check was done in ATPrepCmd, else do it */
	if (recursing)
		ATSimplePermissions((*cmd)->subtype, rel, ATT_TABLE | ATT_FOREIGN_TABLE);

	/* Partitions may only gain columns via recursion from their parent */
	if (rel->rd_rel->relispartition && !recursing)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("cannot add column to a partition")));

	attrdesc = table_open(AttributeRelationId, RowExclusiveLock);

	/*
	 * Are we adding the column to a recursion child?  If so, check whether to
	 * merge with an existing definition for the column.  If we do merge, we
	 * must not recurse.  Children will already have the column, and recursing
	 * into them would mess up attinhcount.
	 */
	if (colDef->inhcount > 0)
	{
		HeapTuple	tuple;

		/* Does child already have a column by this name? */
		tuple = SearchSysCacheCopyAttName(myrelid, colDef->colname);
		if (HeapTupleIsValid(tuple))
		{
			Form_pg_attribute childatt = (Form_pg_attribute) GETSTRUCT(tuple);
			Oid			ctypeId;
			int32		ctypmod;
			Oid			ccollid;

			/* Child column must match on type, typmod, and collation */
			typenameTypeIdAndMod(NULL, colDef->typeName, &ctypeId, &ctypmod);
			if (ctypeId != childatt->atttypid ||
				ctypmod != childatt->atttypmod)
				ereport(ERROR,
						(errcode(ERRCODE_DATATYPE_MISMATCH),
						 errmsg("child table \"%s\" has different type for column \"%s\"",
								RelationGetRelationName(rel), colDef->colname)));
			ccollid = GetColumnDefCollation(NULL, colDef, ctypeId);
			if (ccollid != childatt->attcollation)
				ereport(ERROR,
						(errcode(ERRCODE_COLLATION_MISMATCH),
						 errmsg("child table \"%s\" has different collation for column \"%s\"",
								RelationGetRelationName(rel), colDef->colname),
						 errdetail("\"%s\" versus \"%s\"",
								   get_collation_name(ccollid),
								   get_collation_name(childatt->attcollation))));

			/* Bump the existing child att's inhcount */
			childatt->attinhcount++;
			CatalogTupleUpdate(attrdesc, &tuple->t_self, tuple);

			heap_freetuple(tuple);

			/* Inform the user about the merge */
			ereport(NOTICE,
					(errmsg("merging definition of column \"%s\" for child \"%s\"",
							colDef->colname, RelationGetRelationName(rel))));

			/* Merged: no new column was created, and we must not recurse */
			table_close(attrdesc, RowExclusiveLock);
			return InvalidObjectAddress;
		}
	}

	/* skip if the name already exists and if_not_exists is true */
	if (!check_for_column_name_collision(rel, colDef->colname, if_not_exists))
	{
		table_close(attrdesc, RowExclusiveLock);
		return InvalidObjectAddress;
	}

	/*
	 * Okay, we need to add the column, so go ahead and do parse
	 * transformation.  This can result in queueing up, or even immediately
	 * executing, subsidiary operations (such as creation of unique indexes);
	 * so we mustn't do it until we have made the if_not_exists check.
	 *
	 * When recursing, the command was already transformed and we needn't do
	 * so again.  Also, if context isn't given we can't transform.  (That
	 * currently happens only for AT_AddColumnToView; we expect that view.c
	 * passed us a ColumnDef that doesn't need work.)
	 */
	if (context != NULL && !recursing)
	{
		*cmd = ATParseTransformCmd(wqueue, tab, rel, *cmd, recurse, lockmode,
								   cur_pass, context);
		Assert(*cmd != NULL);
		colDef = castNode(ColumnDef, (*cmd)->def);
	}

	/*
	 * Cannot add identity column if table has children, because identity does
	 * not inherit.  (Adding column and identity separately will work.)
	 */
	if (colDef->identity &&
		recurse &&
		find_inheritance_children(myrelid, NoLock) != NIL)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
				 errmsg("cannot recursively add identity column to table that has child tables")));

	pgclass = table_open(RelationRelationId, RowExclusiveLock);

	reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(myrelid));
	if (!HeapTupleIsValid(reltup))
		elog(ERROR, "cache lookup failed for relation %u", myrelid);
	relkind = ((Form_pg_class) GETSTRUCT(reltup))->relkind;

	/* Determine the new attribute's number */
	newattnum = ((Form_pg_class) GETSTRUCT(reltup))->relnatts + 1;
	if (newattnum > MaxHeapAttributeNumber)
		ereport(ERROR,
				(errcode(ERRCODE_TOO_MANY_COLUMNS),
				 errmsg("tables can have at most %d columns",
						MaxHeapAttributeNumber)));

	typeTuple = typenameType(NULL, colDef->typeName, &typmod);
	tform = (Form_pg_type) GETSTRUCT(typeTuple);
	typeOid = tform->oid;

	/* The user must have USAGE privilege on the column's datatype */
	aclresult = pg_type_aclcheck(typeOid, GetUserId(), ACL_USAGE);
	if (aclresult != ACLCHECK_OK)
		aclcheck_error_type(aclresult, typeOid);

	collOid = GetColumnDefCollation(NULL, colDef, typeOid);

	/* make sure datatype is legal for a column */
	CheckAttributeType(colDef->colname, typeOid, collOid,
					   list_make1_oid(rel->rd_rel->reltype),
					   0);

	/*
	 * Construct new attribute's pg_attribute entry.  (Variable-length fields
	 * are handled by InsertPgAttributeTuples().)
	 */
	attribute.attrelid = myrelid;
	namestrcpy(&(attribute.attname), colDef->colname);
	attribute.atttypid = typeOid;
	/* -1 selects the default statistics target; newattnum is always > 0 here */
	attribute.attstattarget = (newattnum > 0) ? -1 : 0;
	attribute.attlen = tform->typlen;
	attribute.attnum = newattnum;
	attribute.attndims = list_length(colDef->typeName->arrayBounds);
	attribute.atttypmod = typmod;
	attribute.attbyval = tform->typbyval;
	attribute.attalign = tform->typalign;
	attribute.attstorage = tform->typstorage;
	attribute.attcompression = GetAttributeCompression(typeOid,
													   colDef->compression);
	attribute.attnotnull = colDef->is_not_null;
	attribute.atthasdef = false;
	attribute.atthasmissing = false;
	attribute.attidentity = colDef->identity;
	attribute.attgenerated = colDef->generated;
	attribute.attisdropped = false;
	attribute.attislocal = colDef->is_local;
	attribute.attinhcount = colDef->inhcount;
	attribute.attcollation = collOid;

	ReleaseSysCache(typeTuple);

	/* Wrap the single pg_attribute row in a TupleDesc for insertion */
	tupdesc = CreateTupleDesc(lengthof(aattr), (FormData_pg_attribute **) &aattr);

	InsertPgAttributeTuples(attrdesc, tupdesc, myrelid, NULL, NULL);

	table_close(attrdesc, RowExclusiveLock);

	/*
	 * Update pg_class tuple as appropriate
	 */
	((Form_pg_class) GETSTRUCT(reltup))->relnatts = newattnum;

	CatalogTupleUpdate(pgclass, &reltup->t_self, reltup);

	heap_freetuple(reltup);

	/* Post creation hook for new attribute */
	InvokeObjectPostCreateHook(RelationRelationId, myrelid, newattnum);

	table_close(pgclass, RowExclusiveLock);

	/* Make the attribute's catalog entry visible */
	CommandCounterIncrement();

	/*
	 * Store the DEFAULT, if any, in the catalogs
	 */
	if (colDef->raw_default)
	{
		RawColumnDefault *rawEnt;

		rawEnt = (RawColumnDefault *) palloc(sizeof(RawColumnDefault));
		rawEnt->attnum = attribute.attnum;
		rawEnt->raw_default = copyObject(colDef->raw_default);

		/*
		 * Attempt to skip a complete table rewrite by storing the specified
		 * DEFAULT value outside of the heap.  This may be disabled inside
		 * AddRelationNewConstraints if the optimization cannot be applied.
		 */
		rawEnt->missingMode = (!colDef->generated);

		rawEnt->generated = colDef->generated;

		/*
		 * This function is intended for CREATE TABLE, so it processes a
		 * _list_ of defaults, but we just do one.
		 */
		AddRelationNewConstraints(rel, list_make1(rawEnt), NIL,
								  false, true, false, NULL);

		/* Make the additional catalog changes visible */
		CommandCounterIncrement();

		/*
		 * Did the request for a missing value work? If not we'll have to do a
		 * rewrite
		 */
		if (!rawEnt->missingMode)
			tab->rewrite |= AT_REWRITE_DEFAULT_VAL;
	}

	/*
	 * Tell Phase 3 to fill in the default expression, if there is one.
	 *
	 * If there is no default, Phase 3 doesn't have to do anything, because
	 * that effectively means that the default is NULL.  The heap tuple access
	 * routines always check for attnum > # of attributes in tuple, and return
	 * NULL if so, so without any modification of the tuple data we will get
	 * the effect of NULL values in the new column.
	 *
	 * An exception occurs when the new column is of a domain type: the domain
	 * might have a NOT NULL constraint, or a check constraint that indirectly
	 * rejects nulls.  If there are any domain constraints then we construct
	 * an explicit NULL default value that will be passed through
	 * CoerceToDomain processing.  (This is a tad inefficient, since it causes
	 * rewriting the table which we really don't have to do, but the present
	 * design of domain processing doesn't offer any simple way of checking
	 * the constraints more directly.)
	 *
	 * Note: we use build_column_default, and not just the cooked default
	 * returned by AddRelationNewConstraints, so that the right thing happens
	 * when a datatype's default applies.
	 *
	 * Note: it might seem that this should happen at the end of Phase 2, so
	 * that the effects of subsequent subcommands can be taken into account.
	 * It's intentional that we do it now, though.  The new column should be
	 * filled according to what is said in the ADD COLUMN subcommand, so that
	 * the effects are the same as if this subcommand had been run by itself
	 * and the later subcommands had been issued in new ALTER TABLE commands.
	 *
	 * We can skip this entirely for relations without storage, since Phase 3
	 * is certainly not going to touch them.  System attributes don't have
	 * interesting defaults, either.
	 */
	if (RELKIND_HAS_STORAGE(relkind) && attribute.attnum > 0)
	{
		/*
		 * For an identity column, we can't use build_column_default(),
		 * because the sequence ownership isn't set yet.  So do it manually.
		 */
		if (colDef->identity)
		{
			NextValueExpr *nve = makeNode(NextValueExpr);

			nve->seqid = RangeVarGetRelid(colDef->identitySequence, NoLock, false);
			nve->typeId = typeOid;

			defval = (Expr *) nve;

			/* must do a rewrite for identity columns */
			tab->rewrite |= AT_REWRITE_DEFAULT_VAL;
		}
		else
			defval = (Expr *) build_column_default(rel, attribute.attnum);

		if (!defval && DomainHasConstraints(typeOid))
		{
			Oid			baseTypeId;
			int32		baseTypeMod;
			Oid			baseTypeColl;

			/* Build a NULL of the base type, then coerce up to the domain */
			baseTypeMod = typmod;
			baseTypeId = getBaseTypeAndTypmod(typeOid, &baseTypeMod);
			baseTypeColl = get_typcollation(baseTypeId);
			defval = (Expr *) makeNullConst(baseTypeId, baseTypeMod, baseTypeColl);
			defval = (Expr *) coerce_to_target_type(NULL,
													(Node *) defval,
													baseTypeId,
													typeOid,
													typmod,
													COERCION_ASSIGNMENT,
													COERCE_IMPLICIT_CAST,
													-1);
			if (defval == NULL) /* should not happen */
				elog(ERROR, "failed to coerce base type to domain");
		}

		if (defval)
		{
			NewColumnValue *newval;

			/* Queue the default expression for Phase 3 to evaluate */
			newval = (NewColumnValue *) palloc0(sizeof(NewColumnValue));
			newval->attnum = attribute.attnum;
			newval->expr = expression_planner(defval);
			newval->is_generated = (colDef->generated != '\0');

			tab->newvals = lappend(tab->newvals, newval);
		}

		if (DomainHasConstraints(typeOid))
			tab->rewrite |= AT_REWRITE_DEFAULT_VAL;

		if (!TupleDescAttr(rel->rd_att, attribute.attnum - 1)->atthasmissing)
		{
			/*
			 * If the new column is NOT NULL, and there is no missing value,
			 * tell Phase 3 it needs to check for NULLs.
			 */
			tab->verify_new_notnull |= colDef->is_not_null;
		}
	}

	/*
	 * Add needed dependency entries for the new column.
	 */
	add_column_datatype_dependency(myrelid, newattnum, attribute.atttypid);
	add_column_collation_dependency(myrelid, newattnum, attribute.attcollation);

	/*
	 * Propagate to children as appropriate.  Unlike most other ALTER
	 * routines, we have to do this one level of recursion at a time; we can't
	 * use find_all_inheritors to do it in one pass.
	 */
	children =
		find_inheritance_children(RelationGetRelid(rel), lockmode);

	/*
	 * If we are told not to recurse, there had better not be any child
	 * tables; else the addition would put them out of step.
	 */
	if (children && !recurse)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
				 errmsg("column must be added to child tables too")));

	/* Children should see column as singly inherited */
	if (!recursing)
	{
		/* Copy before mutating so the caller's command is untouched */
		childcmd = copyObject(*cmd);
		colDef = castNode(ColumnDef, childcmd->def);
		colDef->inhcount = 1;
		colDef->is_local = false;
	}
	else
		childcmd = *cmd;		/* no need to copy again */

	foreach(child, children)
	{
		Oid			childrelid = lfirst_oid(child);
		Relation	childrel;
		AlteredTableInfo *childtab;

		/* find_inheritance_children already got lock */
		childrel = table_open(childrelid, NoLock);
		CheckTableNotInUse(childrel, "ALTER TABLE");

		/* Find or create work queue entry for this table */
		childtab = ATGetQueueEntry(wqueue, childrel);

		/* Recurse to child; return value is ignored */
		ATExecAddColumn(wqueue, childtab, childrel,
						&childcmd, recurse, true,
						lockmode, cur_pass, context);

		table_close(childrel, NoLock);
	}

	ObjectAddressSubSet(address, RelationRelationId, myrelid, newattnum);
	return address;
}
+
+/*
+ * If a new or renamed column will collide with the name of an existing
+ * column and if_not_exists is false then error out, else do nothing.
+ */
+static bool
+check_for_column_name_collision(Relation rel, const char *colname,
+ bool if_not_exists)
+{
+ HeapTuple attTuple;
+ int attnum;
+
+ /*
+ * this test is deliberately not attisdropped-aware, since if one tries to
+ * add a column matching a dropped column name, it's gonna fail anyway.
+ */
+ attTuple = SearchSysCache2(ATTNAME,
+ ObjectIdGetDatum(RelationGetRelid(rel)),
+ PointerGetDatum(colname));
+ if (!HeapTupleIsValid(attTuple))
+ return true;
+
+ attnum = ((Form_pg_attribute) GETSTRUCT(attTuple))->attnum;
+ ReleaseSysCache(attTuple);
+
+ /*
+ * We throw a different error message for conflicts with system column
+ * names, since they are normally not shown and the user might otherwise
+ * be confused about the reason for the conflict.
+ */
+ if (attnum <= 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_COLUMN),
+ errmsg("column name \"%s\" conflicts with a system column name",
+ colname)));
+ else
+ {
+ if (if_not_exists)
+ {
+ ereport(NOTICE,
+ (errcode(ERRCODE_DUPLICATE_COLUMN),
+ errmsg("column \"%s\" of relation \"%s\" already exists, skipping",
+ colname, RelationGetRelationName(rel))));
+ return false;
+ }
+
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_COLUMN),
+ errmsg("column \"%s\" of relation \"%s\" already exists",
+ colname, RelationGetRelationName(rel))));
+ }
+
+ return true;
+}
+
+/*
+ * Install a column's dependency on its datatype.
+ */
+static void
+add_column_datatype_dependency(Oid relid, int32 attnum, Oid typid)
+{
+ ObjectAddress myself,
+ referenced;
+
+ myself.classId = RelationRelationId;
+ myself.objectId = relid;
+ myself.objectSubId = attnum;
+ referenced.classId = TypeRelationId;
+ referenced.objectId = typid;
+ referenced.objectSubId = 0;
+ recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
+}
+
+/*
+ * Install a column's dependency on its collation.
+ */
+static void
+add_column_collation_dependency(Oid relid, int32 attnum, Oid collid)
+{
+ ObjectAddress myself,
+ referenced;
+
+ /* We know the default collation is pinned, so don't bother recording it */
+ if (OidIsValid(collid) && collid != DEFAULT_COLLATION_OID)
+ {
+ myself.classId = RelationRelationId;
+ myself.objectId = relid;
+ myself.objectSubId = attnum;
+ referenced.classId = CollationRelationId;
+ referenced.objectId = collid;
+ referenced.objectSubId = 0;
+ recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
+ }
+}
+
+/*
+ * ALTER TABLE ALTER COLUMN DROP NOT NULL
+ */
+
+static void
+ATPrepDropNotNull(Relation rel, bool recurse, bool recursing)
+{
+ /*
+ * If the parent is a partitioned table, like check constraints, we do not
+ * support removing the NOT NULL while partitions exist.
+ */
+ if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ {
+ PartitionDesc partdesc = RelationGetPartitionDesc(rel, true);
+
+ Assert(partdesc != NULL);
+ if (partdesc->nparts > 0 && !recurse && !recursing)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+ errmsg("cannot remove constraint from only the partitioned table when partitions exist"),
+ errhint("Do not specify the ONLY keyword.")));
+ }
+}
+
/*
 * Return the address of the modified column.  If the column was already
 * nullable, InvalidObjectAddress is returned.
 *
 * Errors out if the column is a system column, an identity column, part of
 * a primary key or replica-identity index, or inherits NOT NULL from a
 * partitioned parent.  lockmode is currently unused here; the caller has
 * already locked the relation.
 */
static ObjectAddress
ATExecDropNotNull(Relation rel, const char *colName, LOCKMODE lockmode)
{
	HeapTuple	tuple;
	Form_pg_attribute attTup;
	AttrNumber	attnum;
	Relation	attr_rel;
	List	   *indexoidlist;
	ListCell   *indexoidscan;
	ObjectAddress address;

	/*
	 * lookup the attribute
	 */
	attr_rel = table_open(AttributeRelationId, RowExclusiveLock);

	tuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), colName);
	if (!HeapTupleIsValid(tuple))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_COLUMN),
				 errmsg("column \"%s\" of relation \"%s\" does not exist",
						colName, RelationGetRelationName(rel))));
	attTup = (Form_pg_attribute) GETSTRUCT(tuple);
	attnum = attTup->attnum;

	/* Prevent them from altering a system attribute */
	if (attnum <= 0)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot alter system column \"%s\"",
						colName)));

	/* Identity columns are implicitly NOT NULL; disallow dropping it */
	if (attTup->attidentity)
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 errmsg("column \"%s\" of relation \"%s\" is an identity column",
						colName, RelationGetRelationName(rel))));

	/*
	 * Check that the attribute is not in a primary key or in an index used as
	 * a replica identity.
	 *
	 * Note: we'll throw error even if the pkey index is not valid.
	 */

	/* Loop over all indexes on the relation */
	indexoidlist = RelationGetIndexList(rel);

	foreach(indexoidscan, indexoidlist)
	{
		Oid			indexoid = lfirst_oid(indexoidscan);
		HeapTuple	indexTuple;
		Form_pg_index indexStruct;
		int			i;

		indexTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexoid));
		if (!HeapTupleIsValid(indexTuple))
			elog(ERROR, "cache lookup failed for index %u", indexoid);
		indexStruct = (Form_pg_index) GETSTRUCT(indexTuple);

		/*
		 * If the index is not a primary key or an index used as replica
		 * identity, skip the check.
		 */
		if (indexStruct->indisprimary || indexStruct->indisreplident)
		{
			/*
			 * Loop over each attribute in the primary key or the index used
			 * as replica identity and see if it matches the to-be-altered
			 * attribute.  (Only key columns matter, not INCLUDE columns.)
			 */
			for (i = 0; i < indexStruct->indnkeyatts; i++)
			{
				if (indexStruct->indkey.values[i] == attnum)
				{
					if (indexStruct->indisprimary)
						ereport(ERROR,
								(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
								 errmsg("column \"%s\" is in a primary key",
										colName)));
					else
						ereport(ERROR,
								(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
								 errmsg("column \"%s\" is in index used as replica identity",
										colName)));
				}
			}
		}

		ReleaseSysCache(indexTuple);
	}

	list_free(indexoidlist);

	/* If rel is partition, shouldn't drop NOT NULL if parent has the same */
	if (rel->rd_rel->relispartition)
	{
		Oid			parentId = get_partition_parent(RelationGetRelid(rel), false);
		Relation	parent = table_open(parentId, AccessShareLock);
		TupleDesc	tupDesc = RelationGetDescr(parent);
		AttrNumber	parent_attnum;

		/*
		 * NOTE(review): get_attnum can return InvalidAttrNumber; this code
		 * appears to assume the column always exists in the parent — confirm
		 * that partition/parent descriptors cannot diverge here.
		 */
		parent_attnum = get_attnum(parentId, colName);
		if (TupleDescAttr(tupDesc, parent_attnum - 1)->attnotnull)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
					 errmsg("column \"%s\" is marked NOT NULL in parent table",
							colName)));
		table_close(parent, AccessShareLock);
	}

	/*
	 * Okay, actually perform the catalog change ... if needed
	 */
	if (attTup->attnotnull)
	{
		attTup->attnotnull = false;

		CatalogTupleUpdate(attr_rel, &tuple->t_self, tuple);

		ObjectAddressSubSet(address, RelationRelationId,
							RelationGetRelid(rel), attnum);
	}
	else
		address = InvalidObjectAddress;

	/* Fire the post-alter hook even when no catalog change was made */
	InvokeObjectPostAlterHook(RelationRelationId,
							  RelationGetRelid(rel), attnum);

	table_close(attr_rel, RowExclusiveLock);

	return address;
}
+
/*
 * ALTER TABLE ALTER COLUMN SET NOT NULL
 */

/*
 * Prep phase: decide how (or whether) to recurse to child tables.
 */
static void
ATPrepSetNotNull(List **wqueue, Relation rel,
				 AlterTableCmd *cmd, bool recurse, bool recursing,
				 LOCKMODE lockmode, AlterTableUtilityContext *context)
{
	/*
	 * If we're already recursing, there's nothing to do; the topmost
	 * invocation of ATSimpleRecursion already visited all children.
	 */
	if (recursing)
		return;

	/*
	 * If the target column is already marked NOT NULL, we can skip recursing
	 * to children, because their columns should already be marked NOT NULL as
	 * well.  But there's no point in checking here unless the relation has
	 * some children; else we can just wait till execution to check.  (If it
	 * does have children, however, this can save taking per-child locks
	 * unnecessarily.  This greatly improves concurrency in some parallel
	 * restore scenarios.)
	 *
	 * Unfortunately, we can only apply this optimization to partitioned
	 * tables, because traditional inheritance doesn't enforce that child
	 * columns be NOT NULL when their parent is.  (That's a bug that should
	 * get fixed someday.)
	 */
	if (rel->rd_rel->relhassubclass &&
		rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
	{
		HeapTuple	tuple;
		bool		attnotnull;

		tuple = SearchSysCacheAttName(RelationGetRelid(rel), cmd->name);

		/* Might as well throw the error now, if name is bad */
		if (!HeapTupleIsValid(tuple))
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_COLUMN),
					 errmsg("column \"%s\" of relation \"%s\" does not exist",
							cmd->name, RelationGetRelationName(rel))));

		attnotnull = ((Form_pg_attribute) GETSTRUCT(tuple))->attnotnull;
		ReleaseSysCache(tuple);
		/* Already NOT NULL: nothing for children to do */
		if (attnotnull)
			return;
	}

	/*
	 * If we have ALTER TABLE ONLY ... SET NOT NULL on a partitioned table,
	 * apply ALTER TABLE ... CHECK NOT NULL to every child.  Otherwise, use
	 * normal recursion logic.
	 */
	if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
		!recurse)
	{
		AlterTableCmd *newcmd = makeNode(AlterTableCmd);

		newcmd->subtype = AT_CheckNotNull;
		newcmd->name = pstrdup(cmd->name);
		ATSimpleRecursion(wqueue, rel, newcmd, true, lockmode, context);
	}
	else
		ATSimpleRecursion(wqueue, rel, cmd, recurse, lockmode, context);
}
+
/*
 * Return the address of the modified column.  If the column was already NOT
 * NULL, InvalidObjectAddress is returned.
 *
 * May set tab->verify_new_notnull, telling Phase 3 to scan the table for
 * NULLs, unless an existing constraint already proves the column non-null.
 * lockmode is currently unused here; the caller has already locked the rel.
 */
static ObjectAddress
ATExecSetNotNull(AlteredTableInfo *tab, Relation rel,
				 const char *colName, LOCKMODE lockmode)
{
	HeapTuple	tuple;
	AttrNumber	attnum;
	Relation	attr_rel;
	ObjectAddress address;

	/*
	 * lookup the attribute
	 */
	attr_rel = table_open(AttributeRelationId, RowExclusiveLock);

	tuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), colName);

	if (!HeapTupleIsValid(tuple))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_COLUMN),
				 errmsg("column \"%s\" of relation \"%s\" does not exist",
						colName, RelationGetRelationName(rel))));

	attnum = ((Form_pg_attribute) GETSTRUCT(tuple))->attnum;

	/* Prevent them from altering a system attribute */
	if (attnum <= 0)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot alter system column \"%s\"",
						colName)));

	/*
	 * Okay, actually perform the catalog change ... if needed
	 */
	if (!((Form_pg_attribute) GETSTRUCT(tuple))->attnotnull)
	{
		((Form_pg_attribute) GETSTRUCT(tuple))->attnotnull = true;

		CatalogTupleUpdate(attr_rel, &tuple->t_self, tuple);

		/*
		 * Ordinarily phase 3 must ensure that no NULLs exist in columns that
		 * are set NOT NULL; however, if we can find a constraint which proves
		 * this then we can skip that.  We needn't bother looking if we've
		 * already found that we must verify some other NOT NULL constraint.
		 */
		if (!tab->verify_new_notnull &&
			!NotNullImpliedByRelConstraints(rel, (Form_pg_attribute) GETSTRUCT(tuple)))
		{
			/* Tell Phase 3 it needs to test the constraint */
			tab->verify_new_notnull = true;
		}

		ObjectAddressSubSet(address, RelationRelationId,
							RelationGetRelid(rel), attnum);
	}
	else
		address = InvalidObjectAddress;

	/* Fire the post-alter hook even when no catalog change was made */
	InvokeObjectPostAlterHook(RelationRelationId,
							  RelationGetRelid(rel), attnum);

	table_close(attr_rel, RowExclusiveLock);

	return address;
}
+
+/*
+ * ALTER TABLE ALTER COLUMN CHECK NOT NULL
+ *
+ * This doesn't exist in the grammar, but we generate AT_CheckNotNull
+ * commands against the partitions of a partitioned table if the user
+ * writes ALTER TABLE ONLY ... SET NOT NULL on the partitioned table,
+ * or tries to create a primary key on it (which internally creates
+ * AT_SetNotNull on the partitioned table). Such a command doesn't
+ * allow us to actually modify any partition, but we want to let it
+ * go through if the partitions are already properly marked.
+ *
+ * In future, this might need to adjust the child table's state, likely
+ * by incrementing an inheritance count for the attnotnull constraint.
+ * For now we need only check for the presence of the flag.
+ */
+static void
+ATExecCheckNotNull(AlteredTableInfo *tab, Relation rel,
+ const char *colName, LOCKMODE lockmode)
+{
+ HeapTuple tuple;
+
+ tuple = SearchSysCacheAttName(RelationGetRelid(rel), colName);
+
+ if (!HeapTupleIsValid(tuple))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_COLUMN),
+ errmsg("column \"%s\" of relation \"%s\" does not exist",
+ colName, RelationGetRelationName(rel))));
+
+ if (!((Form_pg_attribute) GETSTRUCT(tuple))->attnotnull)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+ errmsg("constraint must be added to child tables too"),
+ errdetail("Column \"%s\" of relation \"%s\" is not already NOT NULL.",
+ colName, RelationGetRelationName(rel)),
+ errhint("Do not specify the ONLY keyword.")));
+
+ ReleaseSysCache(tuple);
+}
+
+/*
+ * NotNullImpliedByRelConstraints
+ * Does rel's existing constraints imply NOT NULL for the given attribute?
+ */
+static bool
+NotNullImpliedByRelConstraints(Relation rel, Form_pg_attribute attr)
+{
+ NullTest *nnulltest = makeNode(NullTest);
+
+ nnulltest->arg = (Expr *) makeVar(1,
+ attr->attnum,
+ attr->atttypid,
+ attr->atttypmod,
+ attr->attcollation,
+ 0);
+ nnulltest->nulltesttype = IS_NOT_NULL;
+
+ /*
+ * argisrow = false is correct even for a composite column, because
+ * attnotnull does not represent a SQL-spec IS NOT NULL test in such a
+ * case, just IS DISTINCT FROM NULL.
+ */
+ nnulltest->argisrow = false;
+ nnulltest->location = -1;
+
+ if (ConstraintImpliedByRelConstraint(rel, list_make1(nnulltest), NIL))
+ {
+ ereport(DEBUG1,
+ (errmsg_internal("existing constraints on column \"%s.%s\" are sufficient to prove that it does not contain nulls",
+ RelationGetRelationName(rel), NameStr(attr->attname))));
+ return true;
+ }
+
+ return false;
+}
+
/*
 * ALTER TABLE ALTER COLUMN SET/DROP DEFAULT
 *
 * newDefault is the raw (untransformed) new default expression, or NULL for
 * DROP DEFAULT.  Identity and generated columns are rejected; their
 * "defaults" are managed through dedicated commands.  lockmode is currently
 * unused here; the caller has already locked the relation.
 *
 * Return the address of the affected column.
 */
static ObjectAddress
ATExecColumnDefault(Relation rel, const char *colName,
					Node *newDefault, LOCKMODE lockmode)
{
	TupleDesc	tupdesc = RelationGetDescr(rel);
	AttrNumber	attnum;
	ObjectAddress address;

	/*
	 * get the number of the attribute
	 */
	attnum = get_attnum(RelationGetRelid(rel), colName);
	if (attnum == InvalidAttrNumber)
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_COLUMN),
				 errmsg("column \"%s\" of relation \"%s\" does not exist",
						colName, RelationGetRelationName(rel))));

	/* Prevent them from altering a system attribute */
	if (attnum <= 0)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot alter system column \"%s\"",
						colName)));

	/* Identity columns: only hint at DROP IDENTITY for the DROP DEFAULT case */
	if (TupleDescAttr(tupdesc, attnum - 1)->attidentity)
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 errmsg("column \"%s\" of relation \"%s\" is an identity column",
						colName, RelationGetRelationName(rel)),
				 newDefault ? 0 : errhint("Use ALTER TABLE ... ALTER COLUMN ... DROP IDENTITY instead.")));

	/* Generated columns: hint at DROP EXPRESSION only where it applies */
	if (TupleDescAttr(tupdesc, attnum - 1)->attgenerated)
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 errmsg("column \"%s\" of relation \"%s\" is a generated column",
						colName, RelationGetRelationName(rel)),
				 newDefault || TupleDescAttr(tupdesc, attnum - 1)->attgenerated != ATTRIBUTE_GENERATED_STORED ? 0 :
				 errhint("Use ALTER TABLE ... ALTER COLUMN ... DROP EXPRESSION instead.")));

	/*
	 * Remove any old default for the column.  We use RESTRICT here for
	 * safety, but at present we do not expect anything to depend on the
	 * default.
	 *
	 * We treat removing the existing default as an internal operation when it
	 * is preparatory to adding a new default, but as a user-initiated
	 * operation when the user asked for a drop.
	 */
	RemoveAttrDefault(RelationGetRelid(rel), attnum, DROP_RESTRICT, false,
					  newDefault != NULL);

	if (newDefault)
	{
		/* SET DEFAULT */
		RawColumnDefault *rawEnt;

		rawEnt = (RawColumnDefault *) palloc(sizeof(RawColumnDefault));
		rawEnt->attnum = attnum;
		rawEnt->raw_default = newDefault;
		rawEnt->missingMode = false;
		rawEnt->generated = '\0';

		/*
		 * This function is intended for CREATE TABLE, so it processes a
		 * _list_ of defaults, but we just do one.
		 */
		AddRelationNewConstraints(rel, list_make1(rawEnt), NIL,
								  false, true, false, NULL);
	}

	ObjectAddressSubSet(address, RelationRelationId,
						RelationGetRelid(rel), attnum);
	return address;
}
+
+/*
+ * Add a pre-cooked default expression.
+ *
+ * "Pre-cooked" means the expression has already been through parse analysis
+ * (for instance, it was copied from another relation), so it is stored
+ * as-is with no further checking or transformation here.
+ *
+ * Return the address of the affected column.
+ */
+static ObjectAddress
+ATExecCookedColumnDefault(Relation rel, AttrNumber attnum,
+						  Node *newDefault)
+{
+	ObjectAddress address;
+
+	/* We assume no checking is required */
+
+	/*
+	 * Remove any old default for the column.  We use RESTRICT here for
+	 * safety, but at present we do not expect anything to depend on the
+	 * default.  (In ordinary cases, there could not be a default in place
+	 * anyway, but it's possible when combining LIKE with inheritance.)
+	 */
+	RemoveAttrDefault(RelationGetRelid(rel), attnum, DROP_RESTRICT, false,
+					  true);
+
+	/* Store the cooked expression; the returned pg_attrdef OID is not needed */
+	(void) StoreAttrDefault(rel, attnum, newDefault, true, false);
+
+	ObjectAddressSubSet(address, RelationRelationId,
+						RelationGetRelid(rel), attnum);
+	return address;
+}
+
+/*
+ * ALTER TABLE ALTER COLUMN ADD IDENTITY
+ *
+ * "def" is a ColumnDef node; only its "identity" field (the new attidentity
+ * character) is consulted here.  The column must already be NOT NULL, must
+ * not already be an identity column, and must have no default.
+ *
+ * Return the address of the affected column.
+ */
+static ObjectAddress
+ATExecAddIdentity(Relation rel, const char *colName,
+				  Node *def, LOCKMODE lockmode)
+{
+	Relation	attrelation;
+	HeapTuple	tuple;
+	Form_pg_attribute attTup;
+	AttrNumber	attnum;
+	ObjectAddress address;
+	ColumnDef  *cdef = castNode(ColumnDef, def);
+
+	attrelation = table_open(AttributeRelationId, RowExclusiveLock);
+
+	tuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), colName);
+	if (!HeapTupleIsValid(tuple))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_COLUMN),
+				 errmsg("column \"%s\" of relation \"%s\" does not exist",
+						colName, RelationGetRelationName(rel))));
+	attTup = (Form_pg_attribute) GETSTRUCT(tuple);
+	attnum = attTup->attnum;
+
+	/* Can't alter a system attribute */
+	if (attnum <= 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot alter system column \"%s\"",
+						colName)));
+
+	/*
+	 * Creating a column as identity implies NOT NULL, so adding the identity
+	 * to an existing column that is not NOT NULL would create a state that
+	 * cannot be reproduced without contortions.
+	 */
+	if (!attTup->attnotnull)
+		ereport(ERROR,
+				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				 errmsg("column \"%s\" of relation \"%s\" must be declared NOT NULL before identity can be added",
+						colName, RelationGetRelationName(rel))));
+
+	if (attTup->attidentity)
+		ereport(ERROR,
+				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				 errmsg("column \"%s\" of relation \"%s\" is already an identity column",
+						colName, RelationGetRelationName(rel))));
+
+	if (attTup->atthasdef)
+		ereport(ERROR,
+				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				 errmsg("column \"%s\" of relation \"%s\" already has a default value",
+						colName, RelationGetRelationName(rel))));
+
+	/* Record the identity kind taken from the ColumnDef */
+	attTup->attidentity = cdef->identity;
+	CatalogTupleUpdate(attrelation, &tuple->t_self, tuple);
+
+	InvokeObjectPostAlterHook(RelationRelationId,
+							  RelationGetRelid(rel),
+							  attTup->attnum);
+	ObjectAddressSubSet(address, RelationRelationId,
+						RelationGetRelid(rel), attnum);
+	heap_freetuple(tuple);
+
+	table_close(attrelation, RowExclusiveLock);
+
+	return address;
+}
+
+/*
+ * ALTER TABLE ALTER COLUMN SET { GENERATED or sequence options }
+ *
+ * "def" is a List of DefElem; the only option recognized here is
+ * "generated".  Sequence options are handled by a separate ALTER SEQUENCE
+ * issued elsewhere (see comment below).
+ *
+ * Return the address of the affected column, or InvalidObjectAddress if
+ * no catalog change was made here.
+ */
+static ObjectAddress
+ATExecSetIdentity(Relation rel, const char *colName, Node *def, LOCKMODE lockmode)
+{
+	ListCell   *option;
+	DefElem    *generatedEl = NULL;
+	HeapTuple	tuple;
+	Form_pg_attribute attTup;
+	AttrNumber	attnum;
+	Relation	attrelation;
+	ObjectAddress address;
+
+	/* Scan the option list; reject duplicates and unknown options */
+	foreach(option, castNode(List, def))
+	{
+		DefElem    *defel = lfirst_node(DefElem, option);
+
+		if (strcmp(defel->defname, "generated") == 0)
+		{
+			if (generatedEl)
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("conflicting or redundant options")));
+			generatedEl = defel;
+		}
+		else
+			elog(ERROR, "option \"%s\" not recognized",
+				 defel->defname);
+	}
+
+	/*
+	 * Even if there is nothing to change here, we run all the checks.  There
+	 * will be a subsequent ALTER SEQUENCE that relies on everything being
+	 * there.
+	 */
+
+	attrelation = table_open(AttributeRelationId, RowExclusiveLock);
+	tuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), colName);
+	if (!HeapTupleIsValid(tuple))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_COLUMN),
+				 errmsg("column \"%s\" of relation \"%s\" does not exist",
+						colName, RelationGetRelationName(rel))));
+
+	attTup = (Form_pg_attribute) GETSTRUCT(tuple);
+	attnum = attTup->attnum;
+
+	if (attnum <= 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot alter system column \"%s\"",
+						colName)));
+
+	if (!attTup->attidentity)
+		ereport(ERROR,
+				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				 errmsg("column \"%s\" of relation \"%s\" is not an identity column",
+						colName, RelationGetRelationName(rel))));
+
+	if (generatedEl)
+	{
+		/*
+		 * The option's integer value carries the new attidentity character
+		 * (presumably ALWAYS vs. BY DEFAULT as set by the grammar — the
+		 * producer is not visible here).
+		 */
+		attTup->attidentity = defGetInt32(generatedEl);
+		CatalogTupleUpdate(attrelation, &tuple->t_self, tuple);
+
+		InvokeObjectPostAlterHook(RelationRelationId,
+								  RelationGetRelid(rel),
+								  attTup->attnum);
+		ObjectAddressSubSet(address, RelationRelationId,
+							RelationGetRelid(rel), attnum);
+	}
+	else
+		address = InvalidObjectAddress;
+
+	heap_freetuple(tuple);
+	table_close(attrelation, RowExclusiveLock);
+
+	return address;
+}
+
+/*
+ * ALTER TABLE ALTER COLUMN DROP IDENTITY
+ *
+ * Clears the column's attidentity and drops the internal sequence that
+ * backed the identity column.  With missing_ok, a non-identity column
+ * produces a NOTICE and InvalidObjectAddress instead of an error.
+ *
+ * Return the address of the affected column.
+ */
+static ObjectAddress
+ATExecDropIdentity(Relation rel, const char *colName, bool missing_ok, LOCKMODE lockmode)
+{
+	HeapTuple	tuple;
+	Form_pg_attribute attTup;
+	AttrNumber	attnum;
+	Relation	attrelation;
+	ObjectAddress address;
+	Oid			seqid;
+	ObjectAddress seqaddress;
+
+	attrelation = table_open(AttributeRelationId, RowExclusiveLock);
+	tuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), colName);
+	if (!HeapTupleIsValid(tuple))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_COLUMN),
+				 errmsg("column \"%s\" of relation \"%s\" does not exist",
+						colName, RelationGetRelationName(rel))));
+
+	attTup = (Form_pg_attribute) GETSTRUCT(tuple);
+	attnum = attTup->attnum;
+
+	if (attnum <= 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot alter system column \"%s\"",
+						colName)));
+
+	if (!attTup->attidentity)
+	{
+		if (!missing_ok)
+			ereport(ERROR,
+					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+					 errmsg("column \"%s\" of relation \"%s\" is not an identity column",
+							colName, RelationGetRelationName(rel))));
+		else
+		{
+			ereport(NOTICE,
+					(errmsg("column \"%s\" of relation \"%s\" is not an identity column, skipping",
+							colName, RelationGetRelationName(rel))));
+			heap_freetuple(tuple);
+			table_close(attrelation, RowExclusiveLock);
+			return InvalidObjectAddress;
+		}
+	}
+
+	/* Mark the column as no longer identity */
+	attTup->attidentity = '\0';
+	CatalogTupleUpdate(attrelation, &tuple->t_self, tuple);
+
+	InvokeObjectPostAlterHook(RelationRelationId,
+							  RelationGetRelid(rel),
+							  attTup->attnum);
+	ObjectAddressSubSet(address, RelationRelationId,
+						RelationGetRelid(rel), attnum);
+	heap_freetuple(tuple);
+
+	table_close(attrelation, RowExclusiveLock);
+
+	/*
+	 * Drop the internal sequence.  First remove its INTERNAL dependency on
+	 * the table so that performDeletion() deletes only the sequence rather
+	 * than refusing (or recursing to the table); the CCI makes the catalog
+	 * changes above and the dependency removal visible to the deletion.
+	 */
+	seqid = getIdentitySequence(RelationGetRelid(rel), attnum, false);
+	deleteDependencyRecordsForClass(RelationRelationId, seqid,
+									RelationRelationId, DEPENDENCY_INTERNAL);
+	CommandCounterIncrement();
+	seqaddress.classId = RelationRelationId;
+	seqaddress.objectId = seqid;
+	seqaddress.objectSubId = 0;
+	performDeletion(&seqaddress, DROP_RESTRICT, PERFORM_DELETION_INTERNAL);
+
+	return address;
+}
+
+/*
+ * ALTER TABLE ALTER COLUMN DROP EXPRESSION
+ *
+ * Preparation phase: reject cases that cannot (yet) be supported before any
+ * catalog changes are made.  The actual work happens in
+ * ATExecDropExpression().
+ */
+static void
+ATPrepDropExpression(Relation rel, AlterTableCmd *cmd, bool recurse, bool recursing, LOCKMODE lockmode)
+{
+	/*
+	 * Reject ONLY if there are child tables.  We could implement this, but it
+	 * is a bit complicated.  GENERATED clauses must be attached to the column
+	 * definition and cannot be added later like DEFAULT, so if a child table
+	 * has a generation expression that the parent does not have, the child
+	 * column will necessarily be an attislocal column.  So to implement ONLY
+	 * here, we'd need extra code to update attislocal of the direct child
+	 * tables, somewhat similar to how DROP COLUMN does it, so that the
+	 * resulting state can be properly dumped and restored.
+	 */
+	if (!recurse &&
+		find_inheritance_children(RelationGetRelid(rel), lockmode))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("ALTER TABLE / DROP EXPRESSION must be applied to child tables too")));
+
+	/*
+	 * Cannot drop generation expression from inherited columns.
+	 */
+	if (!recursing)
+	{
+		HeapTuple	tuple;
+		Form_pg_attribute attTup;
+
+		tuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), cmd->name);
+		if (!HeapTupleIsValid(tuple))
+			ereport(ERROR,
+					(errcode(ERRCODE_UNDEFINED_COLUMN),
+					 errmsg("column \"%s\" of relation \"%s\" does not exist",
+							cmd->name, RelationGetRelationName(rel))));
+
+		attTup = (Form_pg_attribute) GETSTRUCT(tuple);
+
+		if (attTup->attinhcount > 0)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+					 errmsg("cannot drop generation expression from inherited column")));
+	}
+}
+
+/*
+ * ALTER TABLE ALTER COLUMN DROP EXPRESSION
+ *
+ * Execution phase: turn a stored generated column into an ordinary column
+ * by clearing attgenerated and removing the generation expression's
+ * pg_attrdef entry.  With missing_ok, a non-generated column produces a
+ * NOTICE and InvalidObjectAddress instead of an error.
+ *
+ * Return the address of the affected column.
+ */
+static ObjectAddress
+ATExecDropExpression(Relation rel, const char *colName, bool missing_ok, LOCKMODE lockmode)
+{
+	HeapTuple	tuple;
+	Form_pg_attribute attTup;
+	AttrNumber	attnum;
+	Relation	attrelation;
+	Oid			attrdefoid;
+	ObjectAddress address;
+
+	attrelation = table_open(AttributeRelationId, RowExclusiveLock);
+	tuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), colName);
+	if (!HeapTupleIsValid(tuple))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_COLUMN),
+				 errmsg("column \"%s\" of relation \"%s\" does not exist",
+						colName, RelationGetRelationName(rel))));
+
+	attTup = (Form_pg_attribute) GETSTRUCT(tuple);
+	attnum = attTup->attnum;
+
+	if (attnum <= 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot alter system column \"%s\"",
+						colName)));
+
+	if (attTup->attgenerated != ATTRIBUTE_GENERATED_STORED)
+	{
+		if (!missing_ok)
+			ereport(ERROR,
+					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+					 errmsg("column \"%s\" of relation \"%s\" is not a stored generated column",
+							colName, RelationGetRelationName(rel))));
+		else
+		{
+			ereport(NOTICE,
+					(errmsg("column \"%s\" of relation \"%s\" is not a stored generated column, skipping",
+							colName, RelationGetRelationName(rel))));
+			heap_freetuple(tuple);
+			table_close(attrelation, RowExclusiveLock);
+			return InvalidObjectAddress;
+		}
+	}
+
+	/*
+	 * Mark the column as no longer generated.  (The atthasdef flag needs to
+	 * get cleared too, but RemoveAttrDefault will handle that.)
+	 */
+	attTup->attgenerated = '\0';
+	CatalogTupleUpdate(attrelation, &tuple->t_self, tuple);
+
+	InvokeObjectPostAlterHook(RelationRelationId,
+							  RelationGetRelid(rel),
+							  attnum);
+	heap_freetuple(tuple);
+
+	table_close(attrelation, RowExclusiveLock);
+
+	/*
+	 * Drop the dependency records of the GENERATED expression, in particular
+	 * its INTERNAL dependency on the column, which would otherwise cause
+	 * dependency.c to refuse to perform the deletion.
+	 */
+	attrdefoid = GetAttrDefaultOid(RelationGetRelid(rel), attnum);
+	if (!OidIsValid(attrdefoid))
+		elog(ERROR, "could not find attrdef tuple for relation %u attnum %d",
+			 RelationGetRelid(rel), attnum);
+	(void) deleteDependencyRecordsFor(AttrDefaultRelationId, attrdefoid, false);
+
+	/* Make above changes visible */
+	CommandCounterIncrement();
+
+	/*
+	 * Get rid of the GENERATED expression itself.  We use RESTRICT here for
+	 * safety, but at present we do not expect anything to depend on the
+	 * default.
+	 */
+	RemoveAttrDefault(RelationGetRelid(rel), attnum, DROP_RESTRICT,
+					  false, false);
+
+	ObjectAddressSubSet(address, RelationRelationId,
+						RelationGetRelid(rel), attnum);
+	return address;
+}
+
+/*
+ * ALTER TABLE ALTER COLUMN SET STATISTICS
+ *
+ * The column may be identified either by name (colName, for tables and
+ * index expression columns) or by number (colNum, indexes only; colName is
+ * then NULL).  newValue is an Integer node holding the new target, which is
+ * clamped to the range [-1, 10000].
+ *
+ * Return value is the address of the modified column
+ */
+static ObjectAddress
+ATExecSetStatistics(Relation rel, const char *colName, int16 colNum, Node *newValue, LOCKMODE lockmode)
+{
+	int			newtarget;
+	Relation	attrelation;
+	HeapTuple	tuple;
+	Form_pg_attribute attrtuple;
+	AttrNumber	attnum;
+	ObjectAddress address;
+
+	/*
+	 * We allow referencing columns by numbers only for indexes, since table
+	 * column numbers could contain gaps if columns are later dropped.
+	 */
+	if (rel->rd_rel->relkind != RELKIND_INDEX &&
+		rel->rd_rel->relkind != RELKIND_PARTITIONED_INDEX &&
+		!colName)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot refer to non-index column by number")));
+
+	Assert(IsA(newValue, Integer));
+	newtarget = intVal(newValue);
+
+	/*
+	 * Limit target to a sane range
+	 */
+	if (newtarget < -1)
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("statistics target %d is too low",
+						newtarget)));
+	}
+	else if (newtarget > 10000)
+	{
+		newtarget = 10000;
+		ereport(WARNING,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("lowering statistics target to %d",
+						newtarget)));
+	}
+
+	attrelation = table_open(AttributeRelationId, RowExclusiveLock);
+
+	/* Look up the attribute by whichever identifier was supplied */
+	if (colName)
+	{
+		tuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), colName);
+
+		if (!HeapTupleIsValid(tuple))
+			ereport(ERROR,
+					(errcode(ERRCODE_UNDEFINED_COLUMN),
+					 errmsg("column \"%s\" of relation \"%s\" does not exist",
+							colName, RelationGetRelationName(rel))));
+	}
+	else
+	{
+		tuple = SearchSysCacheCopyAttNum(RelationGetRelid(rel), colNum);
+
+		if (!HeapTupleIsValid(tuple))
+			ereport(ERROR,
+					(errcode(ERRCODE_UNDEFINED_COLUMN),
+					 errmsg("column number %d of relation \"%s\" does not exist",
+							colNum, RelationGetRelationName(rel))));
+	}
+
+	attrtuple = (Form_pg_attribute) GETSTRUCT(tuple);
+
+	attnum = attrtuple->attnum;
+	/* NOTE(review): colName could be NULL here if a system column were ever
+	 * reachable via colNum; presumably the grammar prevents that — confirm. */
+	if (attnum <= 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot alter system column \"%s\"",
+						colName)));
+
+	if (rel->rd_rel->relkind == RELKIND_INDEX ||
+		rel->rd_rel->relkind == RELKIND_PARTITIONED_INDEX)
+	{
+		/* Only expression key columns of an index carry statistics targets */
+		if (attnum > rel->rd_index->indnkeyatts)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("cannot alter statistics on included column \"%s\" of index \"%s\"",
+							NameStr(attrtuple->attname), RelationGetRelationName(rel))));
+		else if (rel->rd_index->indkey.values[attnum - 1] != 0)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("cannot alter statistics on non-expression column \"%s\" of index \"%s\"",
+							NameStr(attrtuple->attname), RelationGetRelationName(rel)),
+					 errhint("Alter statistics on table column instead.")));
+	}
+
+	attrtuple->attstattarget = newtarget;
+
+	CatalogTupleUpdate(attrelation, &tuple->t_self, tuple);
+
+	InvokeObjectPostAlterHook(RelationRelationId,
+							  RelationGetRelid(rel),
+							  attrtuple->attnum);
+	ObjectAddressSubSet(address, RelationRelationId,
+						RelationGetRelid(rel), attnum);
+	heap_freetuple(tuple);
+
+	table_close(attrelation, RowExclusiveLock);
+
+	return address;
+}
+
+/*
+ * ALTER TABLE ALTER COLUMN SET ( options ) / RESET ( options )
+ *
+ * "options" is a List of DefElem; isReset selects RESET semantics.  The
+ * merged option array is validated with attribute_reloptions() before being
+ * stored in pg_attribute.attoptions.
+ *
+ * Return value is the address of the modified column
+ */
+static ObjectAddress
+ATExecSetOptions(Relation rel, const char *colName, Node *options,
+				 bool isReset, LOCKMODE lockmode)
+{
+	Relation	attrelation;
+	HeapTuple	tuple,
+				newtuple;
+	Form_pg_attribute attrtuple;
+	AttrNumber	attnum;
+	Datum		datum,
+				newOptions;
+	bool		isnull;
+	ObjectAddress address;
+	Datum		repl_val[Natts_pg_attribute];
+	bool		repl_null[Natts_pg_attribute];
+	bool		repl_repl[Natts_pg_attribute];
+
+	attrelation = table_open(AttributeRelationId, RowExclusiveLock);
+
+	tuple = SearchSysCacheAttName(RelationGetRelid(rel), colName);
+
+	if (!HeapTupleIsValid(tuple))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_COLUMN),
+				 errmsg("column \"%s\" of relation \"%s\" does not exist",
+						colName, RelationGetRelationName(rel))));
+	attrtuple = (Form_pg_attribute) GETSTRUCT(tuple);
+
+	attnum = attrtuple->attnum;
+	if (attnum <= 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot alter system column \"%s\"",
+						colName)));
+
+	/* Generate new proposed attoptions (text array) */
+	datum = SysCacheGetAttr(ATTNAME, tuple, Anum_pg_attribute_attoptions,
+							&isnull);
+	newOptions = transformRelOptions(isnull ? (Datum) 0 : datum,
+									 castNode(List, options), NULL, NULL,
+									 false, isReset);
+	/* Validate new options */
+	(void) attribute_reloptions(newOptions, true);
+
+	/* Build new tuple.  A zero Datum means "no options left": store NULL. */
+	memset(repl_null, false, sizeof(repl_null));
+	memset(repl_repl, false, sizeof(repl_repl));
+	if (newOptions != (Datum) 0)
+		repl_val[Anum_pg_attribute_attoptions - 1] = newOptions;
+	else
+		repl_null[Anum_pg_attribute_attoptions - 1] = true;
+	repl_repl[Anum_pg_attribute_attoptions - 1] = true;
+	newtuple = heap_modify_tuple(tuple, RelationGetDescr(attrelation),
+								 repl_val, repl_null, repl_repl);
+
+	/* Update system catalog. */
+	CatalogTupleUpdate(attrelation, &newtuple->t_self, newtuple);
+
+	InvokeObjectPostAlterHook(RelationRelationId,
+							  RelationGetRelid(rel),
+							  attrtuple->attnum);
+	ObjectAddressSubSet(address, RelationRelationId,
+						RelationGetRelid(rel), attnum);
+
+	heap_freetuple(newtuple);
+
+	ReleaseSysCache(tuple);
+
+	table_close(attrelation, RowExclusiveLock);
+
+	return address;
+}
+
+/*
+ * Helper function for ATExecSetStorage and ATExecSetCompression
+ *
+ * Set the attstorage and/or attcompression fields for index columns
+ * associated with the specified table column.  Only simple (non-expression)
+ * index columns referencing the table column are affected; indexes that do
+ * not reference it are skipped.
+ *
+ * attrelation is pg_attribute, already opened by the caller; setstorage and
+ * setcompression select which of the two fields to update.
+ */
+static void
+SetIndexStorageProperties(Relation rel, Relation attrelation,
+						  AttrNumber attnum,
+						  bool setstorage, char newstorage,
+						  bool setcompression, char newcompression,
+						  LOCKMODE lockmode)
+{
+	ListCell   *lc;
+
+	foreach(lc, RelationGetIndexList(rel))
+	{
+		Oid			indexoid = lfirst_oid(lc);
+		Relation	indrel;
+		AttrNumber	indattnum = 0;
+		HeapTuple	tuple;
+
+		indrel = index_open(indexoid, lockmode);
+
+		/* Find the index column (if any) that maps to the table column */
+		for (int i = 0; i < indrel->rd_index->indnatts; i++)
+		{
+			if (indrel->rd_index->indkey.values[i] == attnum)
+			{
+				indattnum = i + 1;
+				break;
+			}
+		}
+
+		/* This index doesn't use the column directly; skip it */
+		if (indattnum == 0)
+		{
+			index_close(indrel, lockmode);
+			continue;
+		}
+
+		tuple = SearchSysCacheCopyAttNum(RelationGetRelid(indrel), indattnum);
+
+		if (HeapTupleIsValid(tuple))
+		{
+			Form_pg_attribute attrtuple = (Form_pg_attribute) GETSTRUCT(tuple);
+
+			if (setstorage)
+				attrtuple->attstorage = newstorage;
+
+			if (setcompression)
+				attrtuple->attcompression = newcompression;
+
+			CatalogTupleUpdate(attrelation, &tuple->t_self, tuple);
+
+			InvokeObjectPostAlterHook(RelationRelationId,
+									  RelationGetRelid(rel),
+									  attrtuple->attnum);
+
+			heap_freetuple(tuple);
+		}
+
+		index_close(indrel, lockmode);
+	}
+}
+
+/*
+ * ALTER TABLE ALTER COLUMN SET STORAGE
+ *
+ * newValue is a String node naming the storage mode (plain / external /
+ * extended / main, case-insensitive).  The change is also propagated to
+ * simple index columns on the same attribute.
+ *
+ * Return value is the address of the modified column
+ */
+static ObjectAddress
+ATExecSetStorage(Relation rel, const char *colName, Node *newValue, LOCKMODE lockmode)
+{
+	char	   *storagemode;
+	char		newstorage;
+	Relation	attrelation;
+	HeapTuple	tuple;
+	Form_pg_attribute attrtuple;
+	AttrNumber	attnum;
+	ObjectAddress address;
+
+	Assert(IsA(newValue, String));
+	storagemode = strVal(newValue);
+
+	/* Translate the keyword to a TYPSTORAGE code */
+	if (pg_strcasecmp(storagemode, "plain") == 0)
+		newstorage = TYPSTORAGE_PLAIN;
+	else if (pg_strcasecmp(storagemode, "external") == 0)
+		newstorage = TYPSTORAGE_EXTERNAL;
+	else if (pg_strcasecmp(storagemode, "extended") == 0)
+		newstorage = TYPSTORAGE_EXTENDED;
+	else if (pg_strcasecmp(storagemode, "main") == 0)
+		newstorage = TYPSTORAGE_MAIN;
+	else
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("invalid storage type \"%s\"",
+						storagemode)));
+		newstorage = 0;			/* keep compiler quiet */
+	}
+
+	attrelation = table_open(AttributeRelationId, RowExclusiveLock);
+
+	tuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), colName);
+
+	if (!HeapTupleIsValid(tuple))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_COLUMN),
+				 errmsg("column \"%s\" of relation \"%s\" does not exist",
+						colName, RelationGetRelationName(rel))));
+	attrtuple = (Form_pg_attribute) GETSTRUCT(tuple);
+
+	attnum = attrtuple->attnum;
+	if (attnum <= 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot alter system column \"%s\"",
+						colName)));
+
+	/*
+	 * safety check: do not allow toasted storage modes unless column datatype
+	 * is TOAST-aware.
+	 */
+	if (newstorage == TYPSTORAGE_PLAIN || TypeIsToastable(attrtuple->atttypid))
+		attrtuple->attstorage = newstorage;
+	else
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("column data type %s can only have storage PLAIN",
+						format_type_be(attrtuple->atttypid))));
+
+	CatalogTupleUpdate(attrelation, &tuple->t_self, tuple);
+
+	InvokeObjectPostAlterHook(RelationRelationId,
+							  RelationGetRelid(rel),
+							  attrtuple->attnum);
+
+	heap_freetuple(tuple);
+
+	/*
+	 * Apply the change to indexes as well (only for simple index columns,
+	 * matching behavior of index.c ConstructTupleDescriptor()).
+	 */
+	SetIndexStorageProperties(rel, attrelation, attnum,
+							  true, newstorage,
+							  false, 0,
+							  lockmode);
+
+	table_close(attrelation, RowExclusiveLock);
+
+	ObjectAddressSubSet(address, RelationRelationId,
+						RelationGetRelid(rel), attnum);
+	return address;
+}
+
+
+/*
+ * ALTER TABLE DROP COLUMN
+ *
+ * DROP COLUMN cannot use the normal ALTER TABLE recursion mechanism,
+ * because we have to decide at runtime whether to recurse or not depending
+ * on whether attinhcount goes to zero or not.  (We can't check this in a
+ * static pre-pass because it won't handle multiple inheritance situations
+ * correctly.)
+ *
+ * This prep routine only rejects typed tables, recurses into typed-table
+ * dependents of composite types, and tags the command so that the exec
+ * phase knows recursion was requested.
+ */
+static void
+ATPrepDropColumn(List **wqueue, Relation rel, bool recurse, bool recursing,
+				 AlterTableCmd *cmd, LOCKMODE lockmode,
+				 AlterTableUtilityContext *context)
+{
+	if (rel->rd_rel->reloftype && !recursing)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("cannot drop column from typed table")));
+
+	if (rel->rd_rel->relkind == RELKIND_COMPOSITE_TYPE)
+		ATTypedTableRecursion(wqueue, rel, cmd, lockmode, context);
+
+	/* Record the recursion request for the exec phase */
+	if (recurse)
+		cmd->subtype = AT_DropColumnRecurse;
+}
+
+/*
+ * Drops column 'colName' from relation 'rel' and returns the address of the
+ * dropped column.  The column is also dropped (or marked as no longer
+ * inherited from relation) from the relation's inheritance children, if any.
+ *
+ * In the recursive invocations for inheritance child relations, instead of
+ * dropping the column directly (if to be dropped at all), its object address
+ * is added to 'addrs', which must be non-NULL in such invocations.  All
+ * columns are dropped at the same time after all the children have been
+ * checked recursively.
+ *
+ * With missing_ok, a nonexistent column produces a NOTICE and
+ * InvalidObjectAddress instead of an error.
+ */
+static ObjectAddress
+ATExecDropColumn(List **wqueue, Relation rel, const char *colName,
+				 DropBehavior behavior,
+				 bool recurse, bool recursing,
+				 bool missing_ok, LOCKMODE lockmode,
+				 ObjectAddresses *addrs)
+{
+	HeapTuple	tuple;
+	Form_pg_attribute targetatt;
+	AttrNumber	attnum;
+	List	   *children;
+	ObjectAddress object;
+	bool		is_expr;
+
+	/* At top level, permission check was done in ATPrepCmd, else do it */
+	if (recursing)
+		ATSimplePermissions(AT_DropColumn, rel, ATT_TABLE | ATT_FOREIGN_TABLE);
+
+	/* Initialize addrs on the first invocation */
+	Assert(!recursing || addrs != NULL);
+	if (!recursing)
+		addrs = new_object_addresses();
+
+	/*
+	 * get the number of the attribute
+	 */
+	tuple = SearchSysCacheAttName(RelationGetRelid(rel), colName);
+	if (!HeapTupleIsValid(tuple))
+	{
+		if (!missing_ok)
+		{
+			ereport(ERROR,
+					(errcode(ERRCODE_UNDEFINED_COLUMN),
+					 errmsg("column \"%s\" of relation \"%s\" does not exist",
+							colName, RelationGetRelationName(rel))));
+		}
+		else
+		{
+			ereport(NOTICE,
+					(errmsg("column \"%s\" of relation \"%s\" does not exist, skipping",
+							colName, RelationGetRelationName(rel))));
+			return InvalidObjectAddress;
+		}
+	}
+	targetatt = (Form_pg_attribute) GETSTRUCT(tuple);
+
+	attnum = targetatt->attnum;
+
+	/* Can't drop a system attribute */
+	if (attnum <= 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot drop system column \"%s\"",
+						colName)));
+
+	/*
+	 * Don't drop inherited columns, unless recursing (presumably from a drop
+	 * of the parent column)
+	 */
+	if (targetatt->attinhcount > 0 && !recursing)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+				 errmsg("cannot drop inherited column \"%s\"",
+						colName)));
+
+	/*
+	 * Don't drop columns used in the partition key, either.  (If we let this
+	 * go through, the key column's dependencies would cause a cascaded drop
+	 * of the whole table, which is surely not what the user expected.)
+	 */
+	if (has_partition_attrs(rel,
+							bms_make_singleton(attnum - FirstLowInvalidHeapAttributeNumber),
+							&is_expr))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+				 errmsg("cannot drop column \"%s\" because it is part of the partition key of relation \"%s\"",
+						colName, RelationGetRelationName(rel))));
+
+	ReleaseSysCache(tuple);
+
+	/*
+	 * Propagate to children as appropriate.  Unlike most other ALTER
+	 * routines, we have to do this one level of recursion at a time; we can't
+	 * use find_all_inheritors to do it in one pass.
+	 */
+	children =
+		find_inheritance_children(RelationGetRelid(rel), lockmode);
+
+	if (children)
+	{
+		Relation	attr_rel;
+		ListCell   *child;
+
+		/*
+		 * In case of a partitioned table, the column must be dropped from the
+		 * partitions as well.
+		 */
+		if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE && !recurse)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+					 errmsg("cannot drop column from only the partitioned table when partitions exist"),
+					 errhint("Do not specify the ONLY keyword.")));
+
+		attr_rel = table_open(AttributeRelationId, RowExclusiveLock);
+		foreach(child, children)
+		{
+			Oid			childrelid = lfirst_oid(child);
+			Relation	childrel;
+			Form_pg_attribute childatt;
+
+			/* find_inheritance_children already got lock */
+			childrel = table_open(childrelid, NoLock);
+			CheckTableNotInUse(childrel, "ALTER TABLE");
+
+			tuple = SearchSysCacheCopyAttName(childrelid, colName);
+			if (!HeapTupleIsValid(tuple))	/* shouldn't happen */
+				elog(ERROR, "cache lookup failed for attribute \"%s\" of relation %u",
+					 colName, childrelid);
+			childatt = (Form_pg_attribute) GETSTRUCT(tuple);
+
+			if (childatt->attinhcount <= 0) /* shouldn't happen */
+				elog(ERROR, "relation %u has non-inherited attribute \"%s\"",
+					 childrelid, colName);
+
+			if (recurse)
+			{
+				/*
+				 * If the child column has other definition sources, just
+				 * decrement its inheritance count; if not, recurse to delete
+				 * it.
+				 */
+				if (childatt->attinhcount == 1 && !childatt->attislocal)
+				{
+					/* Time to delete this child column, too */
+					ATExecDropColumn(wqueue, childrel, colName,
+									 behavior, true, true,
+									 false, lockmode, addrs);
+				}
+				else
+				{
+					/* Child column must survive my deletion */
+					childatt->attinhcount--;
+
+					CatalogTupleUpdate(attr_rel, &tuple->t_self, tuple);
+
+					/* Make update visible */
+					CommandCounterIncrement();
+				}
+			}
+			else
+			{
+				/*
+				 * If we were told to drop ONLY in this table (no recursion),
+				 * we need to mark the inheritors' attributes as locally
+				 * defined rather than inherited.
+				 */
+				childatt->attinhcount--;
+				childatt->attislocal = true;
+
+				CatalogTupleUpdate(attr_rel, &tuple->t_self, tuple);
+
+				/* Make update visible */
+				CommandCounterIncrement();
+			}
+
+			heap_freetuple(tuple);
+
+			table_close(childrel, NoLock);
+		}
+		table_close(attr_rel, RowExclusiveLock);
+	}
+
+	/* Add object to delete */
+	object.classId = RelationRelationId;
+	object.objectId = RelationGetRelid(rel);
+	object.objectSubId = attnum;
+	add_exact_object_address(&object, addrs);
+
+	if (!recursing)
+	{
+		/* Recursion has ended, drop everything that was collected */
+		performMultipleDeletions(addrs, behavior, 0);
+		free_object_addresses(addrs);
+	}
+
+	return object;
+}
+
+/*
+ * ALTER TABLE ADD INDEX
+ *
+ * There is no such command in the grammar, but parse_utilcmd.c converts
+ * UNIQUE and PRIMARY KEY constraints into AT_AddIndex subcommands.  This lets
+ * us schedule creation of the index at the appropriate time during ALTER.
+ *
+ * is_rebuild indicates the index is being recreated (e.g. after a column
+ * type change) rather than newly requested by the user; it suppresses
+ * permission checks and notices.
+ *
+ * Return value is the address of the new index.
+ */
+static ObjectAddress
+ATExecAddIndex(AlteredTableInfo *tab, Relation rel,
+			   IndexStmt *stmt, bool is_rebuild, LOCKMODE lockmode)
+{
+	bool		check_rights;
+	bool		skip_build;
+	bool		quiet;
+	ObjectAddress address;
+
+	Assert(IsA(stmt, IndexStmt));
+	Assert(!stmt->concurrent);
+
+	/* The IndexStmt has already been through transformIndexStmt */
+	Assert(stmt->transformed);
+
+	/* suppress schema rights check when rebuilding existing index */
+	check_rights = !is_rebuild;
+	/* skip index build if phase 3 will do it or we're reusing an old one */
+	skip_build = tab->rewrite > 0 || OidIsValid(stmt->oldNode);
+	/* suppress notices when rebuilding existing index */
+	quiet = is_rebuild;
+
+	address = DefineIndex(RelationGetRelid(rel),
+						  stmt,
+						  InvalidOid,	/* no predefined OID */
+						  InvalidOid,	/* no parent index */
+						  InvalidOid,	/* no parent constraint */
+						  true, /* is_alter_table */
+						  check_rights,
+						  false,	/* check_not_in_use - we did it already */
+						  skip_build,
+						  quiet);
+
+	/*
+	 * If TryReuseIndex() stashed a relfilenode for us, we used it for the new
+	 * index instead of building from scratch.  Restore associated fields.
+	 * This may store InvalidSubTransactionId in both fields, in which case
+	 * relcache.c will assume it can rebuild the relcache entry.  Hence, do
+	 * this after the CCI that made catalog rows visible to any rebuild.  The
+	 * DROP of the old edition of this index will have scheduled the storage
+	 * for deletion at commit, so cancel that pending deletion.
+	 */
+	if (OidIsValid(stmt->oldNode))
+	{
+		Relation	irel = index_open(address.objectId, NoLock);
+
+		irel->rd_createSubid = stmt->oldCreateSubid;
+		irel->rd_firstRelfilenodeSubid = stmt->oldFirstRelfilenodeSubid;
+		RelationPreserveStorage(irel->rd_node, true);
+		index_close(irel, NoLock);
+	}
+
+	return address;
+}
+
+/*
+ * ALTER TABLE ADD STATISTICS
+ *
+ * There is no such command in the grammar, but we use this internally to add
+ * AT_ReAddStatistics subcommands to rebuild extended statistics after a table
+ * column type change.
+ *
+ * is_rebuild and lockmode are currently unused here; they are accepted for
+ * symmetry with the other ATExec* routines.
+ *
+ * Return value is the address of the new statistics object.
+ */
+static ObjectAddress
+ATExecAddStatistics(AlteredTableInfo *tab, Relation rel,
+					CreateStatsStmt *stmt, bool is_rebuild, LOCKMODE lockmode)
+{
+	ObjectAddress address;
+
+	Assert(IsA(stmt, CreateStatsStmt));
+
+	/* The CreateStatsStmt has already been through transformStatsStmt */
+	Assert(stmt->transformed);
+
+	address = CreateStatistics(stmt);
+
+	return address;
+}
+
+/*
+ * ALTER TABLE ADD CONSTRAINT USING INDEX
+ *
+ * Promotes an existing unique index into a UNIQUE or PRIMARY KEY
+ * constraint.  The constraint takes the index's name unless the user
+ * supplied a different one, in which case the index is renamed to match.
+ *
+ * Returns the address of the new constraint.
+ */
+static ObjectAddress
+ATExecAddIndexConstraint(AlteredTableInfo *tab, Relation rel,
+						 IndexStmt *stmt, LOCKMODE lockmode)
+{
+	Oid			index_oid = stmt->indexOid;
+	Relation	indexRel;
+	char	   *indexName;
+	IndexInfo  *indexInfo;
+	char	   *constraintName;
+	char		constraintType;
+	ObjectAddress address;
+	bits16		flags;
+
+	Assert(IsA(stmt, IndexStmt));
+	Assert(OidIsValid(index_oid));
+	Assert(stmt->isconstraint);
+
+	/*
+	 * Doing this on partitioned tables is not a simple feature to implement,
+	 * so let's punt for now.
+	 */
+	if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("ALTER TABLE / ADD CONSTRAINT USING INDEX is not supported on partitioned tables")));
+
+	indexRel = index_open(index_oid, AccessShareLock);
+
+	indexName = pstrdup(RelationGetRelationName(indexRel));
+
+	indexInfo = BuildIndexInfo(indexRel);
+
+	/* this should have been checked at parse time */
+	if (!indexInfo->ii_Unique)
+		elog(ERROR, "index \"%s\" is not unique", indexName);
+
+	/*
+	 * Determine name to assign to constraint.  We require a constraint to
+	 * have the same name as the underlying index; therefore, use the index's
+	 * existing name as the default constraint name, and if the user
+	 * explicitly gives some other name for the constraint, rename the index
+	 * to match.
+	 */
+	constraintName = stmt->idxname;
+	if (constraintName == NULL)
+		constraintName = indexName;
+	else if (strcmp(constraintName, indexName) != 0)
+	{
+		ereport(NOTICE,
+				(errmsg("ALTER TABLE / ADD CONSTRAINT USING INDEX will rename index \"%s\" to \"%s\"",
+						indexName, constraintName)));
+		RenameRelationInternal(index_oid, constraintName, false, true);
+	}
+
+	/* Extra checks needed if making primary key */
+	if (stmt->primary)
+		index_check_primary_key(rel, indexInfo, true, stmt);
+
+	/* Note we currently don't support EXCLUSION constraints here */
+	if (stmt->primary)
+		constraintType = CONSTRAINT_PRIMARY;
+	else
+		constraintType = CONSTRAINT_UNIQUE;
+
+	/* Create the catalog entries for the constraint */
+	flags = INDEX_CONSTR_CREATE_UPDATE_INDEX |
+		INDEX_CONSTR_CREATE_REMOVE_OLD_DEPS |
+		(stmt->initdeferred ? INDEX_CONSTR_CREATE_INIT_DEFERRED : 0) |
+		(stmt->deferrable ? INDEX_CONSTR_CREATE_DEFERRABLE : 0) |
+		(stmt->primary ? INDEX_CONSTR_CREATE_MARK_AS_PRIMARY : 0);
+
+	address = index_constraint_create(rel,
+									  index_oid,
+									  InvalidOid,
+									  indexInfo,
+									  constraintName,
+									  constraintType,
+									  flags,
+									  allowSystemTableMods,
+									  false);	/* is_internal */
+
+	index_close(indexRel, NoLock);
+
+	return address;
+}
+
+/*
+ * ALTER TABLE ADD CONSTRAINT
+ *
+ * Return value is the address of the new constraint; if no constraint was
+ * added, InvalidObjectAddress is returned.
+ *
+ * wqueue is the ALTER TABLE work queue (used by the workers to schedule
+ * Phase 3 validation); tab is the work-queue entry for rel; recurse tells
+ * whether the constraint should be propagated to child tables.
+ * NOTE(review): is_readd is only passed through to the type-specific
+ * workers; it appears to mark a constraint being re-created rather than
+ * newly added -- confirm against callers.
+ */
+static ObjectAddress
+ATExecAddConstraint(List **wqueue, AlteredTableInfo *tab, Relation rel,
+					Constraint *newConstraint, bool recurse, bool is_readd,
+					LOCKMODE lockmode)
+{
+	ObjectAddress address = InvalidObjectAddress;
+
+	Assert(IsA(newConstraint, Constraint));
+
+	/*
+	 * Currently, we only expect to see CONSTR_CHECK and CONSTR_FOREIGN nodes
+	 * arriving here (see the preprocessing done in parse_utilcmd.c). Use a
+	 * switch anyway to make it easier to add more code later.
+	 */
+	switch (newConstraint->contype)
+	{
+		case CONSTR_CHECK:
+			address =
+				ATAddCheckConstraint(wqueue, tab, rel,
+									 newConstraint, recurse, false, is_readd,
+									 lockmode);
+			break;
+
+		case CONSTR_FOREIGN:
+
+			/*
+			 * Assign or validate constraint name
+			 */
+			if (newConstraint->conname)
+			{
+				/* an explicitly given name must not already be in use */
+				if (ConstraintNameIsUsed(CONSTRAINT_RELATION,
+										 RelationGetRelid(rel),
+										 newConstraint->conname))
+					ereport(ERROR,
+							(errcode(ERRCODE_DUPLICATE_OBJECT),
+							 errmsg("constraint \"%s\" for relation \"%s\" already exists",
+									newConstraint->conname,
+									RelationGetRelationName(rel))));
+			}
+			else
+				/* no name given: generate one of the form <rel>_<cols>_fkey */
+				newConstraint->conname =
+					ChooseConstraintName(RelationGetRelationName(rel),
+										 ChooseForeignKeyConstraintNameAddition(newConstraint->fk_attrs),
+										 "fkey",
+										 RelationGetNamespace(rel),
+										 NIL);
+
+			address = ATAddForeignKeyConstraint(wqueue, tab, rel,
+												newConstraint,
+												recurse, false,
+												lockmode);
+			break;
+
+		default:
+			elog(ERROR, "unrecognized constraint type: %d",
+				 (int) newConstraint->contype);
+	}
+
+	return address;
+}
+
+/*
+ * Build the column-name part of an implicitly generated foreign-key
+ * constraint name, e.g. "a_b" for FK columns (a, b).  The result is handed
+ * to ChooseConstraintName together with the parent table name and a "fkey"
+ * suffix.
+ *
+ * Only the first NAMEDATALEN characters of the result can ever end up in a
+ * constraint name, so we stop concatenating once that many are produced.
+ *
+ * XXX see also ChooseExtendedStatisticNameAddition and
+ * ChooseIndexNameAddition.
+ */
+static char *
+ChooseForeignKeyConstraintNameAddition(List *colnames)
+{
+	char		result[NAMEDATALEN * 2];
+	int			used = 0;
+	ListCell   *cell;
+
+	result[0] = '\0';
+	foreach(cell, colnames)
+	{
+		const char *colname = strVal(lfirst(cell));
+
+		if (used > 0)
+			result[used++] = '_';	/* separate adjacent column names */
+
+		/*
+		 * Here used <= NAMEDATALEN.  Each column name should already be
+		 * shorter than NAMEDATALEN, but strlcpy guarantees termination and
+		 * bounds the copy regardless.
+		 */
+		strlcpy(result + used, colname, NAMEDATALEN);
+		used += strlen(result + used);
+		if (used >= NAMEDATALEN)
+			break;				/* further columns could not matter */
+	}
+	return pstrdup(result);
+}
+
+/*
+ * Add a check constraint to a single table and its children.  Returns the
+ * address of the constraint added to the parent relation, if one gets added,
+ * or InvalidObjectAddress otherwise.
+ *
+ * Subroutine for ATExecAddConstraint.
+ *
+ * constr is the raw constraint to add.  recursing is true on the nested
+ * calls made for child tables.  is_readd additionally allows merging with a
+ * pre-existing constraint and makes the catalog entry "internal" (see the
+ * AddRelationNewConstraints call below).
+ *
+ * We must recurse to child tables during execution, rather than using
+ * ALTER TABLE's normal prep-time recursion.  The reason is that all the
+ * constraints *must* be given the same name, else they won't be seen as
+ * related later.  If the user didn't explicitly specify a name, then
+ * AddRelationNewConstraints would normally assign different names to the
+ * child constraints.  To fix that, we must capture the name assigned at
+ * the parent table and pass that down.
+ */
+static ObjectAddress
+ATAddCheckConstraint(List **wqueue, AlteredTableInfo *tab, Relation rel,
+					 Constraint *constr, bool recurse, bool recursing,
+					 bool is_readd, LOCKMODE lockmode)
+{
+	List	   *newcons;
+	ListCell   *lcon;
+	List	   *children;
+	ListCell   *child;
+	ObjectAddress address = InvalidObjectAddress;
+
+	/* At top level, permission check was done in ATPrepCmd, else do it */
+	if (recursing)
+		ATSimplePermissions(AT_AddConstraint, rel, ATT_TABLE | ATT_FOREIGN_TABLE);
+
+	/*
+	 * Call AddRelationNewConstraints to do the work, making sure it works on
+	 * a copy of the Constraint so transformExpr can't modify the original. It
+	 * returns a list of cooked constraints.
+	 *
+	 * If the constraint ends up getting merged with a pre-existing one, it's
+	 * omitted from the returned list, which is what we want: we do not need
+	 * to do any validation work.  That can only happen at child tables,
+	 * though, since we disallow merging at the top level.
+	 */
+	newcons = AddRelationNewConstraints(rel, NIL,
+										list_make1(copyObject(constr)),
+										recursing | is_readd,	/* allow_merge */
+										!recursing, /* is_local */
+										is_readd,	/* is_internal */
+										NULL);	/* queryString not available
+												 * here */
+
+	/* we don't expect more than one constraint here */
+	Assert(list_length(newcons) <= 1);
+
+	/* Add each to-be-validated constraint to Phase 3's queue */
+	foreach(lcon, newcons)
+	{
+		CookedConstraint *ccon = (CookedConstraint *) lfirst(lcon);
+
+		if (!ccon->skip_validation)
+		{
+			NewConstraint *newcon;
+
+			newcon = (NewConstraint *) palloc0(sizeof(NewConstraint));
+			newcon->name = ccon->name;
+			newcon->contype = ccon->contype;
+			newcon->qual = ccon->expr;
+
+			tab->constraints = lappend(tab->constraints, newcon);
+		}
+
+		/* Save the actually assigned name if it was defaulted */
+		if (constr->conname == NULL)
+			constr->conname = ccon->name;
+
+		ObjectAddressSet(address, ConstraintRelationId, ccon->conoid);
+	}
+
+	/* At this point we must have a locked-down name to use */
+	Assert(constr->conname != NULL);
+
+	/* Advance command counter in case same table is visited multiple times */
+	CommandCounterIncrement();
+
+	/*
+	 * If the constraint got merged with an existing constraint, we're done.
+	 * We mustn't recurse to child tables in this case, because they've
+	 * already got the constraint, and visiting them again would lead to an
+	 * incorrect value for coninhcount.
+	 */
+	if (newcons == NIL)
+		return address;
+
+	/*
+	 * If adding a NO INHERIT constraint, no need to find our children.
+	 */
+	if (constr->is_no_inherit)
+		return address;
+
+	/*
+	 * Propagate to children as appropriate.  Unlike most other ALTER
+	 * routines, we have to do this one level of recursion at a time; we can't
+	 * use find_all_inheritors to do it in one pass.
+	 */
+	children =
+		find_inheritance_children(RelationGetRelid(rel), lockmode);
+
+	/*
+	 * Check if ONLY was specified with ALTER TABLE.  If so, allow the
+	 * constraint creation only if there are no children currently.  Error out
+	 * otherwise.
+	 */
+	if (!recurse && children != NIL)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+				 errmsg("constraint must be added to child tables too")));
+
+	foreach(child, children)
+	{
+		Oid			childrelid = lfirst_oid(child);
+		Relation	childrel;
+		AlteredTableInfo *childtab;
+
+		/* find_inheritance_children already got lock */
+		childrel = table_open(childrelid, NoLock);
+		CheckTableNotInUse(childrel, "ALTER TABLE");
+
+		/* Find or create work queue entry for this table */
+		childtab = ATGetQueueEntry(wqueue, childrel);
+
+		/* Recurse to child; recursing=true triggers the permission check */
+		ATAddCheckConstraint(wqueue, childtab, childrel,
+							 constr, recurse, true, is_readd, lockmode);
+
+		table_close(childrel, NoLock);
+	}
+
+	return address;
+}
+
+/*
+ * Add a foreign-key constraint to a single table; return the new constraint's
+ * address.
+ *
+ * Subroutine for ATExecAddConstraint.  Must already hold exclusive
+ * lock on the rel, and have done appropriate validity checks for it.
+ * We do permissions checks here, however.
+ *
+ * When the referenced or referencing tables (or both) are partitioned,
+ * multiple pg_constraint rows are required -- one for each partitioned table
+ * and each partition on each side (fortunately, not one for every combination
+ * thereof).  We also need action triggers on each leaf partition on the
+ * referenced side, and check triggers on each leaf partition on the
+ * referencing side.
+ */
+static ObjectAddress
+ATAddForeignKeyConstraint(List **wqueue, AlteredTableInfo *tab, Relation rel,
+						  Constraint *fkconstraint,
+						  bool recurse, bool recursing, LOCKMODE lockmode)
+{
+	Relation	pkrel;
+	/* per-key-column working arrays; only the leading entries get used */
+	int16		pkattnum[INDEX_MAX_KEYS];
+	int16		fkattnum[INDEX_MAX_KEYS];
+	Oid			pktypoid[INDEX_MAX_KEYS];
+	Oid			fktypoid[INDEX_MAX_KEYS];
+	Oid			opclasses[INDEX_MAX_KEYS];
+	Oid			pfeqoperators[INDEX_MAX_KEYS];
+	Oid			ppeqoperators[INDEX_MAX_KEYS];
+	Oid			ffeqoperators[INDEX_MAX_KEYS];
+	int16		fkdelsetcols[INDEX_MAX_KEYS];
+	int			i;
+	int			numfks,
+				numpks,
+				numfkdelsetcols;
+	Oid			indexOid;
+	bool		old_check_ok;
+	ObjectAddress address;
+	ListCell   *old_pfeqop_item = list_head(fkconstraint->old_conpfeqop);
+
+	/*
+	 * Grab ShareRowExclusiveLock on the pk table, so that someone doesn't
+	 * delete rows out from under us.
+	 */
+	if (OidIsValid(fkconstraint->old_pktable_oid))
+		pkrel = table_open(fkconstraint->old_pktable_oid, ShareRowExclusiveLock);
+	else
+		pkrel = table_openrv(fkconstraint->pktable, ShareRowExclusiveLock);
+
+	/*
+	 * Validity checks (permission checks wait till we have the column
+	 * numbers)
+	 */
+	if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+	{
+		if (!recurse)
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("cannot use ONLY for foreign key on partitioned table \"%s\" referencing relation \"%s\"",
+							RelationGetRelationName(rel),
+							RelationGetRelationName(pkrel))));
+		if (fkconstraint->skip_validation && !fkconstraint->initially_valid)
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("cannot add NOT VALID foreign key on partitioned table \"%s\" referencing relation \"%s\"",
+							RelationGetRelationName(rel),
+							RelationGetRelationName(pkrel)),
+					 errdetail("This feature is not yet supported on partitioned tables.")));
+	}
+
+	if (pkrel->rd_rel->relkind != RELKIND_RELATION &&
+		pkrel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("referenced relation \"%s\" is not a table",
+						RelationGetRelationName(pkrel))));
+
+	if (!allowSystemTableMods && IsSystemRelation(pkrel))
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 errmsg("permission denied: \"%s\" is a system catalog",
+						RelationGetRelationName(pkrel))));
+
+	/*
+	 * References from permanent or unlogged tables to temp tables, and from
+	 * permanent tables to unlogged tables, are disallowed because the
+	 * referenced data can vanish out from under us.  References from temp
+	 * tables to any other table type are also disallowed, because other
+	 * backends might need to run the RI triggers on the perm table, but they
+	 * can't reliably see tuples in the local buffers of other backends.
+	 */
+	switch (rel->rd_rel->relpersistence)
+	{
+		case RELPERSISTENCE_PERMANENT:
+			if (!RelationIsPermanent(pkrel))
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+						 errmsg("constraints on permanent tables may reference only permanent tables")));
+			break;
+		case RELPERSISTENCE_UNLOGGED:
+			if (!RelationIsPermanent(pkrel)
+				&& pkrel->rd_rel->relpersistence != RELPERSISTENCE_UNLOGGED)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+						 errmsg("constraints on unlogged tables may reference only permanent or unlogged tables")));
+			break;
+		case RELPERSISTENCE_TEMP:
+			if (pkrel->rd_rel->relpersistence != RELPERSISTENCE_TEMP)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+						 errmsg("constraints on temporary tables may reference only temporary tables")));
+			if (!pkrel->rd_islocaltemp || !rel->rd_islocaltemp)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+						 errmsg("constraints on temporary tables must involve temporary tables of this session")));
+			break;
+	}
+
+	/*
+	 * Look up the referencing attributes to make sure they exist, and record
+	 * their attnums and type OIDs.
+	 */
+	MemSet(pkattnum, 0, sizeof(pkattnum));
+	MemSet(fkattnum, 0, sizeof(fkattnum));
+	MemSet(pktypoid, 0, sizeof(pktypoid));
+	MemSet(fktypoid, 0, sizeof(fktypoid));
+	MemSet(opclasses, 0, sizeof(opclasses));
+	MemSet(pfeqoperators, 0, sizeof(pfeqoperators));
+	MemSet(ppeqoperators, 0, sizeof(ppeqoperators));
+	MemSet(ffeqoperators, 0, sizeof(ffeqoperators));
+	MemSet(fkdelsetcols, 0, sizeof(fkdelsetcols));
+
+	numfks = transformColumnNameList(RelationGetRelid(rel),
+									 fkconstraint->fk_attrs,
+									 fkattnum, fktypoid);
+
+	/* ON DELETE SET NULL/DEFAULT (...) column list, if any */
+	numfkdelsetcols = transformColumnNameList(RelationGetRelid(rel),
+											  fkconstraint->fk_del_set_cols,
+											  fkdelsetcols, NULL);
+	validateFkOnDeleteSetColumns(numfks, fkattnum,
+								 numfkdelsetcols, fkdelsetcols,
+								 fkconstraint->fk_del_set_cols);
+
+	/*
+	 * If the attribute list for the referenced table was omitted, lookup the
+	 * definition of the primary key and use it.  Otherwise, validate the
+	 * supplied attribute list.  In either case, discover the index OID and
+	 * index opclasses, and the attnums and type OIDs of the attributes.
+	 */
+	if (fkconstraint->pk_attrs == NIL)
+	{
+		numpks = transformFkeyGetPrimaryKey(pkrel, &indexOid,
+											&fkconstraint->pk_attrs,
+											pkattnum, pktypoid,
+											opclasses);
+	}
+	else
+	{
+		numpks = transformColumnNameList(RelationGetRelid(pkrel),
+										 fkconstraint->pk_attrs,
+										 pkattnum, pktypoid);
+		/* Look for an index matching the column list */
+		indexOid = transformFkeyCheckAttrs(pkrel, numpks, pkattnum,
+										   opclasses);
+	}
+
+	/*
+	 * Now we can check permissions.
+	 */
+	checkFkeyPermissions(pkrel, pkattnum, numpks);
+
+	/*
+	 * Check some things for generated columns.
+	 */
+	for (i = 0; i < numfks; i++)
+	{
+		char		attgenerated = TupleDescAttr(RelationGetDescr(rel), fkattnum[i] - 1)->attgenerated;
+
+		if (attgenerated)
+		{
+			/*
+			 * Check restrictions on UPDATE/DELETE actions, per SQL standard
+			 */
+			if (fkconstraint->fk_upd_action == FKCONSTR_ACTION_SETNULL ||
+				fkconstraint->fk_upd_action == FKCONSTR_ACTION_SETDEFAULT ||
+				fkconstraint->fk_upd_action == FKCONSTR_ACTION_CASCADE)
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("invalid %s action for foreign key constraint containing generated column",
+								"ON UPDATE")));
+			if (fkconstraint->fk_del_action == FKCONSTR_ACTION_SETNULL ||
+				fkconstraint->fk_del_action == FKCONSTR_ACTION_SETDEFAULT)
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("invalid %s action for foreign key constraint containing generated column",
+								"ON DELETE")));
+		}
+	}
+
+	/*
+	 * Look up the equality operators to use in the constraint.
+	 *
+	 * Note that we have to be careful about the difference between the actual
+	 * PK column type and the opclass' declared input type, which might be
+	 * only binary-compatible with it.  The declared opcintype is the right
+	 * thing to probe pg_amop with.
+	 */
+	if (numfks != numpks)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_FOREIGN_KEY),
+				 errmsg("number of referencing and referenced columns for foreign key disagree")));
+
+	/*
+	 * On the strength of a previous constraint, we might avoid scanning
+	 * tables to validate this one.  See below.
+	 */
+	old_check_ok = (fkconstraint->old_conpfeqop != NIL);
+	Assert(!old_check_ok || numfks == list_length(fkconstraint->old_conpfeqop));
+
+	for (i = 0; i < numpks; i++)
+	{
+		Oid			pktype = pktypoid[i];
+		Oid			fktype = fktypoid[i];
+		Oid			fktyped;
+		HeapTuple	cla_ht;
+		Form_pg_opclass cla_tup;
+		Oid			amid;
+		Oid			opfamily;
+		Oid			opcintype;
+		Oid			pfeqop;
+		Oid			ppeqop;
+		Oid			ffeqop;
+		int16		eqstrategy;
+		Oid			pfeqop_right;
+
+		/* We need several fields out of the pg_opclass entry */
+		cla_ht = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclasses[i]));
+		if (!HeapTupleIsValid(cla_ht))
+			elog(ERROR, "cache lookup failed for opclass %u", opclasses[i]);
+		cla_tup = (Form_pg_opclass) GETSTRUCT(cla_ht);
+		amid = cla_tup->opcmethod;
+		opfamily = cla_tup->opcfamily;
+		opcintype = cla_tup->opcintype;
+		ReleaseSysCache(cla_ht);
+
+		/*
+		 * Check it's a btree; currently this can never fail since no other
+		 * index AMs support unique indexes.  If we ever did have other types
+		 * of unique indexes, we'd need a way to determine which operator
+		 * strategy number is equality.  (Is it reasonable to insist that
+		 * every such index AM use btree's number for equality?)
+		 */
+		if (amid != BTREE_AM_OID)
+			elog(ERROR, "only b-tree indexes are supported for foreign keys");
+		eqstrategy = BTEqualStrategyNumber;
+
+		/*
+		 * There had better be a primary equality operator for the index.
+		 * We'll use it for PK = PK comparisons.
+		 */
+		ppeqop = get_opfamily_member(opfamily, opcintype, opcintype,
+									 eqstrategy);
+
+		if (!OidIsValid(ppeqop))
+			elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
+				 eqstrategy, opcintype, opcintype, opfamily);
+
+		/*
+		 * Are there equality operators that take exactly the FK type?  Assume
+		 * we should look through any domain here.
+		 */
+		fktyped = getBaseType(fktype);
+
+		pfeqop = get_opfamily_member(opfamily, opcintype, fktyped,
+									 eqstrategy);
+		if (OidIsValid(pfeqop))
+		{
+			pfeqop_right = fktyped;
+			ffeqop = get_opfamily_member(opfamily, fktyped, fktyped,
+										 eqstrategy);
+		}
+		else
+		{
+			/* keep compiler quiet */
+			pfeqop_right = InvalidOid;
+			ffeqop = InvalidOid;
+		}
+
+		if (!(OidIsValid(pfeqop) && OidIsValid(ffeqop)))
+		{
+			/*
+			 * Otherwise, look for an implicit cast from the FK type to the
+			 * opcintype, and if found, use the primary equality operator.
+			 * This is a bit tricky because opcintype might be a polymorphic
+			 * type such as ANYARRAY or ANYENUM; so what we have to test is
+			 * whether the two actual column types can be concurrently cast to
+			 * that type.  (Otherwise, we'd fail to reject combinations such
+			 * as int[] and point[].)
+			 */
+			Oid			input_typeids[2];
+			Oid			target_typeids[2];
+
+			input_typeids[0] = pktype;
+			input_typeids[1] = fktype;
+			target_typeids[0] = opcintype;
+			target_typeids[1] = opcintype;
+			if (can_coerce_type(2, input_typeids, target_typeids,
+								COERCION_IMPLICIT))
+			{
+				pfeqop = ffeqop = ppeqop;
+				pfeqop_right = opcintype;
+			}
+		}
+
+		if (!(OidIsValid(pfeqop) && OidIsValid(ffeqop)))
+			ereport(ERROR,
+					(errcode(ERRCODE_DATATYPE_MISMATCH),
+					 errmsg("foreign key constraint \"%s\" cannot be implemented",
+							fkconstraint->conname),
+					 errdetail("Key columns \"%s\" and \"%s\" "
+							   "are of incompatible types: %s and %s.",
+							   strVal(list_nth(fkconstraint->fk_attrs, i)),
+							   strVal(list_nth(fkconstraint->pk_attrs, i)),
+							   format_type_be(fktype),
+							   format_type_be(pktype))));
+
+		if (old_check_ok)
+		{
+			/*
+			 * When a pfeqop changes, revalidate the constraint.  We could
+			 * permit intra-opfamily changes, but that adds subtle complexity
+			 * without any concrete benefit for core types.  We need not
+			 * assess ppeqop or ffeqop, which RI_Initial_Check() does not use.
+			 */
+			old_check_ok = (pfeqop == lfirst_oid(old_pfeqop_item));
+			old_pfeqop_item = lnext(fkconstraint->old_conpfeqop,
+									old_pfeqop_item);
+		}
+		if (old_check_ok)
+		{
+			Oid			old_fktype;
+			Oid			new_fktype;
+			CoercionPathType old_pathtype;
+			CoercionPathType new_pathtype;
+			Oid			old_castfunc;
+			Oid			new_castfunc;
+			Form_pg_attribute attr = TupleDescAttr(tab->oldDesc,
+												   fkattnum[i] - 1);
+
+			/*
+			 * Identify coercion pathways from each of the old and new FK-side
+			 * column types to the right (foreign) operand type of the pfeqop.
+			 * We may assume that pg_constraint.conkey is not changing.
+			 */
+			old_fktype = attr->atttypid;
+			new_fktype = fktype;
+			old_pathtype = findFkeyCast(pfeqop_right, old_fktype,
+										&old_castfunc);
+			new_pathtype = findFkeyCast(pfeqop_right, new_fktype,
+										&new_castfunc);
+
+			/*
+			 * Upon a change to the cast from the FK column to its pfeqop
+			 * operand, revalidate the constraint.  For this evaluation, a
+			 * binary coercion cast is equivalent to no cast at all.  While
+			 * type implementors should design implicit casts with an eye
+			 * toward consistency of operations like equality, we cannot
+			 * assume here that they have done so.
+			 *
+			 * A function with a polymorphic argument could change behavior
+			 * arbitrarily in response to get_fn_expr_argtype().  Therefore,
+			 * when the cast destination is polymorphic, we only avoid
+			 * revalidation if the input type has not changed at all.  Given
+			 * just the core data types and operator classes, this requirement
+			 * prevents no would-be optimizations.
+			 *
+			 * If the cast converts from a base type to a domain thereon, then
+			 * that domain type must be the opcintype of the unique index.
+			 * Necessarily, the primary key column must then be of the domain
+			 * type.  Since the constraint was previously valid, all values on
+			 * the foreign side necessarily exist on the primary side and in
+			 * turn conform to the domain.  Consequently, we need not treat
+			 * domains specially here.
+			 *
+			 * Since we require that all collations share the same notion of
+			 * equality (which they do, because texteq reduces to bitwise
+			 * equality), we don't compare collation here.
+			 *
+			 * We need not directly consider the PK type.  It's necessarily
+			 * binary coercible to the opcintype of the unique index column,
+			 * and ri_triggers.c will only deal with PK datums in terms of
+			 * that opcintype.  Changing the opcintype also changes pfeqop.
+			 */
+			old_check_ok = (new_pathtype == old_pathtype &&
+							new_castfunc == old_castfunc &&
+							(!IsPolymorphicType(pfeqop_right) ||
+							 new_fktype == old_fktype));
+		}
+
+		pfeqoperators[i] = pfeqop;
+		ppeqoperators[i] = ppeqop;
+		ffeqoperators[i] = ffeqop;
+	}
+
+	/*
+	 * Create all the constraint and trigger objects, recursing to partitions
+	 * as necessary.  First handle the referenced side.
+	 */
+	address = addFkRecurseReferenced(wqueue, fkconstraint, rel, pkrel,
+									 indexOid,
+									 InvalidOid,	/* no parent constraint */
+									 numfks,
+									 pkattnum,
+									 fkattnum,
+									 pfeqoperators,
+									 ppeqoperators,
+									 ffeqoperators,
+									 numfkdelsetcols,
+									 fkdelsetcols,
+									 old_check_ok,
+									 InvalidOid, InvalidOid);
+
+	/* Now handle the referencing side. */
+	addFkRecurseReferencing(wqueue, fkconstraint, rel, pkrel,
+							indexOid,
+							address.objectId,
+							numfks,
+							pkattnum,
+							fkattnum,
+							pfeqoperators,
+							ppeqoperators,
+							ffeqoperators,
+							numfkdelsetcols,
+							fkdelsetcols,
+							old_check_ok,
+							lockmode,
+							InvalidOid, InvalidOid);
+
+	/*
+	 * Done.  Close pk table, but keep lock until we've committed.
+	 */
+	table_close(pkrel, NoLock);
+
+	return address;
+}
+
+/*
+ * validateFkOnDeleteSetColumns
+ *		Check that every column named in an ON DELETE SET NULL/DEFAULT (...)
+ *		column list is one of the foreign key's own columns; error out if not.
+ */
+void
+validateFkOnDeleteSetColumns(int numfks, const int16 *fkattnums,
+							 int numfksetcols, const int16 *fksetcolsattnums,
+							 List *fksetcols)
+{
+	for (int setcol = 0; setcol < numfksetcols; setcol++)
+	{
+		int16		attnum = fksetcolsattnums[setcol];
+		bool		is_fk_column = false;
+
+		/* the column qualifies only if it appears among the FK columns */
+		for (int fkcol = 0; fkcol < numfks; fkcol++)
+		{
+			if (fkattnums[fkcol] == attnum)
+			{
+				is_fk_column = true;
+				break;
+			}
+		}
+
+		if (!is_fk_column)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+					 errmsg("column \"%s\" referenced in ON DELETE SET action must be part of foreign key",
+							strVal(list_nth(fksetcols, setcol)))));
+	}
+}
+
+/*
+ * addFkRecurseReferenced
+ *		subroutine for ATAddForeignKeyConstraint; recurses on the referenced
+ *		side of the constraint
+ *
+ * Create pg_constraint rows for the referenced side of the constraint,
+ * referencing the parent of the referencing side; also create action triggers
+ * on leaf partitions.  If the table is partitioned, recurse to handle each
+ * partition.
+ *
+ * wqueue is the ALTER TABLE work queue; can be NULL when not running as part
+ * of an ALTER TABLE sequence.
+ * fkconstraint is the constraint being added.
+ * rel is the root referencing relation.
+ * pkrel is the referenced relation; might be a partition, if recursing.
+ * indexOid is the OID of the index (on pkrel) implementing this constraint.
+ * parentConstr is the OID of a parent constraint; InvalidOid if this is a
+ * top-level constraint.
+ * numfks is the number of columns in the foreign key
+ * pkattnum is the attnum array of referenced attributes.
+ * fkattnum is the attnum array of referencing attributes.
+ * numfkdelsetcols is the number of columns in the ON DELETE SET NULL/DEFAULT
+ * (...) clause
+ * fkdelsetcols is the attnum array of the columns in the ON DELETE SET
+ * NULL/DEFAULT clause
+ * pf/pp/ffeqoperators are OID array of operators between columns.
+ * old_check_ok signals that this constraint replaces an existing one that
+ * was already validated (thus this one doesn't need validation).
+ * parentDelTrigger and parentUpdTrigger, when being recursively called on
+ * a partition, are the OIDs of the parent action triggers for DELETE and
+ * UPDATE respectively.
+ */
+static ObjectAddress
+addFkRecurseReferenced(List **wqueue, Constraint *fkconstraint, Relation rel,
+					   Relation pkrel, Oid indexOid, Oid parentConstr,
+					   int numfks,
+					   int16 *pkattnum, int16 *fkattnum, Oid *pfeqoperators,
+					   Oid *ppeqoperators, Oid *ffeqoperators,
+					   int numfkdelsetcols, int16 *fkdelsetcols,
+					   bool old_check_ok,
+					   Oid parentDelTrigger, Oid parentUpdTrigger)
+{
+	ObjectAddress address;
+	Oid			constrOid;
+	char	   *conname;
+	bool		conislocal;
+	int			coninhcount;
+	bool		connoinherit;
+	Oid			deleteTriggerOid,
+				updateTriggerOid;
+
+	/*
+	 * Verify relkind for each referenced partition.  At the top level, this
+	 * is redundant with a previous check, but we need it when recursing.
+	 */
+	if (pkrel->rd_rel->relkind != RELKIND_RELATION &&
+		pkrel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("referenced relation \"%s\" is not a table",
+						RelationGetRelationName(pkrel))));
+
+	/*
+	 * Caller supplies us with a constraint name; however, it may be used in
+	 * this partition, so come up with a different one in that case.
+	 */
+	if (ConstraintNameIsUsed(CONSTRAINT_RELATION,
+							 RelationGetRelid(rel),
+							 fkconstraint->conname))
+		conname = ChooseConstraintName(RelationGetRelationName(rel),
+									   ChooseForeignKeyConstraintNameAddition(fkconstraint->fk_attrs),
+									   "fkey",
+									   RelationGetNamespace(rel), NIL);
+	else
+		conname = fkconstraint->conname;
+
+	if (OidIsValid(parentConstr))
+	{
+		/* child of a partitioned parent: not locally defined, inherited once */
+		conislocal = false;
+		coninhcount = 1;
+		connoinherit = false;
+	}
+	else
+	{
+		conislocal = true;
+		coninhcount = 0;
+
+		/*
+		 * always inherit for partitioned tables, never for legacy inheritance
+		 */
+		connoinherit = rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE;
+	}
+
+	/*
+	 * Record the FK constraint in pg_constraint.
+	 */
+	constrOid = CreateConstraintEntry(conname,
+									  RelationGetNamespace(rel),
+									  CONSTRAINT_FOREIGN,
+									  fkconstraint->deferrable,
+									  fkconstraint->initdeferred,
+									  fkconstraint->initially_valid,
+									  parentConstr,
+									  RelationGetRelid(rel),
+									  fkattnum,
+									  numfks,
+									  numfks,
+									  InvalidOid,	/* not a domain constraint */
+									  indexOid,
+									  RelationGetRelid(pkrel),
+									  pkattnum,
+									  pfeqoperators,
+									  ppeqoperators,
+									  ffeqoperators,
+									  numfks,
+									  fkconstraint->fk_upd_action,
+									  fkconstraint->fk_del_action,
+									  fkdelsetcols,
+									  numfkdelsetcols,
+									  fkconstraint->fk_matchtype,
+									  NULL, /* no exclusion constraint */
+									  NULL, /* no check constraint */
+									  NULL,
+									  conislocal,	/* islocal */
+									  coninhcount,	/* inhcount */
+									  connoinherit, /* conNoInherit */
+									  false);	/* is_internal */
+
+	ObjectAddressSet(address, ConstraintRelationId, constrOid);
+
+	/*
+	 * Mark the child constraint as part of the parent constraint; it must not
+	 * be dropped on its own.  (This constraint is deleted when the partition
+	 * is detached, but a special check needs to occur that the partition
+	 * contains no referenced values.)
+	 */
+	if (OidIsValid(parentConstr))
+	{
+		ObjectAddress referenced;
+
+		ObjectAddressSet(referenced, ConstraintRelationId, parentConstr);
+		recordDependencyOn(&address, &referenced, DEPENDENCY_INTERNAL);
+	}
+
+	/* make new constraint visible, in case we add more */
+	CommandCounterIncrement();
+
+	/*
+	 * Create the action triggers that enforce the constraint.
+	 */
+	createForeignKeyActionTriggers(rel, RelationGetRelid(pkrel),
+								   fkconstraint,
+								   constrOid, indexOid,
+								   parentDelTrigger, parentUpdTrigger,
+								   &deleteTriggerOid, &updateTriggerOid);
+
+	/*
+	 * If the referenced table is partitioned, recurse on ourselves to handle
+	 * each partition.  We need one pg_constraint row created for each
+	 * partition in addition to the pg_constraint row for the parent table.
+	 */
+	if (pkrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+	{
+		PartitionDesc pd = RelationGetPartitionDesc(pkrel, true);
+
+		for (int i = 0; i < pd->nparts; i++)
+		{
+			Relation	partRel;
+			AttrMap    *map;
+			AttrNumber *mapped_pkattnum;
+			Oid			partIndexId;
+
+			partRel = table_open(pd->oids[i], ShareRowExclusiveLock);
+
+			/*
+			 * Map the attribute numbers in the referenced side of the FK
+			 * definition to match the partition's column layout.
+			 */
+			map = build_attrmap_by_name_if_req(RelationGetDescr(partRel),
+											   RelationGetDescr(pkrel));
+			if (map)
+			{
+				mapped_pkattnum = palloc(sizeof(AttrNumber) * numfks);
+				for (int j = 0; j < numfks; j++)
+					mapped_pkattnum[j] = map->attnums[pkattnum[j] - 1];
+			}
+			else
+				mapped_pkattnum = pkattnum;	/* layouts match; reuse as-is */
+
+			/* do the deed */
+			partIndexId = index_get_partition(partRel, indexOid);
+			if (!OidIsValid(partIndexId))
+				elog(ERROR, "index for %u not found in partition %s",
+					 indexOid, RelationGetRelationName(partRel));
+			addFkRecurseReferenced(wqueue, fkconstraint, rel, partRel,
+								   partIndexId, constrOid, numfks,
+								   mapped_pkattnum, fkattnum,
+								   pfeqoperators, ppeqoperators, ffeqoperators,
+								   numfkdelsetcols, fkdelsetcols,
+								   old_check_ok,
+								   deleteTriggerOid, updateTriggerOid);
+
+			/* Done -- clean up (but keep the lock) */
+			table_close(partRel, NoLock);
+			if (map)
+			{
+				pfree(mapped_pkattnum);
+				free_attrmap(map);
+			}
+		}
+	}
+
+	return address;
+}
+
+/*
+ * addFkRecurseReferencing
+ * subroutine for ATAddForeignKeyConstraint and CloneFkReferencing
+ *
+ * If the referencing relation is a plain relation, create the necessary check
+ * triggers that implement the constraint, and set up for Phase 3 constraint
+ * verification. If the referencing relation is a partitioned table, then
+ * we create a pg_constraint row for it and recurse on this routine for each
+ * partition.
+ *
+ * We assume that the referenced relation is locked against concurrent
+ * deletions. If it's a partitioned relation, every partition must be so
+ * locked.
+ *
+ * wqueue is the ALTER TABLE work queue; can be NULL when not running as part
+ * of an ALTER TABLE sequence.
+ * fkconstraint is the constraint being added.
+ * rel is the referencing relation; might be a partition, if recursing.
+ * pkrel is the root referenced relation.
+ * indexOid is the OID of the index (on pkrel) implementing this constraint.
+ * parentConstr is the OID of the parent constraint (there is always one).
+ * numfks is the number of columns in the foreign key
+ * pkattnum is the attnum array of referenced attributes.
+ * fkattnum is the attnum array of referencing attributes.
+ * pf/pp/ffeqoperators are OID array of operators between columns.
+ * numfkdelsetcols is the number of columns in the ON DELETE SET NULL/DEFAULT
+ * (...) clause
+ * fkdelsetcols is the attnum array of the columns in the ON DELETE SET
+ * NULL/DEFAULT clause
+ * old_check_ok signals that this constraint replaces an existing one that
+ * was already validated (thus this one doesn't need validation).
+ * lockmode is the lockmode to acquire on partitions when recursing.
+ * parentInsTrigger and parentUpdTrigger, when being recursively called on
+ * a partition, are the OIDs of the parent check triggers for INSERT and
+ * UPDATE respectively.
+ */
+static void
+addFkRecurseReferencing(List **wqueue, Constraint *fkconstraint, Relation rel,
+						Relation pkrel, Oid indexOid, Oid parentConstr,
+						int numfks, int16 *pkattnum, int16 *fkattnum,
+						Oid *pfeqoperators, Oid *ppeqoperators, Oid *ffeqoperators,
+						int numfkdelsetcols, int16 *fkdelsetcols,
+						bool old_check_ok, LOCKMODE lockmode,
+						Oid parentInsTrigger, Oid parentUpdTrigger)
+{
+	Oid			insertTriggerOid,
+				updateTriggerOid;
+
+	AssertArg(OidIsValid(parentConstr));
+
+	/* Foreign keys are not supported when the referencing side is foreign */
+	if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("foreign key constraints are not supported on foreign tables")));
+
+	/*
+	 * Add the check triggers to it and, if necessary, schedule it to be
+	 * checked in Phase 3.
+	 *
+	 * If the relation is partitioned, drill down to do it to its partitions.
+	 */
+	createForeignKeyCheckTriggers(RelationGetRelid(rel),
+								  RelationGetRelid(pkrel),
+								  fkconstraint,
+								  parentConstr,
+								  indexOid,
+								  parentInsTrigger, parentUpdTrigger,
+								  &insertTriggerOid, &updateTriggerOid);
+
+	if (rel->rd_rel->relkind == RELKIND_RELATION)
+	{
+		/*
+		 * Tell Phase 3 to check that the constraint is satisfied by existing
+		 * rows. We can skip this during table creation, when requested
+		 * explicitly by specifying NOT VALID in an ADD FOREIGN KEY command,
+		 * and when we're recreating a constraint following a SET DATA TYPE
+		 * operation that did not impugn its validity.
+		 */
+		if (wqueue && !old_check_ok && !fkconstraint->skip_validation)
+		{
+			NewConstraint *newcon;
+			AlteredTableInfo *tab;
+
+			tab = ATGetQueueEntry(wqueue, rel);
+
+			newcon = (NewConstraint *) palloc0(sizeof(NewConstraint));
+			newcon->name = get_constraint_name(parentConstr);
+			newcon->contype = CONSTR_FOREIGN;
+			newcon->refrelid = RelationGetRelid(pkrel);
+			newcon->refindid = indexOid;
+			newcon->conid = parentConstr;
+			newcon->qual = (Node *) fkconstraint;
+
+			tab->constraints = lappend(tab->constraints, newcon);
+		}
+	}
+	else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+	{
+		PartitionDesc pd = RelationGetPartitionDesc(rel, true);
+		Relation	trigrel;
+
+		/*
+		 * Triggers of the foreign keys will be manipulated a bunch of times
+		 * in the loop below.  To avoid repeatedly opening/closing the trigger
+		 * catalog relation, we open it here and pass it to the subroutines
+		 * called below.
+		 */
+		trigrel = table_open(TriggerRelationId, RowExclusiveLock);
+
+		/*
+		 * Recurse to take appropriate action on each partition; either we
+		 * find an existing constraint to reparent to ours, or we create a new
+		 * one.
+		 */
+		for (int i = 0; i < pd->nparts; i++)
+		{
+			Oid			partitionId = pd->oids[i];
+			Relation	partition = table_open(partitionId, lockmode);
+			List	   *partFKs;
+			AttrMap    *attmap;
+			AttrNumber	mapped_fkattnum[INDEX_MAX_KEYS];
+			bool		attached;
+			char	   *conname;
+			Oid			constrOid;
+			ObjectAddress address,
+						referenced;
+			ListCell   *cell;
+
+			CheckTableNotInUse(partition, "ALTER TABLE");
+
+			/* Translate the referencing-column attnums to this partition */
+			attmap = build_attrmap_by_name(RelationGetDescr(partition),
+										   RelationGetDescr(rel));
+			for (int j = 0; j < numfks; j++)
+				mapped_fkattnum[j] = attmap->attnums[fkattnum[j] - 1];
+
+			/* Check whether an existing constraint can be repurposed */
+			partFKs = copyObject(RelationGetFKeyList(partition));
+			attached = false;
+			foreach(cell, partFKs)
+			{
+				ForeignKeyCacheInfo *fk;
+
+				fk = lfirst_node(ForeignKeyCacheInfo, cell);
+				if (tryAttachPartitionForeignKey(fk,
+												 partitionId,
+												 parentConstr,
+												 numfks,
+												 mapped_fkattnum,
+												 pkattnum,
+												 pfeqoperators,
+												 insertTriggerOid,
+												 updateTriggerOid,
+												 trigrel))
+				{
+					attached = true;
+					break;
+				}
+			}
+			if (attached)
+			{
+				/* reused an existing constraint; keep lock till end of xact */
+				table_close(partition, NoLock);
+				continue;
+			}
+
+			/*
+			 * No luck finding a good constraint to reuse; create our own.
+			 */
+			if (ConstraintNameIsUsed(CONSTRAINT_RELATION,
+									 RelationGetRelid(partition),
+									 fkconstraint->conname))
+				conname = ChooseConstraintName(RelationGetRelationName(partition),
+											   ChooseForeignKeyConstraintNameAddition(fkconstraint->fk_attrs),
+											   "fkey",
+											   RelationGetNamespace(partition), NIL);
+			else
+				conname = fkconstraint->conname;
+			constrOid =
+				CreateConstraintEntry(conname,
+									  RelationGetNamespace(partition),
+									  CONSTRAINT_FOREIGN,
+									  fkconstraint->deferrable,
+									  fkconstraint->initdeferred,
+									  fkconstraint->initially_valid,
+									  parentConstr,
+									  partitionId,
+									  mapped_fkattnum,
+									  numfks,
+									  numfks,
+									  InvalidOid,	/* not a domain constraint */
+									  indexOid,
+									  RelationGetRelid(pkrel),
+									  pkattnum,
+									  pfeqoperators,
+									  ppeqoperators,
+									  ffeqoperators,
+									  numfks,
+									  fkconstraint->fk_upd_action,
+									  fkconstraint->fk_del_action,
+									  fkdelsetcols,
+									  numfkdelsetcols,
+									  fkconstraint->fk_matchtype,
+									  NULL,
+									  NULL,
+									  NULL,
+									  false,	/* islocal */
+									  1,	/* inhcount */
+									  false,	/* conNoInherit */
+									  false);	/* is_internal */
+
+			/*
+			 * Give this constraint partition-type dependencies on the parent
+			 * constraint as well as the table.
+			 */
+			ObjectAddressSet(address, ConstraintRelationId, constrOid);
+			ObjectAddressSet(referenced, ConstraintRelationId, parentConstr);
+			recordDependencyOn(&address, &referenced, DEPENDENCY_PARTITION_PRI);
+			ObjectAddressSet(referenced, RelationRelationId, partitionId);
+			recordDependencyOn(&address, &referenced, DEPENDENCY_PARTITION_SEC);
+
+			/* Make all this visible before recursing */
+			CommandCounterIncrement();
+
+			/* call ourselves to finalize the creation and we're done */
+			addFkRecurseReferencing(wqueue, fkconstraint, partition, pkrel,
+									indexOid,
+									constrOid,
+									numfks,
+									pkattnum,
+									mapped_fkattnum,
+									pfeqoperators,
+									ppeqoperators,
+									ffeqoperators,
+									numfkdelsetcols,
+									fkdelsetcols,
+									old_check_ok,
+									lockmode,
+									insertTriggerOid,
+									updateTriggerOid);
+
+			table_close(partition, NoLock);
+		}
+
+		table_close(trigrel, RowExclusiveLock);
+	}
+}
+
+/*
+ * CloneForeignKeyConstraints
+ *		Clone foreign keys from a partitioned table to a newly acquired
+ *		partition.
+ *
+ * partitionRel is a partition of parentRel, so we can be certain that it has
+ * the same columns with the same datatypes.  The columns may be in different
+ * order, though.
+ *
+ * wqueue must be passed to set up phase 3 constraint checking, unless the
+ * referencing-side partition is known to be empty (such as in CREATE TABLE /
+ * PARTITION OF).
+ */
+static void
+CloneForeignKeyConstraints(List **wqueue, Relation parentRel,
+						   Relation partitionRel)
+{
+	/* This only works for declarative partitioning */
+	Assert(parentRel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
+
+	/*
+	 * Clone constraints for which the parent is on the referenced side.
+	 * (No wqueue is needed for these: that subroutine takes none.)
+	 */
+	CloneFkReferenced(parentRel, partitionRel);
+
+	/*
+	 * Now clone constraints where the parent is on the referencing side.
+	 */
+	CloneFkReferencing(wqueue, parentRel, partitionRel);
+}
+
+/*
+ * CloneFkReferenced
+ *		Subroutine for CloneForeignKeyConstraints
+ *
+ * Find all the FKs that have the parent relation on the referenced side;
+ * clone those constraints to the given partition.  This is to be called
+ * when the partition is being created or attached.
+ *
+ * This ignores self-referencing FKs; those are handled by CloneFkReferencing.
+ *
+ * This recurses to partitions, if the relation being attached is partitioned.
+ * Recursion is done by calling addFkRecurseReferenced.
+ */
+static void
+CloneFkReferenced(Relation parentRel, Relation partitionRel)
+{
+	Relation	pg_constraint;
+	AttrMap    *attmap;
+	ListCell   *cell;
+	SysScanDesc scan;
+	ScanKeyData key[2];
+	HeapTuple	tuple;
+	List	   *clone = NIL;
+	Relation	trigrel;
+
+	/*
+	 * Search for any constraints where this partition's parent is in the
+	 * referenced side.  However, we must not clone any constraint whose
+	 * parent constraint is also going to be cloned, to avoid duplicates.  So
+	 * do it in two steps: first construct the list of constraints to clone,
+	 * then go over that list cloning those whose parents are not in the list.
+	 * (We must not rely on the parent being seen first, since the catalog
+	 * scan could return children first.)
+	 */
+	pg_constraint = table_open(ConstraintRelationId, RowShareLock);
+	ScanKeyInit(&key[0],
+				Anum_pg_constraint_confrelid, BTEqualStrategyNumber,
+				F_OIDEQ, ObjectIdGetDatum(RelationGetRelid(parentRel)));
+	ScanKeyInit(&key[1],
+				Anum_pg_constraint_contype, BTEqualStrategyNumber,
+				F_CHAREQ, CharGetDatum(CONSTRAINT_FOREIGN));
+	/* This is a seqscan, as we don't have a usable index ... */
+	scan = systable_beginscan(pg_constraint, InvalidOid, true,
+							  NULL, 2, key);
+	while ((tuple = systable_getnext(scan)) != NULL)
+	{
+		Form_pg_constraint constrForm = (Form_pg_constraint) GETSTRUCT(tuple);
+
+		clone = lappend_oid(clone, constrForm->oid);
+	}
+	systable_endscan(scan);
+	table_close(pg_constraint, RowShareLock);
+
+	/*
+	 * Triggers of the foreign keys will be manipulated a bunch of times in
+	 * the loop below.  To avoid repeatedly opening/closing the trigger
+	 * catalog relation, we open it here and pass it to the subroutines called
+	 * below.
+	 */
+	trigrel = table_open(TriggerRelationId, RowExclusiveLock);
+
+	/* Map parent column numbers to the partition's numbering */
+	attmap = build_attrmap_by_name(RelationGetDescr(partitionRel),
+								   RelationGetDescr(parentRel));
+	foreach(cell, clone)
+	{
+		Oid			constrOid = lfirst_oid(cell);
+		Form_pg_constraint constrForm;
+		Relation	fkRel;
+		Oid			indexOid;
+		Oid			partIndexId;
+		int			numfks;
+		AttrNumber	conkey[INDEX_MAX_KEYS];
+		AttrNumber	mapped_confkey[INDEX_MAX_KEYS];
+		AttrNumber	confkey[INDEX_MAX_KEYS];
+		Oid			conpfeqop[INDEX_MAX_KEYS];
+		Oid			conppeqop[INDEX_MAX_KEYS];
+		Oid			conffeqop[INDEX_MAX_KEYS];
+		int			numfkdelsetcols;
+		AttrNumber	confdelsetcols[INDEX_MAX_KEYS];
+		Constraint *fkconstraint;
+		Oid			deleteTriggerOid,
+					updateTriggerOid;
+
+		tuple = SearchSysCache1(CONSTROID, constrOid);
+		if (!HeapTupleIsValid(tuple))
+			elog(ERROR, "cache lookup failed for constraint %u", constrOid);
+		constrForm = (Form_pg_constraint) GETSTRUCT(tuple);
+
+		/*
+		 * As explained above: don't try to clone a constraint for which we're
+		 * going to clone the parent.
+		 */
+		if (list_member_oid(clone, constrForm->conparentid))
+		{
+			ReleaseSysCache(tuple);
+			continue;
+		}
+
+		/*
+		 * Don't clone self-referencing foreign keys, which can be in the
+		 * partitioned table or in the partition-to-be.
+		 */
+		if (constrForm->conrelid == RelationGetRelid(parentRel) ||
+			constrForm->conrelid == RelationGetRelid(partitionRel))
+		{
+			ReleaseSysCache(tuple);
+			continue;
+		}
+
+		/*
+		 * Because we're only expanding the key space at the referenced side,
+		 * we don't need to prevent any operation in the referencing table, so
+		 * AccessShareLock suffices (assumes that dropping the constraint
+		 * acquires AEL).
+		 */
+		fkRel = table_open(constrForm->conrelid, AccessShareLock);
+
+		indexOid = constrForm->conindid;
+		DeconstructFkConstraintRow(tuple,
+								   &numfks,
+								   conkey,
+								   confkey,
+								   conpfeqop,
+								   conppeqop,
+								   conffeqop,
+								   &numfkdelsetcols,
+								   confdelsetcols);
+
+		for (int i = 0; i < numfks; i++)
+			mapped_confkey[i] = attmap->attnums[confkey[i] - 1];
+
+		fkconstraint = makeNode(Constraint);
+		fkconstraint->contype = CONSTRAINT_FOREIGN;
+		/*
+		 * NOTE(review): conname points into the syscache tuple; it is only
+		 * consumed before ReleaseSysCache below — confirm nothing retains it.
+		 */
+		fkconstraint->conname = NameStr(constrForm->conname);
+		fkconstraint->deferrable = constrForm->condeferrable;
+		fkconstraint->initdeferred = constrForm->condeferred;
+		fkconstraint->location = -1;
+		fkconstraint->pktable = NULL;
+		/* ->fk_attrs determined below */
+		fkconstraint->pk_attrs = NIL;
+		fkconstraint->fk_matchtype = constrForm->confmatchtype;
+		fkconstraint->fk_upd_action = constrForm->confupdtype;
+		fkconstraint->fk_del_action = constrForm->confdeltype;
+		fkconstraint->fk_del_set_cols = NIL;
+		fkconstraint->old_conpfeqop = NIL;
+		fkconstraint->old_pktable_oid = InvalidOid;
+		fkconstraint->skip_validation = false;
+		fkconstraint->initially_valid = true;
+
+		/* set up colnames that are used to generate the constraint name */
+		for (int i = 0; i < numfks; i++)
+		{
+			Form_pg_attribute att;
+
+			att = TupleDescAttr(RelationGetDescr(fkRel),
+								conkey[i] - 1);
+			fkconstraint->fk_attrs = lappend(fkconstraint->fk_attrs,
+											 makeString(NameStr(att->attname)));
+		}
+
+		/*
+		 * Add the new foreign key constraint pointing to the new partition.
+		 * Because this new partition appears in the referenced side of the
+		 * constraint, we don't need to set up for Phase 3 check.
+		 */
+		partIndexId = index_get_partition(partitionRel, indexOid);
+		if (!OidIsValid(partIndexId))
+			elog(ERROR, "index for %u not found in partition %s",
+				 indexOid, RelationGetRelationName(partitionRel));
+
+		/*
+		 * Get the "action" triggers belonging to the constraint to pass as
+		 * parent OIDs for similar triggers that will be created on the
+		 * partition in addFkRecurseReferenced().
+		 */
+		GetForeignKeyActionTriggers(trigrel, constrOid,
+									constrForm->confrelid, constrForm->conrelid,
+									&deleteTriggerOid, &updateTriggerOid);
+
+		addFkRecurseReferenced(NULL,
+							   fkconstraint,
+							   fkRel,
+							   partitionRel,
+							   partIndexId,
+							   constrOid,
+							   numfks,
+							   mapped_confkey,
+							   conkey,
+							   conpfeqop,
+							   conppeqop,
+							   conffeqop,
+							   numfkdelsetcols,
+							   confdelsetcols,
+							   true,
+							   deleteTriggerOid,
+							   updateTriggerOid);
+
+		/* keep the lock on the referencing rel until end of transaction */
+		table_close(fkRel, NoLock);
+		ReleaseSysCache(tuple);
+	}
+
+	table_close(trigrel, RowExclusiveLock);
+}
+
+/*
+ * CloneFkReferencing
+ *		Subroutine for CloneForeignKeyConstraints
+ *
+ * For each FK constraint of the parent relation in the given list, find an
+ * equivalent constraint in its partition relation that can be reparented;
+ * if one cannot be found, create a new constraint in the partition as its
+ * child.
+ *
+ * If wqueue is given, it is used to set up phase-3 verification for each
+ * cloned constraint; if omitted, we assume that such verification is not
+ * needed (example: the partition is being created anew).
+ */
+static void
+CloneFkReferencing(List **wqueue, Relation parentRel, Relation partRel)
+{
+	AttrMap    *attmap;
+	List	   *partFKs;
+	List	   *clone = NIL;
+	ListCell   *cell;
+	Relation	trigrel;
+
+	/* obtain a list of constraints that we need to clone */
+	foreach(cell, RelationGetFKeyList(parentRel))
+	{
+		ForeignKeyCacheInfo *fk = lfirst(cell);
+
+		clone = lappend_oid(clone, fk->conoid);
+	}
+
+	/*
+	 * Silently do nothing if there's nothing to do.  In particular, this
+	 * avoids throwing a spurious error for foreign tables.
+	 */
+	if (clone == NIL)
+		return;
+
+	if (partRel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("foreign key constraints are not supported on foreign tables")));
+
+	/*
+	 * Triggers of the foreign keys will be manipulated a bunch of times in
+	 * the loop below.  To avoid repeatedly opening/closing the trigger
+	 * catalog relation, we open it here and pass it to the subroutines called
+	 * below.
+	 */
+	trigrel = table_open(TriggerRelationId, RowExclusiveLock);
+
+	/*
+	 * The constraint key may differ, if the columns in the partition are
+	 * different.  This map is used to convert them.
+	 */
+	attmap = build_attrmap_by_name(RelationGetDescr(partRel),
+								   RelationGetDescr(parentRel));
+
+	partFKs = copyObject(RelationGetFKeyList(partRel));
+
+	foreach(cell, clone)
+	{
+		Oid			parentConstrOid = lfirst_oid(cell);
+		Form_pg_constraint constrForm;
+		Relation	pkrel;
+		HeapTuple	tuple;
+		int			numfks;
+		AttrNumber	conkey[INDEX_MAX_KEYS];
+		AttrNumber	mapped_conkey[INDEX_MAX_KEYS];
+		AttrNumber	confkey[INDEX_MAX_KEYS];
+		Oid			conpfeqop[INDEX_MAX_KEYS];
+		Oid			conppeqop[INDEX_MAX_KEYS];
+		Oid			conffeqop[INDEX_MAX_KEYS];
+		int			numfkdelsetcols;
+		AttrNumber	confdelsetcols[INDEX_MAX_KEYS];
+		Constraint *fkconstraint;
+		bool		attached;
+		Oid			indexOid;
+		Oid			constrOid;
+		ObjectAddress address,
+					referenced;
+		ListCell   *lc;			/* renamed: was "cell", shadowing outer loop var */
+		Oid			insertTriggerOid,
+					updateTriggerOid;
+
+		tuple = SearchSysCache1(CONSTROID, parentConstrOid);
+		if (!HeapTupleIsValid(tuple))
+			elog(ERROR, "cache lookup failed for constraint %u",
+				 parentConstrOid);
+		constrForm = (Form_pg_constraint) GETSTRUCT(tuple);
+
+		/* Don't clone constraints whose parents are being cloned */
+		if (list_member_oid(clone, constrForm->conparentid))
+		{
+			ReleaseSysCache(tuple);
+			continue;
+		}
+
+		/*
+		 * Need to prevent concurrent deletions.  If pkrel is a partitioned
+		 * relation, that means to lock all partitions.
+		 */
+		pkrel = table_open(constrForm->confrelid, ShareRowExclusiveLock);
+		if (pkrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+			(void) find_all_inheritors(RelationGetRelid(pkrel),
+									   ShareRowExclusiveLock, NULL);
+
+		DeconstructFkConstraintRow(tuple, &numfks, conkey, confkey,
+								   conpfeqop, conppeqop, conffeqop,
+								   &numfkdelsetcols, confdelsetcols);
+		/* Translate the referencing columns to the partition's numbering */
+		for (int i = 0; i < numfks; i++)
+			mapped_conkey[i] = attmap->attnums[conkey[i] - 1];
+
+		/*
+		 * Get the "check" triggers belonging to the constraint to pass as
+		 * parent OIDs for similar triggers that will be created on the
+		 * partition in addFkRecurseReferencing().  They are also passed to
+		 * tryAttachPartitionForeignKey() below to simply assign as parents to
+		 * the partition's existing "check" triggers, that is, if the
+		 * corresponding constraints is deemed attachable to the parent
+		 * constraint.
+		 */
+		GetForeignKeyCheckTriggers(trigrel, constrForm->oid,
+								   constrForm->confrelid, constrForm->conrelid,
+								   &insertTriggerOid, &updateTriggerOid);
+
+		/*
+		 * Before creating a new constraint, see whether any existing FKs are
+		 * fit for the purpose.  If one is, attach the parent constraint to
+		 * it, and don't clone anything.  This way we avoid the expensive
+		 * verification step and don't end up with a duplicate FK, and we
+		 * don't need to recurse to partitions for this constraint.
+		 */
+		attached = false;
+		foreach(lc, partFKs)
+		{
+			ForeignKeyCacheInfo *fk = lfirst_node(ForeignKeyCacheInfo, lc);
+
+			if (tryAttachPartitionForeignKey(fk,
+											 RelationGetRelid(partRel),
+											 parentConstrOid,
+											 numfks,
+											 mapped_conkey,
+											 confkey,
+											 conpfeqop,
+											 insertTriggerOid,
+											 updateTriggerOid,
+											 trigrel))
+			{
+				attached = true;
+				table_close(pkrel, NoLock);
+				break;
+			}
+		}
+		if (attached)
+		{
+			ReleaseSysCache(tuple);
+			continue;
+		}
+
+		/* No dice.  Set up to create our own constraint */
+		fkconstraint = makeNode(Constraint);
+		fkconstraint->contype = CONSTRAINT_FOREIGN;
+		/* ->conname determined below */
+		fkconstraint->deferrable = constrForm->condeferrable;
+		fkconstraint->initdeferred = constrForm->condeferred;
+		fkconstraint->location = -1;
+		fkconstraint->pktable = NULL;
+		/* ->fk_attrs determined below */
+		fkconstraint->pk_attrs = NIL;
+		fkconstraint->fk_matchtype = constrForm->confmatchtype;
+		fkconstraint->fk_upd_action = constrForm->confupdtype;
+		fkconstraint->fk_del_action = constrForm->confdeltype;
+		fkconstraint->fk_del_set_cols = NIL;
+		fkconstraint->old_conpfeqop = NIL;
+		fkconstraint->old_pktable_oid = InvalidOid;
+		fkconstraint->skip_validation = false;
+		fkconstraint->initially_valid = true;
+		for (int i = 0; i < numfks; i++)
+		{
+			Form_pg_attribute att;
+
+			att = TupleDescAttr(RelationGetDescr(partRel),
+								mapped_conkey[i] - 1);
+			fkconstraint->fk_attrs = lappend(fkconstraint->fk_attrs,
+											 makeString(NameStr(att->attname)));
+		}
+		if (ConstraintNameIsUsed(CONSTRAINT_RELATION,
+								 RelationGetRelid(partRel),
+								 NameStr(constrForm->conname)))
+			fkconstraint->conname =
+				ChooseConstraintName(RelationGetRelationName(partRel),
+									 ChooseForeignKeyConstraintNameAddition(fkconstraint->fk_attrs),
+									 "fkey",
+									 RelationGetNamespace(partRel), NIL);
+		else
+			fkconstraint->conname = pstrdup(NameStr(constrForm->conname));
+
+		indexOid = constrForm->conindid;
+		constrOid =
+			CreateConstraintEntry(fkconstraint->conname,
+								  constrForm->connamespace,
+								  CONSTRAINT_FOREIGN,
+								  fkconstraint->deferrable,
+								  fkconstraint->initdeferred,
+								  constrForm->convalidated,
+								  parentConstrOid,
+								  RelationGetRelid(partRel),
+								  mapped_conkey,
+								  numfks,
+								  numfks,
+								  InvalidOid,	/* not a domain constraint */
+								  indexOid,
+								  constrForm->confrelid,	/* same foreign rel */
+								  confkey,
+								  conpfeqop,
+								  conppeqop,
+								  conffeqop,
+								  numfks,
+								  fkconstraint->fk_upd_action,
+								  fkconstraint->fk_del_action,
+								  confdelsetcols,
+								  numfkdelsetcols,
+								  fkconstraint->fk_matchtype,
+								  NULL,
+								  NULL,
+								  NULL,
+								  false,	/* islocal */
+								  1,	/* inhcount */
+								  false,	/* conNoInherit */
+								  true);
+
+		/* Set up partition dependencies for the new constraint */
+		ObjectAddressSet(address, ConstraintRelationId, constrOid);
+		ObjectAddressSet(referenced, ConstraintRelationId, parentConstrOid);
+		recordDependencyOn(&address, &referenced, DEPENDENCY_PARTITION_PRI);
+		ObjectAddressSet(referenced, RelationRelationId,
+						 RelationGetRelid(partRel));
+		recordDependencyOn(&address, &referenced, DEPENDENCY_PARTITION_SEC);
+
+		/* Done with the cloned constraint's tuple */
+		ReleaseSysCache(tuple);
+
+		/* Make all this visible before recursing */
+		CommandCounterIncrement();
+
+		addFkRecurseReferencing(wqueue,
+								fkconstraint,
+								partRel,
+								pkrel,
+								indexOid,
+								constrOid,
+								numfks,
+								confkey,
+								mapped_conkey,
+								conpfeqop,
+								conppeqop,
+								conffeqop,
+								numfkdelsetcols,
+								confdelsetcols,
+								false,	/* no old check exists */
+								AccessExclusiveLock,
+								insertTriggerOid,
+								updateTriggerOid);
+		table_close(pkrel, NoLock);
+	}
+
+	table_close(trigrel, RowExclusiveLock);
+}
+
+/*
+ * When the parent of a partition receives [the referencing side of] a foreign
+ * key, we must propagate that foreign key to the partition.  However, the
+ * partition might already have an equivalent foreign key; this routine
+ * compares the given ForeignKeyCacheInfo (in the partition) to the FK defined
+ * by the other parameters.  If they are equivalent, create the link between
+ * the two constraints and return true.
+ *
+ * If the given FK does not match the one defined by rest of the params,
+ * return false.
+ */
+static bool
+tryAttachPartitionForeignKey(ForeignKeyCacheInfo *fk,
+							 Oid partRelid,
+							 Oid parentConstrOid,
+							 int numfks,
+							 AttrNumber *mapped_conkey,
+							 AttrNumber *confkey,
+							 Oid *conpfeqop,
+							 Oid parentInsTrigger,
+							 Oid parentUpdTrigger,
+							 Relation trigrel)
+{
+	HeapTuple	parentConstrTup;
+	Form_pg_constraint parentConstr;
+	HeapTuple	partcontup;
+	Form_pg_constraint partConstr;
+	ScanKeyData key;
+	SysScanDesc scan;
+	HeapTuple	trigtup;
+	Oid			insertTriggerOid,
+				updateTriggerOid;
+
+	parentConstrTup = SearchSysCache1(CONSTROID,
+									  ObjectIdGetDatum(parentConstrOid));
+	if (!HeapTupleIsValid(parentConstrTup))
+		elog(ERROR, "cache lookup failed for constraint %u", parentConstrOid);
+	parentConstr = (Form_pg_constraint) GETSTRUCT(parentConstrTup);
+
+	/*
+	 * Do some quick & easy initial checks.  If any of these fail, we cannot
+	 * use this constraint.
+	 */
+	if (fk->confrelid != parentConstr->confrelid || fk->nkeys != numfks)
+	{
+		ReleaseSysCache(parentConstrTup);
+		return false;
+	}
+	for (int i = 0; i < numfks; i++)
+	{
+		if (fk->conkey[i] != mapped_conkey[i] ||
+			fk->confkey[i] != confkey[i] ||
+			fk->conpfeqop[i] != conpfeqop[i])
+		{
+			ReleaseSysCache(parentConstrTup);
+			return false;
+		}
+	}
+
+	/*
+	 * Looks good so far; do some more extensive checks.  Presumably the check
+	 * for 'convalidated' could be dropped, since we don't really care about
+	 * that, but let's be careful for now.
+	 *
+	 * NOTE(review): the ON DELETE SET NULL/DEFAULT column list
+	 * (confdelsetcols) is not compared here; constraints differing only in
+	 * that list would be deemed equivalent — confirm this is intended.
+	 */
+	partcontup = SearchSysCache1(CONSTROID,
+								 ObjectIdGetDatum(fk->conoid));
+	if (!HeapTupleIsValid(partcontup))
+		elog(ERROR, "cache lookup failed for constraint %u", fk->conoid);
+	partConstr = (Form_pg_constraint) GETSTRUCT(partcontup);
+	if (OidIsValid(partConstr->conparentid) ||
+		!partConstr->convalidated ||
+		partConstr->condeferrable != parentConstr->condeferrable ||
+		partConstr->condeferred != parentConstr->condeferred ||
+		partConstr->confupdtype != parentConstr->confupdtype ||
+		partConstr->confdeltype != parentConstr->confdeltype ||
+		partConstr->confmatchtype != parentConstr->confmatchtype)
+	{
+		ReleaseSysCache(parentConstrTup);
+		ReleaseSysCache(partcontup);
+		return false;
+	}
+
+	ReleaseSysCache(partcontup);
+	ReleaseSysCache(parentConstrTup);
+
+	/*
+	 * Looks good!  Attach this constraint.  The action triggers in the new
+	 * partition become redundant -- the parent table already has equivalent
+	 * ones, and those will be able to reach the partition.  Remove the ones
+	 * in the partition.  We identify them because they have our constraint
+	 * OID, as well as being on the referenced rel.
+	 */
+	ScanKeyInit(&key,
+				Anum_pg_trigger_tgconstraint,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(fk->conoid));
+	scan = systable_beginscan(trigrel, TriggerConstraintIndexId, true,
+							  NULL, 1, &key);
+	while ((trigtup = systable_getnext(scan)) != NULL)
+	{
+		Form_pg_trigger trgform = (Form_pg_trigger) GETSTRUCT(trigtup);
+		ObjectAddress trigger;
+
+		/* skip triggers that are not the referenced-side action triggers */
+		if (trgform->tgconstrrelid != fk->conrelid)
+			continue;
+		if (trgform->tgrelid != fk->confrelid)
+			continue;
+
+		/*
+		 * The constraint is originally set up to contain this trigger as an
+		 * implementation object, so there's a dependency record that links
+		 * the two; however, since the trigger is no longer needed, we remove
+		 * the dependency link in order to be able to drop the trigger while
+		 * keeping the constraint intact.
+		 */
+		deleteDependencyRecordsFor(TriggerRelationId,
+								   trgform->oid,
+								   false);
+		/* make dependency deletion visible to performDeletion */
+		CommandCounterIncrement();
+		ObjectAddressSet(trigger, TriggerRelationId,
+						 trgform->oid);
+		performDeletion(&trigger, DROP_RESTRICT, 0);
+		/* make trigger drop visible, in case the loop iterates */
+		CommandCounterIncrement();
+	}
+
+	systable_endscan(scan);
+
+	ConstraintSetParentConstraint(fk->conoid, parentConstrOid, partRelid);
+
+	/*
+	 * Like the constraint, attach partition's "check" triggers to the
+	 * corresponding parent triggers.
+	 */
+	GetForeignKeyCheckTriggers(trigrel,
+							   fk->conoid, fk->confrelid, fk->conrelid,
+							   &insertTriggerOid, &updateTriggerOid);
+	Assert(OidIsValid(insertTriggerOid) && OidIsValid(parentInsTrigger));
+	TriggerSetParentTrigger(trigrel, insertTriggerOid, parentInsTrigger,
+							partRelid);
+	Assert(OidIsValid(updateTriggerOid) && OidIsValid(parentUpdTrigger));
+	TriggerSetParentTrigger(trigrel, updateTriggerOid, parentUpdTrigger,
+							partRelid);
+
+	/* make the attachment visible to callers */
+	CommandCounterIncrement();
+	return true;
+}
+
+/*
+ * GetForeignKeyActionTriggers
+ *		Look up the ON DELETE and ON UPDATE "action" triggers that implement
+ *		the given foreign key constraint on its referenced relation, and
+ *		return their OIDs via the output parameters.
+ *
+ * Raises an error if either trigger cannot be found.
+ */
+static void
+GetForeignKeyActionTriggers(Relation trigrel,
+							Oid conoid, Oid confrelid, Oid conrelid,
+							Oid *deleteTriggerOid,
+							Oid *updateTriggerOid)
+{
+	ScanKeyData skey;
+	SysScanDesc tgscan;
+	HeapTuple	tup;
+
+	*deleteTriggerOid = *updateTriggerOid = InvalidOid;
+
+	/* Scan pg_trigger for triggers owned by this constraint */
+	ScanKeyInit(&skey,
+				Anum_pg_trigger_tgconstraint,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(conoid));
+	tgscan = systable_beginscan(trigrel, TriggerConstraintIndexId, true,
+								NULL, 1, &skey);
+	while (HeapTupleIsValid(tup = systable_getnext(tgscan)))
+	{
+		Form_pg_trigger trigform = (Form_pg_trigger) GETSTRUCT(tup);
+
+		/*
+		 * Ignore anything that isn't an "action" trigger on the PK side of
+		 * this constraint.
+		 */
+		if (trigform->tgconstrrelid != conrelid ||
+			trigform->tgrelid != confrelid ||
+			RI_FKey_trigger_type(trigform->tgfoid) != RI_TRIGGER_PK)
+			continue;
+
+		if (TRIGGER_FOR_DELETE(trigform->tgtype))
+		{
+			Assert(*deleteTriggerOid == InvalidOid);
+			*deleteTriggerOid = trigform->oid;
+		}
+		else if (TRIGGER_FOR_UPDATE(trigform->tgtype))
+		{
+			Assert(*updateTriggerOid == InvalidOid);
+			*updateTriggerOid = trigform->oid;
+		}
+#ifndef USE_ASSERT_CHECKING
+		/* An assert-enabled build keeps scanning, so as to notice duplicates */
+		if (OidIsValid(*deleteTriggerOid) && OidIsValid(*updateTriggerOid))
+			break;
+#endif
+	}
+
+	if (!OidIsValid(*deleteTriggerOid))
+		elog(ERROR, "could not find ON DELETE action trigger of foreign key constraint %u",
+			 conoid);
+	if (!OidIsValid(*updateTriggerOid))
+		elog(ERROR, "could not find ON UPDATE action trigger of foreign key constraint %u",
+			 conoid);
+
+	systable_endscan(tgscan);
+}
+
+/*
+ * GetForeignKeyCheckTriggers
+ *		Look up the INSERT and UPDATE "check" triggers that implement the
+ *		given foreign key constraint on its referencing relation, and return
+ *		their OIDs via the output parameters.
+ *
+ * Raises an error if either trigger cannot be found.
+ */
+static void
+GetForeignKeyCheckTriggers(Relation trigrel,
+						   Oid conoid, Oid confrelid, Oid conrelid,
+						   Oid *insertTriggerOid,
+						   Oid *updateTriggerOid)
+{
+	ScanKeyData skey;
+	SysScanDesc tgscan;
+	HeapTuple	tup;
+
+	*insertTriggerOid = *updateTriggerOid = InvalidOid;
+
+	/* Scan pg_trigger for triggers owned by this constraint */
+	ScanKeyInit(&skey,
+				Anum_pg_trigger_tgconstraint,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(conoid));
+	tgscan = systable_beginscan(trigrel, TriggerConstraintIndexId, true,
+								NULL, 1, &skey);
+	while (HeapTupleIsValid(tup = systable_getnext(tgscan)))
+	{
+		Form_pg_trigger trigform = (Form_pg_trigger) GETSTRUCT(tup);
+
+		/*
+		 * Ignore anything that isn't a "check" trigger on the FK side of
+		 * this constraint.
+		 */
+		if (trigform->tgconstrrelid != confrelid ||
+			trigform->tgrelid != conrelid ||
+			RI_FKey_trigger_type(trigform->tgfoid) != RI_TRIGGER_FK)
+			continue;
+
+		if (TRIGGER_FOR_INSERT(trigform->tgtype))
+		{
+			Assert(*insertTriggerOid == InvalidOid);
+			*insertTriggerOid = trigform->oid;
+		}
+		else if (TRIGGER_FOR_UPDATE(trigform->tgtype))
+		{
+			Assert(*updateTriggerOid == InvalidOid);
+			*updateTriggerOid = trigform->oid;
+		}
+#ifndef USE_ASSERT_CHECKING
+		/* An assert-enabled build keeps scanning, so as to notice duplicates */
+		if (OidIsValid(*insertTriggerOid) && OidIsValid(*updateTriggerOid))
+			break;
+#endif
+	}
+
+	if (!OidIsValid(*insertTriggerOid))
+		elog(ERROR, "could not find ON INSERT check triggers of foreign key constraint %u",
+			 conoid);
+	if (!OidIsValid(*updateTriggerOid))
+		elog(ERROR, "could not find ON UPDATE check triggers of foreign key constraint %u",
+			 conoid);
+
+	systable_endscan(tgscan);
+}
+
+/*
+ * ALTER TABLE ALTER CONSTRAINT
+ *
+ * Update the attributes of a constraint.
+ *
+ * Currently only works for Foreign Key constraints.
+ *
+ * If the constraint is modified, returns its address; otherwise, return
+ * InvalidObjectAddress.
+ */
+static ObjectAddress
+ATExecAlterConstraint(Relation rel, AlterTableCmd *cmd, bool recurse,
+ bool recursing, LOCKMODE lockmode)
+{
+ Constraint *cmdcon;
+ Relation conrel;
+ Relation tgrel;
+ SysScanDesc scan;
+ ScanKeyData skey[3];
+ HeapTuple contuple;
+ Form_pg_constraint currcon;
+ ObjectAddress address;
+ List *otherrelids = NIL;
+ ListCell *lc;
+
+ cmdcon = castNode(Constraint, cmd->def);
+
+ conrel = table_open(ConstraintRelationId, RowExclusiveLock);
+ tgrel = table_open(TriggerRelationId, RowExclusiveLock);
+
+ /*
+ * Find and check the target constraint
+ */
+ ScanKeyInit(&skey[0],
+ Anum_pg_constraint_conrelid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(RelationGetRelid(rel)));
+ ScanKeyInit(&skey[1],
+ Anum_pg_constraint_contypid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(InvalidOid));
+ ScanKeyInit(&skey[2],
+ Anum_pg_constraint_conname,
+ BTEqualStrategyNumber, F_NAMEEQ,
+ CStringGetDatum(cmdcon->conname));
+ scan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId,
+ true, NULL, 3, skey);
+
+ /* There can be at most one matching row */
+ if (!HeapTupleIsValid(contuple = systable_getnext(scan)))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("constraint \"%s\" of relation \"%s\" does not exist",
+ cmdcon->conname, RelationGetRelationName(rel))));
+
+ currcon = (Form_pg_constraint) GETSTRUCT(contuple);
+ if (currcon->contype != CONSTRAINT_FOREIGN)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("constraint \"%s\" of relation \"%s\" is not a foreign key constraint",
+ cmdcon->conname, RelationGetRelationName(rel))));
+
+ /*
+ * If it's not the topmost constraint, raise an error.
+ *
+ * Altering a non-topmost constraint leaves some triggers untouched, since
+ * they are not directly connected to this constraint; also, pg_dump would
+ * ignore the deferrability status of the individual constraint, since it
+ * only dumps topmost constraints. Avoid these problems by refusing this
+ * operation and telling the user to alter the parent constraint instead.
+ */
+ if (OidIsValid(currcon->conparentid))
+ {
+ HeapTuple tp;
+ Oid parent = currcon->conparentid;
+ char *ancestorname = NULL;
+ char *ancestortable = NULL;
+
+ /* Loop to find the topmost constraint */
+ while (HeapTupleIsValid(tp = SearchSysCache1(CONSTROID, ObjectIdGetDatum(parent))))
+ {
+ Form_pg_constraint contup = (Form_pg_constraint) GETSTRUCT(tp);
+
+ /* If no parent, this is the constraint we want */
+ if (!OidIsValid(contup->conparentid))
+ {
+ ancestorname = pstrdup(NameStr(contup->conname));
+ ancestortable = get_rel_name(contup->conrelid);
+ ReleaseSysCache(tp);
+ break;
+ }
+
+ parent = contup->conparentid;
+ ReleaseSysCache(tp);
+ }
+
+ ereport(ERROR,
+ (errmsg("cannot alter constraint \"%s\" on relation \"%s\"",
+ cmdcon->conname, RelationGetRelationName(rel)),
+ ancestorname && ancestortable ?
+ errdetail("Constraint \"%s\" is derived from constraint \"%s\" of relation \"%s\".",
+ cmdcon->conname, ancestorname, ancestortable) : 0,
+ errhint("You may alter the constraint it derives from, instead.")));
+ }
+
+ /*
+ * Do the actual catalog work. We can skip changing if already in the
+ * desired state, but not if a partitioned table: partitions need to be
+ * processed regardless, in case they had the constraint locally changed.
+ */
+ address = InvalidObjectAddress;
+ if (currcon->condeferrable != cmdcon->deferrable ||
+ currcon->condeferred != cmdcon->initdeferred ||
+ rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ {
+ if (ATExecAlterConstrRecurse(cmdcon, conrel, tgrel, rel, contuple,
+ &otherrelids, lockmode))
+ ObjectAddressSet(address, ConstraintRelationId, currcon->oid);
+ }
+
+ /*
+ * ATExecAlterConstrRecurse already invalidated relcache for the relations
+ * having the constraint itself; here we also invalidate for relations
+ * that have any triggers that are part of the constraint.
+ */
+ foreach(lc, otherrelids)
+ CacheInvalidateRelcacheByRelid(lfirst_oid(lc));
+
+ systable_endscan(scan);
+
+ table_close(tgrel, RowExclusiveLock);
+ table_close(conrel, RowExclusiveLock);
+
+ return address;
+}
+
+/*
+ * Recursive subroutine of ATExecAlterConstraint. Returns true if the
+ * constraint is altered.
+ *
+ * cmdcon carries the requested deferrability flags (deferrable and
+ * initdeferred). conrel and tgrel are the pg_constraint and pg_trigger
+ * catalogs, already opened by the caller; contuple is this constraint's
+ * pg_constraint row.
+ *
+ * *otherrelids is appended OIDs of relations containing affected triggers.
+ *
+ * Note that we must recurse even when the values are correct, in case
+ * indirect descendants have had their constraints altered locally.
+ * (This could be avoided if we forbade altering constraints in partitions
+ * but existing releases don't do that.)
+ */
+static bool
+ATExecAlterConstrRecurse(Constraint *cmdcon, Relation conrel, Relation tgrel,
+ Relation rel, HeapTuple contuple, List **otherrelids,
+ LOCKMODE lockmode)
+{
+ Form_pg_constraint currcon;
+ Oid conoid;
+ Oid refrelid;
+ bool changed = false;
+
+ currcon = (Form_pg_constraint) GETSTRUCT(contuple);
+ conoid = currcon->oid;
+ refrelid = currcon->confrelid;
+
+ /*
+ * Update pg_constraint with the flags from cmdcon.
+ *
+ * If called to modify a constraint that's already in the desired state,
+ * silently do nothing.
+ */
+ if (currcon->condeferrable != cmdcon->deferrable ||
+ currcon->condeferred != cmdcon->initdeferred)
+ {
+ HeapTuple copyTuple;
+ Form_pg_constraint copy_con;
+ HeapTuple tgtuple;
+ ScanKeyData tgkey;
+ SysScanDesc tgscan;
+
+ /* Update a copy, never the tuple we got from the scan. */
+ copyTuple = heap_copytuple(contuple);
+ copy_con = (Form_pg_constraint) GETSTRUCT(copyTuple);
+ copy_con->condeferrable = cmdcon->deferrable;
+ copy_con->condeferred = cmdcon->initdeferred;
+ CatalogTupleUpdate(conrel, &copyTuple->t_self, copyTuple);
+
+ InvokeObjectPostAlterHook(ConstraintRelationId,
+ conoid, 0);
+
+ heap_freetuple(copyTuple);
+ changed = true;
+
+ /* Make new constraint flags visible to others */
+ CacheInvalidateRelcache(rel);
+
+ /*
+ * Now we need to update the multiple entries in pg_trigger that
+ * implement the constraint.
+ */
+ ScanKeyInit(&tgkey,
+ Anum_pg_trigger_tgconstraint,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(conoid));
+ tgscan = systable_beginscan(tgrel, TriggerConstraintIndexId, true,
+ NULL, 1, &tgkey);
+ while (HeapTupleIsValid(tgtuple = systable_getnext(tgscan)))
+ {
+ Form_pg_trigger tgform = (Form_pg_trigger) GETSTRUCT(tgtuple);
+ Form_pg_trigger copy_tg;
+ HeapTuple copyTuple;
+
+ /*
+ * Remember OIDs of other relation(s) involved in FK constraint.
+ * (Note: it's likely that we could skip forcing a relcache inval
+ * for other rels that don't have a trigger whose properties
+ * change, but let's be conservative.)
+ */
+ if (tgform->tgrelid != RelationGetRelid(rel))
+ *otherrelids = list_append_unique_oid(*otherrelids,
+ tgform->tgrelid);
+
+ /*
+ * Update deferrability of RI_FKey_noaction_del,
+ * RI_FKey_noaction_upd, RI_FKey_check_ins and RI_FKey_check_upd
+ * triggers, but not others; see createForeignKeyActionTriggers
+ * and CreateFKCheckTrigger.
+ */
+ if (tgform->tgfoid != F_RI_FKEY_NOACTION_DEL &&
+ tgform->tgfoid != F_RI_FKEY_NOACTION_UPD &&
+ tgform->tgfoid != F_RI_FKEY_CHECK_INS &&
+ tgform->tgfoid != F_RI_FKEY_CHECK_UPD)
+ continue;
+
+ copyTuple = heap_copytuple(tgtuple);
+ copy_tg = (Form_pg_trigger) GETSTRUCT(copyTuple);
+
+ copy_tg->tgdeferrable = cmdcon->deferrable;
+ copy_tg->tginitdeferred = cmdcon->initdeferred;
+ CatalogTupleUpdate(tgrel, &copyTuple->t_self, copyTuple);
+
+ InvokeObjectPostAlterHook(TriggerRelationId, tgform->oid, 0);
+
+ heap_freetuple(copyTuple);
+ }
+
+ systable_endscan(tgscan);
+ }
+
+ /*
+ * If the table at either end of the constraint is partitioned, we need to
+ * recurse and handle every constraint that is a child of this one.
+ *
+ * (This assumes that the recurse flag is forcibly set for partitioned
+ * tables, and not set for legacy inheritance, though we don't check for
+ * that here.)
+ */
+ if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE ||
+ get_rel_relkind(refrelid) == RELKIND_PARTITIONED_TABLE)
+ {
+ ScanKeyData pkey;
+ SysScanDesc pscan;
+ HeapTuple childtup;
+
+ /* Find child constraints by their conparentid link to us. */
+ ScanKeyInit(&pkey,
+ Anum_pg_constraint_conparentid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(conoid));
+
+ pscan = systable_beginscan(conrel, ConstraintParentIndexId,
+ true, NULL, 1, &pkey);
+
+ while (HeapTupleIsValid(childtup = systable_getnext(pscan)))
+ {
+ Form_pg_constraint childcon = (Form_pg_constraint) GETSTRUCT(childtup);
+ Relation childrel;
+
+ childrel = table_open(childcon->conrelid, lockmode);
+ /* Result ignored: "changed" reports only this constraint's update. */
+ ATExecAlterConstrRecurse(cmdcon, conrel, tgrel, childrel, childtup,
+ otherrelids, lockmode);
+ table_close(childrel, NoLock); /* keep lock until end of transaction */
+ }
+
+ systable_endscan(pscan);
+ }
+
+ return changed;
+}
+
+/*
+ * ALTER TABLE VALIDATE CONSTRAINT
+ *
+ * Marks the named FK or CHECK constraint validated, queueing the actual
+ * data verification into the ALTER TABLE work queue (*wqueue) for phase 3.
+ *
+ * XXX The reason we handle recursion here rather than at Phase 1 is because
+ * there's no good way to skip recursing when handling foreign keys: there is
+ * no need to lock children in that case, yet we wouldn't be able to avoid
+ * doing so at that level.
+ *
+ * Return value is the address of the validated constraint. If the constraint
+ * was already validated, InvalidObjectAddress is returned.
+ */
+static ObjectAddress
+ATExecValidateConstraint(List **wqueue, Relation rel, char *constrName,
+ bool recurse, bool recursing, LOCKMODE lockmode)
+{
+ Relation conrel;
+ SysScanDesc scan;
+ ScanKeyData skey[3];
+ HeapTuple tuple;
+ Form_pg_constraint con;
+ ObjectAddress address;
+
+ conrel = table_open(ConstraintRelationId, RowExclusiveLock);
+
+ /*
+ * Find and check the target constraint: uniquely identified by
+ * (conrelid, contypid = 0, conname).
+ */
+ ScanKeyInit(&skey[0],
+ Anum_pg_constraint_conrelid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(RelationGetRelid(rel)));
+ ScanKeyInit(&skey[1],
+ Anum_pg_constraint_contypid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(InvalidOid));
+ ScanKeyInit(&skey[2],
+ Anum_pg_constraint_conname,
+ BTEqualStrategyNumber, F_NAMEEQ,
+ CStringGetDatum(constrName));
+ scan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId,
+ true, NULL, 3, skey);
+
+ /* There can be at most one matching row */
+ if (!HeapTupleIsValid(tuple = systable_getnext(scan)))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("constraint \"%s\" of relation \"%s\" does not exist",
+ constrName, RelationGetRelationName(rel))));
+
+ con = (Form_pg_constraint) GETSTRUCT(tuple);
+ if (con->contype != CONSTRAINT_FOREIGN &&
+ con->contype != CONSTRAINT_CHECK)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("constraint \"%s\" of relation \"%s\" is not a foreign key or check constraint",
+ constrName, RelationGetRelationName(rel))));
+
+ if (!con->convalidated)
+ {
+ AlteredTableInfo *tab;
+ HeapTuple copyTuple;
+ Form_pg_constraint copy_con;
+
+ if (con->contype == CONSTRAINT_FOREIGN)
+ {
+ NewConstraint *newcon;
+ Constraint *fkconstraint;
+
+ /* Queue validation for phase 3 */
+ fkconstraint = makeNode(Constraint);
+ /* for now this is all we need */
+ fkconstraint->conname = constrName;
+
+ newcon = (NewConstraint *) palloc0(sizeof(NewConstraint));
+ newcon->name = constrName;
+ newcon->contype = CONSTR_FOREIGN;
+ newcon->refrelid = con->confrelid;
+ newcon->refindid = con->conindid;
+ newcon->conid = con->oid;
+ newcon->qual = (Node *) fkconstraint;
+
+ /* Find or create work queue entry for this table */
+ tab = ATGetQueueEntry(wqueue, rel);
+ tab->constraints = lappend(tab->constraints, newcon);
+
+ /*
+ * We disallow creating invalid foreign keys to or from
+ * partitioned tables, so ignoring the recursion bit is okay.
+ */
+ }
+ else if (con->contype == CONSTRAINT_CHECK)
+ {
+ List *children = NIL;
+ ListCell *child;
+ NewConstraint *newcon;
+ bool isnull;
+ Datum val;
+ char *conbin;
+
+ /*
+ * If we're recursing, the parent has already done this, so skip
+ * it. Also, if the constraint is a NO INHERIT constraint, we
+ * shouldn't try to look for it in the children.
+ */
+ if (!recursing && !con->connoinherit)
+ children = find_all_inheritors(RelationGetRelid(rel),
+ lockmode, NULL);
+
+ /*
+ * For CHECK constraints, we must ensure that we only mark the
+ * constraint as validated on the parent if it's already validated
+ * on the children.
+ *
+ * We recurse before validating on the parent, to reduce risk of
+ * deadlocks.
+ */
+ foreach(child, children)
+ {
+ Oid childoid = lfirst_oid(child);
+ Relation childrel;
+
+ /* find_all_inheritors includes the given rel itself; skip it */
+ if (childoid == RelationGetRelid(rel))
+ continue;
+
+ /*
+ * If we are told not to recurse, there had better not be any
+ * child tables, because we can't mark the constraint on the
+ * parent valid unless it is valid for all child tables.
+ */
+ if (!recurse)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+ errmsg("constraint must be validated on child tables too")));
+
+ /* find_all_inheritors already got lock */
+ childrel = table_open(childoid, NoLock);
+
+ ATExecValidateConstraint(wqueue, childrel, constrName, false,
+ true, lockmode);
+ table_close(childrel, NoLock);
+ }
+
+ /* Queue validation for phase 3 */
+ newcon = (NewConstraint *) palloc0(sizeof(NewConstraint));
+ newcon->name = constrName;
+ newcon->contype = CONSTR_CHECK;
+ newcon->refrelid = InvalidOid;
+ newcon->refindid = InvalidOid;
+ newcon->conid = con->oid;
+
+ val = SysCacheGetAttr(CONSTROID, tuple,
+ Anum_pg_constraint_conbin, &isnull);
+ if (isnull)
+ elog(ERROR, "null conbin for constraint %u", con->oid);
+
+ conbin = TextDatumGetCString(val);
+ newcon->qual = (Node *) stringToNode(conbin);
+
+ /* Find or create work queue entry for this table */
+ tab = ATGetQueueEntry(wqueue, rel);
+ tab->constraints = lappend(tab->constraints, newcon);
+
+ /*
+ * Invalidate relcache so that others see the new validated
+ * constraint.
+ */
+ CacheInvalidateRelcache(rel);
+ }
+
+ /*
+ * Now update the catalog, while we have the door open.
+ */
+ copyTuple = heap_copytuple(tuple);
+ copy_con = (Form_pg_constraint) GETSTRUCT(copyTuple);
+ copy_con->convalidated = true;
+ CatalogTupleUpdate(conrel, &copyTuple->t_self, copyTuple);
+
+ InvokeObjectPostAlterHook(ConstraintRelationId, con->oid, 0);
+
+ heap_freetuple(copyTuple);
+
+ ObjectAddressSet(address, ConstraintRelationId, con->oid);
+ }
+ else
+ address = InvalidObjectAddress; /* already validated */
+
+ systable_endscan(scan);
+
+ table_close(conrel, RowExclusiveLock);
+
+ return address;
+}
+
+
+/*
+ * transformColumnNameList - transform list of column names
+ *
+ * Lookup each name and return its attnum and, optionally, type OID
+ *
+ * attnums and atttypids are caller-supplied output arrays; the bounds check
+ * below implies they must hold at least INDEX_MAX_KEYS entries. atttypids
+ * may be NULL if the caller doesn't need the type OIDs. The function
+ * return value is the number of columns looked up.
+ *
+ * Note: the name of this function suggests that it's general-purpose,
+ * but actually it's only used to look up names appearing in foreign-key
+ * clauses. The error messages would need work to use it in other cases,
+ * and perhaps the validity checks as well.
+ */
+static int
+transformColumnNameList(Oid relId, List *colList,
+ int16 *attnums, Oid *atttypids)
+{
+ ListCell *l;
+ int attnum;
+
+ attnum = 0;
+ foreach(l, colList)
+ {
+ char *attname = strVal(lfirst(l));
+ HeapTuple atttuple;
+ Form_pg_attribute attform;
+
+ atttuple = SearchSysCacheAttName(relId, attname);
+ if (!HeapTupleIsValid(atttuple))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_COLUMN),
+ errmsg("column \"%s\" referenced in foreign key constraint does not exist",
+ attname)));
+ attform = (Form_pg_attribute) GETSTRUCT(atttuple);
+ /* Negative attnums are system columns; disallow those. */
+ if (attform->attnum < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("system columns cannot be used in foreign keys")));
+ /* Check bound before storing into the output arrays. */
+ if (attnum >= INDEX_MAX_KEYS)
+ ereport(ERROR,
+ (errcode(ERRCODE_TOO_MANY_COLUMNS),
+ errmsg("cannot have more than %d keys in a foreign key",
+ INDEX_MAX_KEYS)));
+ attnums[attnum] = attform->attnum;
+ if (atttypids != NULL)
+ atttypids[attnum] = attform->atttypid;
+ ReleaseSysCache(atttuple);
+ attnum++;
+ }
+
+ return attnum;
+}
+
<br>+/*
+ * transformFkeyGetPrimaryKey -
+ *
+ * Look up the names, attnums, and types of the primary key attributes
+ * for the pkrel. Also return the index OID and index opclasses of the
+ * index supporting the primary key.
+ *
+ * All parameters except pkrel are output parameters. Also, the function
+ * return value is the number of attributes in the primary key.
+ *
+ * attnums, atttypids and opclasses are caller-supplied arrays; they are
+ * filled with one entry per PK column (presumably sized for
+ * INDEX_MAX_KEYS, like elsewhere in this file — no bound check here).
+ *
+ * Used when the column list in the REFERENCES specification is omitted.
+ */
+static int
+transformFkeyGetPrimaryKey(Relation pkrel, Oid *indexOid,
+ List **attnamelist,
+ int16 *attnums, Oid *atttypids,
+ Oid *opclasses)
+{
+ List *indexoidlist;
+ ListCell *indexoidscan;
+ HeapTuple indexTuple = NULL;
+ Form_pg_index indexStruct = NULL;
+ Datum indclassDatum;
+ bool isnull;
+ oidvector *indclass;
+ int i;
+
+ /*
+ * Get the list of index OIDs for the table from the relcache, and look up
+ * each one in the pg_index syscache until we find one marked primary key
+ * (hopefully there isn't more than one such). Insist it's valid, too.
+ */
+ *indexOid = InvalidOid;
+
+ indexoidlist = RelationGetIndexList(pkrel);
+
+ foreach(indexoidscan, indexoidlist)
+ {
+ Oid indexoid = lfirst_oid(indexoidscan);
+
+ indexTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexoid));
+ if (!HeapTupleIsValid(indexTuple))
+ elog(ERROR, "cache lookup failed for index %u", indexoid);
+ indexStruct = (Form_pg_index) GETSTRUCT(indexTuple);
+ if (indexStruct->indisprimary && indexStruct->indisvalid)
+ {
+ /*
+ * Refuse to use a deferrable primary key. This is per SQL spec,
+ * and there would be a lot of interesting semantic problems if we
+ * tried to allow it.
+ */
+ if (!indexStruct->indimmediate)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("cannot use a deferrable primary key for referenced table \"%s\"",
+ RelationGetRelationName(pkrel))));
+
+ /*
+ * Deliberately don't ReleaseSysCache here: indexTuple and
+ * indexStruct remain in use below; released at the end.
+ */
+ *indexOid = indexoid;
+ break;
+ }
+ ReleaseSysCache(indexTuple);
+ }
+
+ list_free(indexoidlist);
+
+ /*
+ * Check that we found it
+ */
+ if (!OidIsValid(*indexOid))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("there is no primary key for referenced table \"%s\"",
+ RelationGetRelationName(pkrel))));
+
+ /* Must get indclass the hard way */
+ indclassDatum = SysCacheGetAttr(INDEXRELID, indexTuple,
+ Anum_pg_index_indclass, &isnull);
+ Assert(!isnull);
+ indclass = (oidvector *) DatumGetPointer(indclassDatum);
+
+ /*
+ * Now build the list of PK attributes from the indkey definition (we
+ * assume a primary key cannot have expressional elements)
+ */
+ *attnamelist = NIL;
+ for (i = 0; i < indexStruct->indnkeyatts; i++)
+ {
+ int pkattno = indexStruct->indkey.values[i];
+
+ attnums[i] = pkattno;
+ atttypids[i] = attnumTypeId(pkrel, pkattno);
+ opclasses[i] = indclass->values[i];
+ *attnamelist = lappend(*attnamelist,
+ makeString(pstrdup(NameStr(*attnumAttName(pkrel, pkattno)))));
+ }
+
+ ReleaseSysCache(indexTuple);
+
+ return i;
+}
+
+/*
+ * transformFkeyCheckAttrs -
+ *
+ * Make sure that the attributes of a referenced table belong to a unique
+ * (or primary key) constraint. Return the OID of the index supporting
+ * the constraint, as well as the opclasses associated with the index
+ * columns.
+ *
+ * attnums[0..numattrs-1] are the referenced columns; opclasses is a
+ * caller-supplied output array filled positionally (opclasses[i] goes
+ * with attnums[i], regardless of the index's own column order).
+ * Errors out if no suitable index exists.
+ */
+static Oid
+transformFkeyCheckAttrs(Relation pkrel,
+ int numattrs, int16 *attnums,
+ Oid *opclasses) /* output parameter */
+{
+ Oid indexoid = InvalidOid;
+ bool found = false;
+ bool found_deferrable = false;
+ List *indexoidlist;
+ ListCell *indexoidscan;
+ int i,
+ j;
+
+ /*
+ * Reject duplicate appearances of columns in the referenced-columns list.
+ * Such a case is forbidden by the SQL standard, and even if we thought it
+ * useful to allow it, there would be ambiguity about how to match the
+ * list to unique indexes (in particular, it'd be unclear which index
+ * opclass goes with which FK column).
+ */
+ for (i = 0; i < numattrs; i++)
+ {
+ for (j = i + 1; j < numattrs; j++)
+ {
+ if (attnums[i] == attnums[j])
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_FOREIGN_KEY),
+ errmsg("foreign key referenced-columns list must not contain duplicates")));
+ }
+ }
+
+ /*
+ * Get the list of index OIDs for the table from the relcache, and look up
+ * each one in the pg_index syscache, and match unique indexes to the list
+ * of attnums we are given.
+ */
+ indexoidlist = RelationGetIndexList(pkrel);
+
+ foreach(indexoidscan, indexoidlist)
+ {
+ HeapTuple indexTuple;
+ Form_pg_index indexStruct;
+
+ /* On success we break out with indexoid still set to this index. */
+ indexoid = lfirst_oid(indexoidscan);
+ indexTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexoid));
+ if (!HeapTupleIsValid(indexTuple))
+ elog(ERROR, "cache lookup failed for index %u", indexoid);
+ indexStruct = (Form_pg_index) GETSTRUCT(indexTuple);
+
+ /*
+ * Must have the right number of columns; must be unique and not a
+ * partial index; forget it if there are any expressions, too. Invalid
+ * indexes are out as well.
+ */
+ if (indexStruct->indnkeyatts == numattrs &&
+ indexStruct->indisunique &&
+ indexStruct->indisvalid &&
+ heap_attisnull(indexTuple, Anum_pg_index_indpred, NULL) &&
+ heap_attisnull(indexTuple, Anum_pg_index_indexprs, NULL))
+ {
+ Datum indclassDatum;
+ bool isnull;
+ oidvector *indclass;
+
+ /* Must get indclass the hard way */
+ indclassDatum = SysCacheGetAttr(INDEXRELID, indexTuple,
+ Anum_pg_index_indclass, &isnull);
+ Assert(!isnull);
+ indclass = (oidvector *) DatumGetPointer(indclassDatum);
+
+ /*
+ * The given attnum list may match the index columns in any order.
+ * Check for a match, and extract the appropriate opclasses while
+ * we're at it.
+ *
+ * We know that attnums[] is duplicate-free per the test at the
+ * start of this function, and we checked above that the number of
+ * index columns agrees, so if we find a match for each attnums[]
+ * entry then we must have a one-to-one match in some order.
+ */
+ for (i = 0; i < numattrs; i++)
+ {
+ found = false;
+ for (j = 0; j < numattrs; j++)
+ {
+ if (attnums[i] == indexStruct->indkey.values[j])
+ {
+ opclasses[i] = indclass->values[j];
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ break;
+ }
+
+ /*
+ * Refuse to use a deferrable unique/primary key. This is per SQL
+ * spec, and there would be a lot of interesting semantic problems
+ * if we tried to allow it.
+ */
+ if (found && !indexStruct->indimmediate)
+ {
+ /*
+ * Remember that we found an otherwise matching index, so that
+ * we can generate a more appropriate error message.
+ */
+ found_deferrable = true;
+ found = false;
+ }
+ }
+ ReleaseSysCache(indexTuple);
+ if (found)
+ break;
+ }
+
+ if (!found)
+ {
+ if (found_deferrable)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("cannot use a deferrable unique constraint for referenced table \"%s\"",
+ RelationGetRelationName(pkrel))));
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_FOREIGN_KEY),
+ errmsg("there is no unique constraint matching given keys for referenced table \"%s\"",
+ RelationGetRelationName(pkrel))));
+ }
+
+ list_free(indexoidlist);
+
+ return indexoid;
+}
+
+/*
+ * findFkeyCast -
+ *
+ * Wrapper around find_coercion_pathway() for ATAddForeignKeyConstraint().
+ * Caller has equal regard for binary coercibility and for an exact match.
+ */
+static CoercionPathType
+findFkeyCast(Oid targetTypeId, Oid sourceTypeId, Oid *funcid)
+{
+ CoercionPathType pathtype;
+
+ /* An exact type match needs no conversion function at all. */
+ if (targetTypeId == sourceTypeId)
+ {
+ *funcid = InvalidOid;
+ return COERCION_PATH_RELABELTYPE;
+ }
+
+ pathtype = find_coercion_pathway(targetTypeId, sourceTypeId,
+ COERCION_IMPLICIT, funcid);
+
+ /* A previously-relied-upon cast is now gone. */
+ if (pathtype == COERCION_PATH_NONE)
+ elog(ERROR, "could not find cast from %u to %u",
+ sourceTypeId, targetTypeId);
+
+ return pathtype;
+}
+
+/*
+ * Permissions checks on the referenced table for ADD FOREIGN KEY
+ *
+ * The current user must hold REFERENCES privilege either on the relation
+ * as a whole or on every one of the referenced columns.
+ *
+ * Note: we have already checked that the user owns the referencing table,
+ * else we'd have failed much earlier; no additional checks are needed for it.
+ */
+static void
+checkFkeyPermissions(Relation rel, int16 *attnums, int natts)
+{
+ Oid roleid = GetUserId();
+ int attidx;
+
+ /* Relation-level REFERENCES privilege is sufficient by itself. */
+ if (pg_class_aclcheck(RelationGetRelid(rel), roleid,
+ ACL_REFERENCES) == ACLCHECK_OK)
+ return;
+
+ /* Otherwise, demand column-level REFERENCES on each referenced column. */
+ for (attidx = 0; attidx < natts; attidx++)
+ {
+ AclResult aclresult;
+
+ aclresult = pg_attribute_aclcheck(RelationGetRelid(rel),
+ attnums[attidx],
+ roleid, ACL_REFERENCES);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error(aclresult, get_relkind_objtype(rel->rd_rel->relkind),
+ RelationGetRelationName(rel));
+ }
+}
+
+/*
+ * Scan the existing rows in a table to verify they meet a proposed FK
+ * constraint.
+ *
+ * conname is the constraint name (used for messages and as the dummy
+ * trigger name); rel is the referencing table, pkrel the referenced one;
+ * pkindOid is the referenced index and constraintOid the pg_constraint row.
+ *
+ * First tries RI_Initial_Check's single-query approach; if that declines,
+ * falls back to firing the check trigger once per existing row.
+ *
+ * Caller must have opened and locked both relations appropriately.
+ */
+static void
+validateForeignKeyConstraint(char *conname,
+ Relation rel,
+ Relation pkrel,
+ Oid pkindOid,
+ Oid constraintOid)
+{
+ TupleTableSlot *slot;
+ TableScanDesc scan;
+ Trigger trig;
+ Snapshot snapshot;
+ MemoryContext oldcxt;
+ MemoryContext perTupCxt;
+
+ ereport(DEBUG1,
+ (errmsg_internal("validating foreign key constraint \"%s\"", conname)));
+
+ /*
+ * Build a trigger call structure; we'll need it either way.
+ */
+ MemSet(&trig, 0, sizeof(trig));
+ trig.tgoid = InvalidOid;
+ trig.tgname = conname;
+ trig.tgenabled = TRIGGER_FIRES_ON_ORIGIN;
+ trig.tgisinternal = true;
+ trig.tgconstrrelid = RelationGetRelid(pkrel);
+ trig.tgconstrindid = pkindOid;
+ trig.tgconstraint = constraintOid;
+ trig.tgdeferrable = false;
+ trig.tginitdeferred = false;
+ /* we needn't fill in remaining fields */
+
+ /*
+ * See if we can do it with a single LEFT JOIN query. A false result
+ * indicates we must proceed with the fire-the-trigger method.
+ */
+ if (RI_Initial_Check(&trig, rel, pkrel))
+ return;
+
+ /*
+ * Scan through each tuple, calling RI_FKey_check_ins (insert trigger) as
+ * if that tuple had just been inserted. If any of those fail, it should
+ * ereport(ERROR) and that's that.
+ */
+ snapshot = RegisterSnapshot(GetLatestSnapshot());
+ slot = table_slot_create(rel, NULL);
+ scan = table_beginscan(rel, snapshot, 0, NULL);
+
+ /* Per-tuple context keeps trigger-call leakage from accumulating. */
+ perTupCxt = AllocSetContextCreate(CurrentMemoryContext,
+ "validateForeignKeyConstraint",
+ ALLOCSET_SMALL_SIZES);
+ oldcxt = MemoryContextSwitchTo(perTupCxt);
+
+ while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
+ {
+ LOCAL_FCINFO(fcinfo, 0);
+ TriggerData trigdata = {0};
+
+ CHECK_FOR_INTERRUPTS();
+
+ /*
+ * Make a call to the trigger function
+ *
+ * No parameters are passed, but we do set a context
+ */
+ MemSet(fcinfo, 0, SizeForFunctionCallInfo(0));
+
+ /*
+ * We assume RI_FKey_check_ins won't look at flinfo...
+ */
+ trigdata.type = T_TriggerData;
+ trigdata.tg_event = TRIGGER_EVENT_INSERT | TRIGGER_EVENT_ROW;
+ trigdata.tg_relation = rel;
+ trigdata.tg_trigtuple = ExecFetchSlotHeapTuple(slot, false, NULL);
+ trigdata.tg_trigslot = slot;
+ trigdata.tg_trigger = &trig;
+
+ fcinfo->context = (Node *) &trigdata;
+
+ RI_FKey_check_ins(fcinfo);
+
+ MemoryContextReset(perTupCxt);
+ }
+
+ MemoryContextSwitchTo(oldcxt);
+ MemoryContextDelete(perTupCxt);
+ table_endscan(scan);
+ UnregisterSnapshot(snapshot);
+ ExecDropSingleTupleTableSlot(slot);
+}
+
+/*
+ * CreateFKCheckTrigger
+ * Creates the insert (on_insert=true) or update "check" trigger that
+ * implements a given foreign key
+ *
+ * myRelOid/refRelOid are the referencing/referenced tables; constraintOid
+ * and indexOid identify the constraint and its supporting index.
+ * parentTrigOid is passed through to CreateTrigger as the parent-trigger
+ * link (InvalidOid when there is none).
+ *
+ * Returns the OID of the so created trigger.
+ */
+static Oid
+CreateFKCheckTrigger(Oid myRelOid, Oid refRelOid, Constraint *fkconstraint,
+ Oid constraintOid, Oid indexOid, Oid parentTrigOid,
+ bool on_insert)
+{
+ ObjectAddress trigAddress;
+ CreateTrigStmt *fk_trigger;
+
+ /*
+ * Note: for a self-referential FK (referencing and referenced tables are
+ * the same), it is important that the ON UPDATE action fires before the
+ * CHECK action, since both triggers will fire on the same row during an
+ * UPDATE event; otherwise the CHECK trigger will be checking a non-final
+ * state of the row. Triggers fire in name order, so we ensure this by
+ * using names like "RI_ConstraintTrigger_a_NNNN" for the action triggers
+ * and "RI_ConstraintTrigger_c_NNNN" for the check triggers.
+ */
+ fk_trigger = makeNode(CreateTrigStmt);
+ fk_trigger->replace = false;
+ fk_trigger->isconstraint = true;
+ fk_trigger->trigname = "RI_ConstraintTrigger_c";
+ fk_trigger->relation = NULL;
+
+ /* Either ON INSERT or ON UPDATE */
+ if (on_insert)
+ {
+ fk_trigger->funcname = SystemFuncName("RI_FKey_check_ins");
+ fk_trigger->events = TRIGGER_TYPE_INSERT;
+ }
+ else
+ {
+ fk_trigger->funcname = SystemFuncName("RI_FKey_check_upd");
+ fk_trigger->events = TRIGGER_TYPE_UPDATE;
+ }
+
+ fk_trigger->args = NIL;
+ fk_trigger->row = true;
+ fk_trigger->timing = TRIGGER_TYPE_AFTER;
+ fk_trigger->columns = NIL;
+ fk_trigger->whenClause = NULL;
+ fk_trigger->transitionRels = NIL;
+ /* Check triggers track the constraint's own deferrability settings. */
+ fk_trigger->deferrable = fkconstraint->deferrable;
+ fk_trigger->initdeferred = fkconstraint->initdeferred;
+ fk_trigger->constrrel = NULL;
+
+ trigAddress = CreateTrigger(fk_trigger, NULL, myRelOid, refRelOid,
+ constraintOid, indexOid, InvalidOid,
+ parentTrigOid, NULL, true, false);
+
+ /* Make changes-so-far visible */
+ CommandCounterIncrement();
+
+ return trigAddress.objectId;
+}
+
+/*
+ * createForeignKeyActionTriggers
+ * Create the referenced-side "action" triggers that implement a foreign
+ * key.
+ *
+ * parentDelTrigger/parentUpdTrigger are passed through to CreateTrigger as
+ * the parent-trigger links (InvalidOid when there is none).
+ *
+ * Returns the OIDs of the so created triggers in *deleteTrigOid and
+ * *updateTrigOid. Either output pointer may be NULL if not wanted.
+ */
+static void
+createForeignKeyActionTriggers(Relation rel, Oid refRelOid, Constraint *fkconstraint,
+ Oid constraintOid, Oid indexOid,
+ Oid parentDelTrigger, Oid parentUpdTrigger,
+ Oid *deleteTrigOid, Oid *updateTrigOid)
+{
+ CreateTrigStmt *fk_trigger;
+ ObjectAddress trigAddress;
+
+ /*
+ * Build and execute a CREATE CONSTRAINT TRIGGER statement for the ON
+ * DELETE action on the referenced table.
+ */
+ fk_trigger = makeNode(CreateTrigStmt);
+ fk_trigger->replace = false;
+ fk_trigger->isconstraint = true;
+ fk_trigger->trigname = "RI_ConstraintTrigger_a";
+ fk_trigger->relation = NULL;
+ fk_trigger->args = NIL;
+ fk_trigger->row = true;
+ fk_trigger->timing = TRIGGER_TYPE_AFTER;
+ fk_trigger->events = TRIGGER_TYPE_DELETE;
+ fk_trigger->columns = NIL;
+ fk_trigger->whenClause = NULL;
+ fk_trigger->transitionRels = NIL;
+ fk_trigger->constrrel = NULL;
+ /*
+ * Only NO ACTION triggers follow the constraint's deferrability; the
+ * other referential actions must fire immediately.
+ */
+ switch (fkconstraint->fk_del_action)
+ {
+ case FKCONSTR_ACTION_NOACTION:
+ fk_trigger->deferrable = fkconstraint->deferrable;
+ fk_trigger->initdeferred = fkconstraint->initdeferred;
+ fk_trigger->funcname = SystemFuncName("RI_FKey_noaction_del");
+ break;
+ case FKCONSTR_ACTION_RESTRICT:
+ fk_trigger->deferrable = false;
+ fk_trigger->initdeferred = false;
+ fk_trigger->funcname = SystemFuncName("RI_FKey_restrict_del");
+ break;
+ case FKCONSTR_ACTION_CASCADE:
+ fk_trigger->deferrable = false;
+ fk_trigger->initdeferred = false;
+ fk_trigger->funcname = SystemFuncName("RI_FKey_cascade_del");
+ break;
+ case FKCONSTR_ACTION_SETNULL:
+ fk_trigger->deferrable = false;
+ fk_trigger->initdeferred = false;
+ fk_trigger->funcname = SystemFuncName("RI_FKey_setnull_del");
+ break;
+ case FKCONSTR_ACTION_SETDEFAULT:
+ fk_trigger->deferrable = false;
+ fk_trigger->initdeferred = false;
+ fk_trigger->funcname = SystemFuncName("RI_FKey_setdefault_del");
+ break;
+ default:
+ elog(ERROR, "unrecognized FK action type: %d",
+ (int) fkconstraint->fk_del_action);
+ break;
+ }
+
+ trigAddress = CreateTrigger(fk_trigger, NULL, refRelOid,
+ RelationGetRelid(rel),
+ constraintOid, indexOid, InvalidOid,
+ parentDelTrigger, NULL, true, false);
+ if (deleteTrigOid)
+ *deleteTrigOid = trigAddress.objectId;
+
+ /* Make changes-so-far visible */
+ CommandCounterIncrement();
+
+ /*
+ * Build and execute a CREATE CONSTRAINT TRIGGER statement for the ON
+ * UPDATE action on the referenced table.
+ */
+ fk_trigger = makeNode(CreateTrigStmt);
+ fk_trigger->replace = false;
+ fk_trigger->isconstraint = true;
+ fk_trigger->trigname = "RI_ConstraintTrigger_a";
+ fk_trigger->relation = NULL;
+ fk_trigger->args = NIL;
+ fk_trigger->row = true;
+ fk_trigger->timing = TRIGGER_TYPE_AFTER;
+ fk_trigger->events = TRIGGER_TYPE_UPDATE;
+ fk_trigger->columns = NIL;
+ fk_trigger->whenClause = NULL;
+ fk_trigger->transitionRels = NIL;
+ fk_trigger->constrrel = NULL;
+ /* Same deferrability rule as for the DELETE trigger above. */
+ switch (fkconstraint->fk_upd_action)
+ {
+ case FKCONSTR_ACTION_NOACTION:
+ fk_trigger->deferrable = fkconstraint->deferrable;
+ fk_trigger->initdeferred = fkconstraint->initdeferred;
+ fk_trigger->funcname = SystemFuncName("RI_FKey_noaction_upd");
+ break;
+ case FKCONSTR_ACTION_RESTRICT:
+ fk_trigger->deferrable = false;
+ fk_trigger->initdeferred = false;
+ fk_trigger->funcname = SystemFuncName("RI_FKey_restrict_upd");
+ break;
+ case FKCONSTR_ACTION_CASCADE:
+ fk_trigger->deferrable = false;
+ fk_trigger->initdeferred = false;
+ fk_trigger->funcname = SystemFuncName("RI_FKey_cascade_upd");
+ break;
+ case FKCONSTR_ACTION_SETNULL:
+ fk_trigger->deferrable = false;
+ fk_trigger->initdeferred = false;
+ fk_trigger->funcname = SystemFuncName("RI_FKey_setnull_upd");
+ break;
+ case FKCONSTR_ACTION_SETDEFAULT:
+ fk_trigger->deferrable = false;
+ fk_trigger->initdeferred = false;
+ fk_trigger->funcname = SystemFuncName("RI_FKey_setdefault_upd");
+ break;
+ default:
+ elog(ERROR, "unrecognized FK action type: %d",
+ (int) fkconstraint->fk_upd_action);
+ break;
+ }
+
+ trigAddress = CreateTrigger(fk_trigger, NULL, refRelOid,
+ RelationGetRelid(rel),
+ constraintOid, indexOid, InvalidOid,
+ parentUpdTrigger, NULL, true, false);
+ if (updateTrigOid)
+ *updateTrigOid = trigAddress.objectId;
+}
+
+/*
+ * createForeignKeyCheckTriggers
+ * Create the referencing-side "check" triggers that implement a foreign
+ * key.
+ *
+ * Both triggers are built via CreateFKCheckTrigger: first the ON INSERT
+ * trigger, then the ON UPDATE one.
+ *
+ * Returns the OIDs of the so created triggers in *insertTrigOid and
+ * *updateTrigOid (both output pointers are required).
+ */
+static void
+createForeignKeyCheckTriggers(Oid myRelOid, Oid refRelOid,
+ Constraint *fkconstraint, Oid constraintOid,
+ Oid indexOid,
+ Oid parentInsTrigger, Oid parentUpdTrigger,
+ Oid *insertTrigOid, Oid *updateTrigOid)
+{
+ *insertTrigOid = CreateFKCheckTrigger(myRelOid, refRelOid, fkconstraint,
+ constraintOid, indexOid,
+ parentInsTrigger, true);
+ *updateTrigOid = CreateFKCheckTrigger(myRelOid, refRelOid, fkconstraint,
+ constraintOid, indexOid,
+ parentUpdTrigger, false);
+}
+
+/*
+ * ALTER TABLE DROP CONSTRAINT
+ *
+ * Like DROP COLUMN, we can't use the normal ALTER TABLE recursion mechanism.
+ */
+static void
+ATExecDropConstraint(Relation rel, const char *constrName,
+ DropBehavior behavior,
+ bool recurse, bool recursing,
+ bool missing_ok, LOCKMODE lockmode)
+{
+ List *children;
+ ListCell *child;
+ Relation conrel;
+ Form_pg_constraint con;
+ SysScanDesc scan;
+ ScanKeyData skey[3];
+ HeapTuple tuple;
+ bool found = false;
+ bool is_no_inherit_constraint = false;
+ char contype;
+
+ /* At top level, permission check was done in ATPrepCmd, else do it */
+ if (recursing)
+ ATSimplePermissions(AT_DropConstraint, rel, ATT_TABLE | ATT_FOREIGN_TABLE);
+
+ conrel = table_open(ConstraintRelationId, RowExclusiveLock);
+
+ /*
+ * Find and drop the target constraint
+ */
+ ScanKeyInit(&skey[0],
+ Anum_pg_constraint_conrelid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(RelationGetRelid(rel)));
+ ScanKeyInit(&skey[1],
+ Anum_pg_constraint_contypid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(InvalidOid));
+ ScanKeyInit(&skey[2],
+ Anum_pg_constraint_conname,
+ BTEqualStrategyNumber, F_NAMEEQ,
+ CStringGetDatum(constrName));
+ scan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId,
+ true, NULL, 3, skey);
+
+ /* There can be at most one matching row */
+ if (HeapTupleIsValid(tuple = systable_getnext(scan)))
+ {
+ ObjectAddress conobj;
+
+ con = (Form_pg_constraint) GETSTRUCT(tuple);
+
+ /* Don't drop inherited constraints */
+ if (con->coninhcount > 0 && !recursing)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+ errmsg("cannot drop inherited constraint \"%s\" of relation \"%s\"",
+ constrName, RelationGetRelationName(rel))));
+
+ is_no_inherit_constraint = con->connoinherit;
+ contype = con->contype;
+
+ /*
+ * If it's a foreign-key constraint, we'd better lock the referenced
+ * table and check that that's not in use, just as we've already done
+ * for the constrained table (else we might, eg, be dropping a trigger
+ * that has unfired events). But we can/must skip that in the
+ * self-referential case.
+ */
+ if (contype == CONSTRAINT_FOREIGN &&
+ con->confrelid != RelationGetRelid(rel))
+ {
+ Relation frel;
+
+ /* Must match lock taken by RemoveTriggerById: */
+ frel = table_open(con->confrelid, AccessExclusiveLock);
+ CheckTableNotInUse(frel, "ALTER TABLE");
+ table_close(frel, NoLock);
+ }
+
+ /*
+ * Perform the actual constraint deletion
+ */
+ conobj.classId = ConstraintRelationId;
+ conobj.objectId = con->oid;
+ conobj.objectSubId = 0;
+
+ performDeletion(&conobj, behavior, 0);
+
+ found = true;
+ }
+
+ systable_endscan(scan);
+
+ if (!found)
+ {
+ if (!missing_ok)
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("constraint \"%s\" of relation \"%s\" does not exist",
+ constrName, RelationGetRelationName(rel))));
+ }
+ else
+ {
+ ereport(NOTICE,
+ (errmsg("constraint \"%s\" of relation \"%s\" does not exist, skipping",
+ constrName, RelationGetRelationName(rel))));
+ table_close(conrel, RowExclusiveLock);
+ return;
+ }
+ }
+
+ /*
+ * For partitioned tables, non-CHECK inherited constraints are dropped via
+ * the dependency mechanism, so we're done here.
+ */
+ if (contype != CONSTRAINT_CHECK &&
+ rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ {
+ table_close(conrel, RowExclusiveLock);
+ return;
+ }
+
+ /*
+ * Propagate to children as appropriate. Unlike most other ALTER
+ * routines, we have to do this one level of recursion at a time; we can't
+ * use find_all_inheritors to do it in one pass.
+ */
+ if (!is_no_inherit_constraint)
+ children = find_inheritance_children(RelationGetRelid(rel), lockmode);
+ else
+ children = NIL;
+
+ /*
+ * For a partitioned table, if partitions exist and we are told not to
+ * recurse, it's a user error. It doesn't make sense to have a constraint
+ * be defined only on the parent, especially if it's a partitioned table.
+ */
+ if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
+ children != NIL && !recurse)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+ errmsg("cannot remove constraint from only the partitioned table when partitions exist"),
+ errhint("Do not specify the ONLY keyword.")));
+
+ foreach(child, children)
+ {
+ Oid childrelid = lfirst_oid(child);
+ Relation childrel;
+ HeapTuple copy_tuple;
+
+ /* find_inheritance_children already got lock */
+ childrel = table_open(childrelid, NoLock);
+ CheckTableNotInUse(childrel, "ALTER TABLE");
+
+ ScanKeyInit(&skey[0],
+ Anum_pg_constraint_conrelid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(childrelid));
+ ScanKeyInit(&skey[1],
+ Anum_pg_constraint_contypid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(InvalidOid));
+ ScanKeyInit(&skey[2],
+ Anum_pg_constraint_conname,
+ BTEqualStrategyNumber, F_NAMEEQ,
+ CStringGetDatum(constrName));
+ scan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId,
+ true, NULL, 3, skey);
+
+ /* There can be at most one matching row */
+ if (!HeapTupleIsValid(tuple = systable_getnext(scan)))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("constraint \"%s\" of relation \"%s\" does not exist",
+ constrName,
+ RelationGetRelationName(childrel))));
+
+ copy_tuple = heap_copytuple(tuple);
+
+ systable_endscan(scan);
+
+ con = (Form_pg_constraint) GETSTRUCT(copy_tuple);
+
+ /* Right now only CHECK constraints can be inherited */
+ if (con->contype != CONSTRAINT_CHECK)
+ elog(ERROR, "inherited constraint is not a CHECK constraint");
+
+ if (con->coninhcount <= 0) /* shouldn't happen */
+ elog(ERROR, "relation %u has non-inherited constraint \"%s\"",
+ childrelid, constrName);
+
+ if (recurse)
+ {
+ /*
+ * If the child constraint has other definition sources, just
+ * decrement its inheritance count; if not, recurse to delete it.
+ */
+ if (con->coninhcount == 1 && !con->conislocal)
+ {
+ /* Time to delete this child constraint, too */
+ ATExecDropConstraint(childrel, constrName, behavior,
+ true, true,
+ false, lockmode);
+ }
+ else
+ {
+ /* Child constraint must survive my deletion */
+ con->coninhcount--;
+ CatalogTupleUpdate(conrel, &copy_tuple->t_self, copy_tuple);
+
+ /* Make update visible */
+ CommandCounterIncrement();
+ }
+ }
+ else
+ {
+ /*
+ * If we were told to drop ONLY in this table (no recursion), we
+ * need to mark the inheritors' constraints as locally defined
+ * rather than inherited.
+ */
+ con->coninhcount--;
+ con->conislocal = true;
+
+ CatalogTupleUpdate(conrel, &copy_tuple->t_self, copy_tuple);
+
+ /* Make update visible */
+ CommandCounterIncrement();
+ }
+
+ heap_freetuple(copy_tuple);
+
+ table_close(childrel, NoLock);
+ }
+
+ table_close(conrel, RowExclusiveLock);
+}
+
+/*
+ * ALTER COLUMN TYPE
+ *
+ * Unlike other subcommand types, we do parse transformation for ALTER COLUMN
+ * TYPE during phase 1 --- the AlterTableCmd passed in here is already
+ * transformed (and must be, because we rely on some transformed fields).
+ *
+ * The point of this is that the execution of all ALTER COLUMN TYPEs for a
+ * table will be done "in parallel" during phase 3, so all the USING
+ * expressions should be parsed assuming the original column types. Also,
+ * this allows a USING expression to refer to a field that will be dropped.
+ *
+ * To make this work safely, AT_PASS_DROP then AT_PASS_ALTER_TYPE must be
+ * the first two execution steps in phase 2; they must not see the effects
+ * of any other subcommand types, since the USING expressions are parsed
+ * against the unmodified table's state.
+ */
+static void
+ATPrepAlterColumnType(List **wqueue,
+ AlteredTableInfo *tab, Relation rel,
+ bool recurse, bool recursing,
+ AlterTableCmd *cmd, LOCKMODE lockmode,
+ AlterTableUtilityContext *context)
+{
+ char *colName = cmd->name;
+ ColumnDef *def = (ColumnDef *) cmd->def;
+ TypeName *typeName = def->typeName;
+ Node *transform = def->cooked_default;
+ HeapTuple tuple;
+ Form_pg_attribute attTup;
+ AttrNumber attnum;
+ Oid targettype;
+ int32 targettypmod;
+ Oid targetcollid;
+ NewColumnValue *newval;
+ ParseState *pstate = make_parsestate(NULL);
+ AclResult aclresult;
+ bool is_expr;
+
+ if (rel->rd_rel->reloftype && !recursing)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot alter column type of typed table")));
+
+ /* lookup the attribute so we can check inheritance status */
+ tuple = SearchSysCacheAttName(RelationGetRelid(rel), colName);
+ if (!HeapTupleIsValid(tuple))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_COLUMN),
+ errmsg("column \"%s\" of relation \"%s\" does not exist",
+ colName, RelationGetRelationName(rel))));
+ attTup = (Form_pg_attribute) GETSTRUCT(tuple);
+ attnum = attTup->attnum;
+
+ /* Can't alter a system attribute */
+ if (attnum <= 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot alter system column \"%s\"",
+ colName)));
+
+ /*
+ * Don't alter inherited columns. At outer level, there had better not be
+ * any inherited definition; when recursing, we assume this was checked at
+ * the parent level (see below).
+ */
+ if (attTup->attinhcount > 0 && !recursing)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+ errmsg("cannot alter inherited column \"%s\"",
+ colName)));
+
+ /* Don't alter columns used in the partition key */
+ if (has_partition_attrs(rel,
+ bms_make_singleton(attnum - FirstLowInvalidHeapAttributeNumber),
+ &is_expr))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+ errmsg("cannot alter column \"%s\" because it is part of the partition key of relation \"%s\"",
+ colName, RelationGetRelationName(rel))));
+
+ /* Look up the target type */
+ typenameTypeIdAndMod(NULL, typeName, &targettype, &targettypmod);
+
+ aclresult = pg_type_aclcheck(targettype, GetUserId(), ACL_USAGE);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error_type(aclresult, targettype);
+
+ /* And the collation */
+ targetcollid = GetColumnDefCollation(NULL, def, targettype);
+
+ /* make sure datatype is legal for a column */
+ CheckAttributeType(colName, targettype, targetcollid,
+ list_make1_oid(rel->rd_rel->reltype),
+ 0);
+
+ if (tab->relkind == RELKIND_RELATION ||
+ tab->relkind == RELKIND_PARTITIONED_TABLE)
+ {
+ /*
+ * Set up an expression to transform the old data value to the new
+ * type. If a USING option was given, use the expression as
+ * transformed by transformAlterTableStmt, else just take the old
+ * value and try to coerce it. We do this first so that type
+ * incompatibility can be detected before we waste effort, and because
+ * we need the expression to be parsed against the original table row
+ * type.
+ */
+ if (!transform)
+ {
+ transform = (Node *) makeVar(1, attnum,
+ attTup->atttypid, attTup->atttypmod,
+ attTup->attcollation,
+ 0);
+ }
+
+ transform = coerce_to_target_type(pstate,
+ transform, exprType(transform),
+ targettype, targettypmod,
+ COERCION_ASSIGNMENT,
+ COERCE_IMPLICIT_CAST,
+ -1);
+ if (transform == NULL)
+ {
+ /* error text depends on whether USING was specified or not */
+ if (def->cooked_default != NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("result of USING clause for column \"%s\""
+ " cannot be cast automatically to type %s",
+ colName, format_type_be(targettype)),
+ errhint("You might need to add an explicit cast.")));
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("column \"%s\" cannot be cast automatically to type %s",
+ colName, format_type_be(targettype)),
+ /* translator: USING is SQL, don't translate it */
+ errhint("You might need to specify \"USING %s::%s\".",
+ quote_identifier(colName),
+ format_type_with_typemod(targettype,
+ targettypmod))));
+ }
+
+ /* Fix collations after all else */
+ assign_expr_collations(pstate, transform);
+
+ /* Plan the expr now so we can accurately assess the need to rewrite. */
+ transform = (Node *) expression_planner((Expr *) transform);
+
+ /*
+ * Add a work queue item to make ATRewriteTable update the column
+ * contents.
+ */
+ newval = (NewColumnValue *) palloc0(sizeof(NewColumnValue));
+ newval->attnum = attnum;
+ newval->expr = (Expr *) transform;
+ newval->is_generated = false;
+
+ tab->newvals = lappend(tab->newvals, newval);
+ if (ATColumnChangeRequiresRewrite(transform, attnum))
+ tab->rewrite |= AT_REWRITE_COLUMN_REWRITE;
+ }
+ else if (transform)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("\"%s\" is not a table",
+ RelationGetRelationName(rel))));
+
+ if (!RELKIND_HAS_STORAGE(tab->relkind))
+ {
+ /*
+ * For relations without storage, do this check now. Regular tables
+ * will check it later when the table is being rewritten.
+ */
+ find_composite_type_dependencies(rel->rd_rel->reltype, rel, NULL);
+ }
+
+ ReleaseSysCache(tuple);
+
+ /*
+ * Recurse manually by queueing a new command for each child, if
+ * necessary. We cannot apply ATSimpleRecursion here because we need to
+ * remap attribute numbers in the USING expression, if any.
+ *
+ * If we are told not to recurse, there had better not be any child
+ * tables; else the alter would put them out of step.
+ */
+ if (recurse)
+ {
+ Oid relid = RelationGetRelid(rel);
+ List *child_oids,
+ *child_numparents;
+ ListCell *lo,
+ *li;
+
+ child_oids = find_all_inheritors(relid, lockmode,
+ &child_numparents);
+
+ /*
+ * find_all_inheritors does the recursive search of the inheritance
+ * hierarchy, so all we have to do is process all of the relids in the
+ * list that it returns.
+ */
+ forboth(lo, child_oids, li, child_numparents)
+ {
+ Oid childrelid = lfirst_oid(lo);
+ int numparents = lfirst_int(li);
+ Relation childrel;
+ HeapTuple childtuple;
+ Form_pg_attribute childattTup;
+
+ if (childrelid == relid)
+ continue;
+
+ /* find_all_inheritors already got lock */
+ childrel = relation_open(childrelid, NoLock);
+ CheckTableNotInUse(childrel, "ALTER TABLE");
+
+ /*
+ * Verify that the child doesn't have any inherited definitions of
+ * this column that came from outside this inheritance hierarchy.
+ * (renameatt makes a similar test, though in a different way
+ * because of its different recursion mechanism.)
+ */
+ childtuple = SearchSysCacheAttName(RelationGetRelid(childrel),
+ colName);
+ if (!HeapTupleIsValid(childtuple))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_COLUMN),
+ errmsg("column \"%s\" of relation \"%s\" does not exist",
+ colName, RelationGetRelationName(childrel))));
+ childattTup = (Form_pg_attribute) GETSTRUCT(childtuple);
+
+ if (childattTup->attinhcount > numparents)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+ errmsg("cannot alter inherited column \"%s\" of relation \"%s\"",
+ colName, RelationGetRelationName(childrel))));
+
+ ReleaseSysCache(childtuple);
+
+ /*
+ * Remap the attribute numbers. If no USING expression was
+ * specified, there is no need for this step.
+ */
+ if (def->cooked_default)
+ {
+ AttrMap *attmap;
+ bool found_whole_row;
+
+ /* create a copy to scribble on */
+ cmd = copyObject(cmd);
+
+ attmap = build_attrmap_by_name(RelationGetDescr(childrel),
+ RelationGetDescr(rel));
+ ((ColumnDef *) cmd->def)->cooked_default =
+ map_variable_attnos(def->cooked_default,
+ 1, 0,
+ attmap,
+ InvalidOid, &found_whole_row);
+ if (found_whole_row)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot convert whole-row table reference"),
+ errdetail("USING expression contains a whole-row table reference.")));
+ pfree(attmap);
+ }
+ ATPrepCmd(wqueue, childrel, cmd, false, true, lockmode, context);
+ relation_close(childrel, NoLock);
+ }
+ }
+ else if (!recursing &&
+ find_inheritance_children(RelationGetRelid(rel), NoLock) != NIL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+ errmsg("type of inherited column \"%s\" must be changed in child tables too",
+ colName)));
+
+ if (tab->relkind == RELKIND_COMPOSITE_TYPE)
+ ATTypedTableRecursion(wqueue, rel, cmd, lockmode, context);
+}
+
+/*
+ * When the data type of a column is changed, a rewrite might not be required
+ * if the new type is sufficiently identical to the old one, and the USING
+ * clause isn't trying to insert some other value. It's safe to skip the
+ * rewrite in these cases:
+ *
+ * - the old type is binary coercible to the new type
+ * - the new type is an unconstrained domain over the old type
+ * - {NEW,OLD} or {OLD,NEW} is {timestamptz,timestamp} and the timezone is UTC
+ *
+ * In the case of a constrained domain, we could get by with scanning the
+ * table and checking the constraint rather than actually rewriting it, but we
+ * don't currently try to do that.
+ */
+static bool
+ATColumnChangeRequiresRewrite(Node *expr, AttrNumber varattno)
+{
+ Assert(expr != NULL);
+
+ for (;;)
+ {
+ /* only one varno, so no need to check that */
+ if (IsA(expr, Var) && ((Var *) expr)->varattno == varattno)
+ return false;
+ else if (IsA(expr, RelabelType))
+ expr = (Node *) ((RelabelType *) expr)->arg;
+ else if (IsA(expr, CoerceToDomain))
+ {
+ CoerceToDomain *d = (CoerceToDomain *) expr;
+
+ if (DomainHasConstraints(d->resulttype))
+ return true;
+ expr = (Node *) d->arg;
+ }
+ else if (IsA(expr, FuncExpr))
+ {
+ FuncExpr *f = (FuncExpr *) expr;
+
+ switch (f->funcid)
+ {
+ case F_TIMESTAMPTZ_TIMESTAMP:
+ case F_TIMESTAMP_TIMESTAMPTZ:
+ if (TimestampTimestampTzRequiresRewrite())
+ return true;
+ else
+ expr = linitial(f->args);
+ break;
+ default:
+ return true;
+ }
+ }
+ else
+ return true;
+ }
+}
+
+/*
+ * ALTER COLUMN .. SET DATA TYPE
+ *
+ * Return the address of the modified column.
+ */
+static ObjectAddress
+ATExecAlterColumnType(AlteredTableInfo *tab, Relation rel,
+ AlterTableCmd *cmd, LOCKMODE lockmode)
+{
+ char *colName = cmd->name;
+ ColumnDef *def = (ColumnDef *) cmd->def;
+ TypeName *typeName = def->typeName;
+ HeapTuple heapTup;
+ Form_pg_attribute attTup,
+ attOldTup;
+ AttrNumber attnum;
+ HeapTuple typeTuple;
+ Form_pg_type tform;
+ Oid targettype;
+ int32 targettypmod;
+ Oid targetcollid;
+ Node *defaultexpr;
+ Relation attrelation;
+ Relation depRel;
+ ScanKeyData key[3];
+ SysScanDesc scan;
+ HeapTuple depTup;
+ ObjectAddress address;
+
+ /*
+ * Clear all the missing values if we're rewriting the table, since this
+ * renders them pointless.
+ */
+ if (tab->rewrite)
+ {
+ Relation newrel;
+
+ newrel = table_open(RelationGetRelid(rel), NoLock);
+ RelationClearMissing(newrel);
+ relation_close(newrel, NoLock);
+ /* make sure we don't conflict with later attribute modifications */
+ CommandCounterIncrement();
+ }
+
+ attrelation = table_open(AttributeRelationId, RowExclusiveLock);
+
+ /* Look up the target column */
+ heapTup = SearchSysCacheCopyAttName(RelationGetRelid(rel), colName);
+ if (!HeapTupleIsValid(heapTup)) /* shouldn't happen */
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_COLUMN),
+ errmsg("column \"%s\" of relation \"%s\" does not exist",
+ colName, RelationGetRelationName(rel))));
+ attTup = (Form_pg_attribute) GETSTRUCT(heapTup);
+ attnum = attTup->attnum;
+ attOldTup = TupleDescAttr(tab->oldDesc, attnum - 1);
+
+ /* Check for multiple ALTER TYPE on same column --- can't cope */
+ if (attTup->atttypid != attOldTup->atttypid ||
+ attTup->atttypmod != attOldTup->atttypmod)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot alter type of column \"%s\" twice",
+ colName)));
+
+ /* Look up the target type (should not fail, since prep found it) */
+ typeTuple = typenameType(NULL, typeName, &targettypmod);
+ tform = (Form_pg_type) GETSTRUCT(typeTuple);
+ targettype = tform->oid;
+ /* And the collation */
+ targetcollid = GetColumnDefCollation(NULL, def, targettype);
+
+ /*
+ * If there is a default expression for the column, get it and ensure we
+ * can coerce it to the new datatype. (We must do this before changing
+ * the column type, because build_column_default itself will try to
+ * coerce, and will not issue the error message we want if it fails.)
+ *
+ * We remove any implicit coercion steps at the top level of the old
+ * default expression; this has been agreed to satisfy the principle of
+ * least surprise. (The conversion to the new column type should act like
+ * it started from what the user sees as the stored expression, and the
+ * implicit coercions aren't going to be shown.)
+ */
+ if (attTup->atthasdef)
+ {
+ defaultexpr = build_column_default(rel, attnum);
+ Assert(defaultexpr);
+ defaultexpr = strip_implicit_coercions(defaultexpr);
+ defaultexpr = coerce_to_target_type(NULL, /* no UNKNOWN params */
+ defaultexpr, exprType(defaultexpr),
+ targettype, targettypmod,
+ COERCION_ASSIGNMENT,
+ COERCE_IMPLICIT_CAST,
+ -1);
+ if (defaultexpr == NULL)
+ {
+ if (attTup->attgenerated)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("generation expression for column \"%s\" cannot be cast automatically to type %s",
+ colName, format_type_be(targettype))));
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("default for column \"%s\" cannot be cast automatically to type %s",
+ colName, format_type_be(targettype))));
+ }
+ }
+ else
+ defaultexpr = NULL;
+
+ /*
+ * Find everything that depends on the column (constraints, indexes, etc),
+ * and record enough information to let us recreate the objects.
+ *
+ * The actual recreation does not happen here, but only after we have
+ * performed all the individual ALTER TYPE operations. We have to save
+ * the info before executing ALTER TYPE, though, else the deparser will
+ * get confused.
+ */
+ depRel = table_open(DependRelationId, RowExclusiveLock);
+
+ ScanKeyInit(&key[0],
+ Anum_pg_depend_refclassid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(RelationRelationId));
+ ScanKeyInit(&key[1],
+ Anum_pg_depend_refobjid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(RelationGetRelid(rel)));
+ ScanKeyInit(&key[2],
+ Anum_pg_depend_refobjsubid,
+ BTEqualStrategyNumber, F_INT4EQ,
+ Int32GetDatum((int32) attnum));
+
+ scan = systable_beginscan(depRel, DependReferenceIndexId, true,
+ NULL, 3, key);
+
+ while (HeapTupleIsValid(depTup = systable_getnext(scan)))
+ {
+ Form_pg_depend foundDep = (Form_pg_depend) GETSTRUCT(depTup);
+ ObjectAddress foundObject;
+
+ foundObject.classId = foundDep->classid;
+ foundObject.objectId = foundDep->objid;
+ foundObject.objectSubId = foundDep->objsubid;
+
+ switch (getObjectClass(&foundObject))
+ {
+ case OCLASS_CLASS:
+ {
+ char relKind = get_rel_relkind(foundObject.objectId);
+
+ if (relKind == RELKIND_INDEX ||
+ relKind == RELKIND_PARTITIONED_INDEX)
+ {
+ Assert(foundObject.objectSubId == 0);
+ RememberIndexForRebuilding(foundObject.objectId, tab);
+ }
+ else if (relKind == RELKIND_SEQUENCE)
+ {
+ /*
+ * This must be a SERIAL column's sequence. We need
+ * not do anything to it.
+ */
+ Assert(foundObject.objectSubId == 0);
+ }
+ else
+ {
+ /* Not expecting any other direct dependencies... */
+ elog(ERROR, "unexpected object depending on column: %s",
+ getObjectDescription(&foundObject, false));
+ }
+ break;
+ }
+
+ case OCLASS_CONSTRAINT:
+ Assert(foundObject.objectSubId == 0);
+ RememberConstraintForRebuilding(foundObject.objectId, tab);
+ break;
+
+ case OCLASS_REWRITE:
+ /* XXX someday see if we can cope with revising views */
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot alter type of a column used by a view or rule"),
+ errdetail("%s depends on column \"%s\"",
+ getObjectDescription(&foundObject, false),
+ colName)));
+ break;
+
+ case OCLASS_TRIGGER:
+
+ /*
+ * A trigger can depend on a column because the column is
+ * specified as an update target, or because the column is
+ * used in the trigger's WHEN condition. The first case would
+ * not require any extra work, but the second case would
+ * require updating the WHEN expression, which will take a
+ * significant amount of new code. Since we can't easily tell
+ * which case applies, we punt for both. FIXME someday.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot alter type of a column used in a trigger definition"),
+ errdetail("%s depends on column \"%s\"",
+ getObjectDescription(&foundObject, false),
+ colName)));
+ break;
+
+ case OCLASS_POLICY:
+
+ /*
+ * A policy can depend on a column because the column is
+ * specified in the policy's USING or WITH CHECK qual
+ * expressions. It might be possible to rewrite and recheck
+ * the policy expression, but punt for now. It's certainly
+ * easy enough to remove and recreate the policy; still, FIXME
+ * someday.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot alter type of a column used in a policy definition"),
+ errdetail("%s depends on column \"%s\"",
+ getObjectDescription(&foundObject, false),
+ colName)));
+ break;
+
+ case OCLASS_DEFAULT:
+ {
+ ObjectAddress col = GetAttrDefaultColumnAddress(foundObject.objectId);
+
+ if (col.objectId == RelationGetRelid(rel) &&
+ col.objectSubId == attnum)
+ {
+ /*
+ * Ignore the column's own default expression, which
+ * we will deal with below.
+ */
+ Assert(defaultexpr);
+ }
+ else
+ {
+ /*
+ * This must be a reference from the expression of a
+ * generated column elsewhere in the same table.
+ * Changing the type of a column that is used by a
+ * generated column is not allowed by SQL standard, so
+ * just punt for now. It might be doable with some
+ * thinking and effort.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot alter type of a column used by a generated column"),
+ errdetail("Column \"%s\" is used by generated column \"%s\".",
+ colName,
+ get_attname(col.objectId,
+ col.objectSubId,
+ false))));
+ }
+ break;
+ }
+
+ case OCLASS_STATISTIC_EXT:
+
+ /*
+ * Give the extended-stats machinery a chance to fix anything
+ * that this column type change would break.
+ */
+ RememberStatisticsForRebuilding(foundObject.objectId, tab);
+ break;
+
+ case OCLASS_PROC:
+ case OCLASS_TYPE:
+ case OCLASS_CAST:
+ case OCLASS_COLLATION:
+ case OCLASS_CONVERSION:
+ case OCLASS_LANGUAGE:
+ case OCLASS_LARGEOBJECT:
+ case OCLASS_OPERATOR:
+ case OCLASS_OPCLASS:
+ case OCLASS_OPFAMILY:
+ case OCLASS_AM:
+ case OCLASS_AMOP:
+ case OCLASS_AMPROC:
+ case OCLASS_SCHEMA:
+ case OCLASS_TSPARSER:
+ case OCLASS_TSDICT:
+ case OCLASS_TSTEMPLATE:
+ case OCLASS_TSCONFIG:
+ case OCLASS_ROLE:
+ case OCLASS_DATABASE:
+ case OCLASS_TBLSPACE:
+ case OCLASS_FDW:
+ case OCLASS_FOREIGN_SERVER:
+ case OCLASS_USER_MAPPING:
+ case OCLASS_DEFACL:
+ case OCLASS_EXTENSION:
+ case OCLASS_EVENT_TRIGGER:
+ case OCLASS_PARAMETER_ACL:
+ case OCLASS_PUBLICATION:
+ case OCLASS_PUBLICATION_NAMESPACE:
+ case OCLASS_PUBLICATION_REL:
+ case OCLASS_SUBSCRIPTION:
+ case OCLASS_TRANSFORM:
+
+ /*
+ * We don't expect any of these sorts of objects to depend on
+ * a column.
+ */
+ elog(ERROR, "unexpected object depending on column: %s",
+ getObjectDescription(&foundObject, false));
+ break;
+
+ /*
+ * There's intentionally no default: case here; we want the
+ * compiler to warn if a new OCLASS hasn't been handled above.
+ */
+ }
+ }
+
+ systable_endscan(scan);
+
+ /*
+ * Now scan for dependencies of this column on other things. The only
+ * things we should find are the dependency on the column datatype and
+ * possibly a collation dependency. Those can be removed.
+ */
+ ScanKeyInit(&key[0],
+ Anum_pg_depend_classid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(RelationRelationId));
+ ScanKeyInit(&key[1],
+ Anum_pg_depend_objid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(RelationGetRelid(rel)));
+ ScanKeyInit(&key[2],
+ Anum_pg_depend_objsubid,
+ BTEqualStrategyNumber, F_INT4EQ,
+ Int32GetDatum((int32) attnum));
+
+ scan = systable_beginscan(depRel, DependDependerIndexId, true,
+ NULL, 3, key);
+
+ while (HeapTupleIsValid(depTup = systable_getnext(scan)))
+ {
+ Form_pg_depend foundDep = (Form_pg_depend) GETSTRUCT(depTup);
+ ObjectAddress foundObject;
+
+ foundObject.classId = foundDep->refclassid;
+ foundObject.objectId = foundDep->refobjid;
+ foundObject.objectSubId = foundDep->refobjsubid;
+
+ if (foundDep->deptype != DEPENDENCY_NORMAL)
+ elog(ERROR, "found unexpected dependency type '%c'",
+ foundDep->deptype);
+ if (!(foundDep->refclassid == TypeRelationId &&
+ foundDep->refobjid == attTup->atttypid) &&
+ !(foundDep->refclassid == CollationRelationId &&
+ foundDep->refobjid == attTup->attcollation))
+ elog(ERROR, "found unexpected dependency for column: %s",
+ getObjectDescription(&foundObject, false));
+
+ CatalogTupleDelete(depRel, &depTup->t_self);
+ }
+
+ systable_endscan(scan);
+
+ table_close(depRel, RowExclusiveLock);
+
+ /*
+ * Here we go --- change the recorded column type and collation. (Note
+ * heapTup is a copy of the syscache entry, so okay to scribble on.) First
+ * fix up the missing value if any.
+ */
+ if (attTup->atthasmissing)
+ {
+ Datum missingval;
+ bool missingNull;
+
+ /* if rewrite is true the missing value should already be cleared */
+ Assert(tab->rewrite == 0);
+
+ /* Get the missing value datum */
+ missingval = heap_getattr(heapTup,
+ Anum_pg_attribute_attmissingval,
+ attrelation->rd_att,
+ &missingNull);
+
+ /* if it's a null array there is nothing to do */
+
+ if (!missingNull)
+ {
+ /*
+ * Get the datum out of the array and repack it in a new array
+ * built with the new type data. We assume that since the table
+ * doesn't need rewriting, the actual Datum doesn't need to be
+ * changed, only the array metadata.
+ */
+
+ int one = 1;
+ bool isNull;
+ Datum valuesAtt[Natts_pg_attribute];
+ bool nullsAtt[Natts_pg_attribute];
+ bool replacesAtt[Natts_pg_attribute];
+ HeapTuple newTup;
+
+ MemSet(valuesAtt, 0, sizeof(valuesAtt));
+ MemSet(nullsAtt, false, sizeof(nullsAtt));
+ MemSet(replacesAtt, false, sizeof(replacesAtt));
+
+ missingval = array_get_element(missingval,
+ 1,
+ &one,
+ 0,
+ attTup->attlen,
+ attTup->attbyval,
+ attTup->attalign,
+ &isNull);
+ missingval = PointerGetDatum(construct_array(&missingval,
+ 1,
+ targettype,
+ tform->typlen,
+ tform->typbyval,
+ tform->typalign));
+
+ valuesAtt[Anum_pg_attribute_attmissingval - 1] = missingval;
+ replacesAtt[Anum_pg_attribute_attmissingval - 1] = true;
+ nullsAtt[Anum_pg_attribute_attmissingval - 1] = false;
+
+ newTup = heap_modify_tuple(heapTup, RelationGetDescr(attrelation),
+ valuesAtt, nullsAtt, replacesAtt);
+ heap_freetuple(heapTup);
+ heapTup = newTup;
+ attTup = (Form_pg_attribute) GETSTRUCT(heapTup);
+ }
+ }
+
+ attTup->atttypid = targettype;
+ attTup->atttypmod = targettypmod;
+ attTup->attcollation = targetcollid;
+ attTup->attndims = list_length(typeName->arrayBounds);
+ attTup->attlen = tform->typlen;
+ attTup->attbyval = tform->typbyval;
+ attTup->attalign = tform->typalign;
+ attTup->attstorage = tform->typstorage;
+ attTup->attcompression = InvalidCompressionMethod;
+
+ ReleaseSysCache(typeTuple);
+
+ CatalogTupleUpdate(attrelation, &heapTup->t_self, heapTup);
+
+ table_close(attrelation, RowExclusiveLock);
+
+ /* Install dependencies on new datatype and collation */
+ add_column_datatype_dependency(RelationGetRelid(rel), attnum, targettype);
+ add_column_collation_dependency(RelationGetRelid(rel), attnum, targetcollid);
+
+ /*
+ * Drop any pg_statistic entry for the column, since it's now wrong type
+ */
+ RemoveStatistics(RelationGetRelid(rel), attnum);
+
+ InvokeObjectPostAlterHook(RelationRelationId,
+ RelationGetRelid(rel), attnum);
+
+ /*
+ * Update the default, if present, by brute force --- remove and re-add
+ * the default. Probably unsafe to take shortcuts, since the new version
+ * may well have additional dependencies. (It's okay to do this now,
+ * rather than after other ALTER TYPE commands, since the default won't
+ * depend on other column types.)
+ */
+ if (defaultexpr)
+ {
+ /*
+ * If it's a GENERATED default, drop its dependency records, in
+ * particular its INTERNAL dependency on the column, which would
+ * otherwise cause dependency.c to refuse to perform the deletion.
+ */
+ if (attTup->attgenerated)
+ {
+ Oid attrdefoid = GetAttrDefaultOid(RelationGetRelid(rel), attnum);
+
+ if (!OidIsValid(attrdefoid))
+ elog(ERROR, "could not find attrdef tuple for relation %u attnum %d",
+ RelationGetRelid(rel), attnum);
+ (void) deleteDependencyRecordsFor(AttrDefaultRelationId, attrdefoid, false);
+ }
+
+ /*
+ * Make updates-so-far visible, particularly the new pg_attribute row
+ * which will be updated again.
+ */
+ CommandCounterIncrement();
+
+ /*
+ * We use RESTRICT here for safety, but at present we do not expect
+ * anything to depend on the default.
+ */
+ RemoveAttrDefault(RelationGetRelid(rel), attnum, DROP_RESTRICT, true,
+ true);
+
+ StoreAttrDefault(rel, attnum, defaultexpr, true, false);
+ }
+
+ ObjectAddressSubSet(address, RelationRelationId,
+ RelationGetRelid(rel), attnum);
+
+ /* Cleanup */
+ heap_freetuple(heapTup);
+
+ return address;
+}
+
+/*
+ * Subroutine for ATExecAlterColumnType: remember that a replica identity
+ * needs to be reset.
+ */
+static void
+RememberReplicaIdentityForRebuilding(Oid indoid, AlteredTableInfo *tab)
+{
+ if (!get_index_isreplident(indoid))
+ return;
+
+ if (tab->replicaIdentityIndex)
+ elog(ERROR, "relation %u has multiple indexes marked as replica identity", tab->relid);
+
+ tab->replicaIdentityIndex = get_rel_name(indoid);
+}
+
+/*
+ * Subroutine for ATExecAlterColumnType: remember any clustered index.
+ */
+static void
+RememberClusterOnForRebuilding(Oid indoid, AlteredTableInfo *tab)
+{
+	/* Only an index the table is currently clustered on is of interest */
+	if (get_index_isclustered(indoid))
+	{
+		/* A relation can have at most one clustered index */
+		if (tab->clusterOnIndex)
+			elog(ERROR, "relation %u has multiple clustered indexes", tab->relid);
+
+		/* Stash the index name so cleanup can queue a CLUSTER ON command */
+		tab->clusterOnIndex = get_rel_name(indoid);
+	}
+}
+
+/*
+ * Subroutine for ATExecAlterColumnType: remember that a constraint needs
+ * to be rebuilt (which we might already know).
+ */
+static void
+RememberConstraintForRebuilding(Oid conoid, AlteredTableInfo *tab)
+{
+	char	   *defstring;
+	Oid			indoid;
+
+	/*
+	 * Skip constraints we've seen already.  This de-duplication matters for
+	 * two separate reasons: recreating the same constraint twice would fail,
+	 * and when a constraint mentions several columns being retyped, its
+	 * definition string must be captured before the first type change is
+	 * applied -- ruleutils.c would get confused if asked afterwards.
+	 */
+	if (list_member_oid(tab->changedConstraintOids, conoid))
+		return;
+
+	/* Capture the constraint's current definition string */
+	defstring = pg_get_constraintdef_command(conoid);
+
+	tab->changedConstraintOids = lappend_oid(tab->changedConstraintOids,
+											 conoid);
+	tab->changedConstraintDefs = lappend(tab->changedConstraintDefs,
+										 defstring);
+
+	/*
+	 * If the constraint has an associated index, note whether that index is
+	 * the table's replica identity or its clustered index, so that
+	 * ATPostAlterTypeCleanup() can queue commands to restore those
+	 * properties after the rebuild.
+	 */
+	indoid = get_constraint_index(conoid);
+	if (OidIsValid(indoid))
+	{
+		RememberReplicaIdentityForRebuilding(indoid, tab);
+		RememberClusterOnForRebuilding(indoid, tab);
+	}
+}
+
+/*
+ * Subroutine for ATExecAlterColumnType: remember that an index needs
+ * to be rebuilt (which we might already know).
+ */
+static void
+RememberIndexForRebuilding(Oid indoid, AlteredTableInfo *tab)
+{
+	Oid			conoid;
+
+	/*
+	 * Skip indexes we've seen already.  This de-duplication matters for two
+	 * separate reasons: recreating the same index twice would fail, and when
+	 * an index covers several columns being retyped, its definition string
+	 * must be captured before the first type change is applied --
+	 * ruleutils.c would get confused if asked afterwards.
+	 */
+	if (list_member_oid(tab->changedIndexOids, indoid))
+		return;
+
+	/*
+	 * If the index implements a constraint, rebuild the constraint rather
+	 * than the bare index.  Usually a constraint index has dependencies only
+	 * on its constraint and we never get here for it; but such an index can
+	 * also depend directly on table columns, e.g. via a partial exclusion
+	 * constraint, so the check is needed.
+	 */
+	conoid = get_index_constraint(indoid);
+	if (OidIsValid(conoid))
+		RememberConstraintForRebuilding(conoid, tab);
+	else
+	{
+		/* Plain index: capture its current definition string */
+		char	   *defstring = pg_get_indexdef_string(indoid);
+
+		tab->changedIndexOids = lappend_oid(tab->changedIndexOids,
+											indoid);
+		tab->changedIndexDefs = lappend(tab->changedIndexDefs,
+										defstring);
+
+		/*
+		 * Note whether this index is the table's replica identity or its
+		 * clustered index, so that ATPostAlterTypeCleanup() can queue
+		 * commands to restore those properties after the rebuild.
+		 */
+		RememberReplicaIdentityForRebuilding(indoid, tab);
+		RememberClusterOnForRebuilding(indoid, tab);
+	}
+}
+
+/*
+ * Subroutine for ATExecAlterColumnType: remember that a statistics object
+ * needs to be rebuilt (which we might already know).
+ */
+static void
+RememberStatisticsForRebuilding(Oid stxoid, AlteredTableInfo *tab)
+{
+	char	   *defstring;
+
+	/*
+	 * Skip statistics objects we've seen already.  This de-duplication
+	 * matters for two separate reasons: recreating the same object twice
+	 * would fail, and when the object covers several columns being retyped,
+	 * its definition string must be captured before the first type change is
+	 * applied -- ruleutils.c would get confused if asked afterwards.
+	 */
+	if (list_member_oid(tab->changedStatisticsOids, stxoid))
+		return;
+
+	/* Capture the statistics object's current definition string */
+	defstring = pg_get_statisticsobjdef_string(stxoid);
+
+	tab->changedStatisticsOids = lappend_oid(tab->changedStatisticsOids,
+											 stxoid);
+	tab->changedStatisticsDefs = lappend(tab->changedStatisticsDefs,
+										 defstring);
+}
+
+/*
+ * Cleanup after we've finished all the ALTER TYPE operations for a
+ * particular relation. We have to drop and recreate all the indexes
+ * and constraints that depend on the altered columns. We do the
+ * actual dropping here, but re-creation is managed by adding work
+ * queue entries to do those steps later.
+ */
+static void
+ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode)
+{
+	ObjectAddress obj;			/* scratch address for the objects list */
+	ObjectAddresses *objects;	/* everything to drop, in one batch */
+	ListCell *def_item;
+	ListCell *oid_item;
+
+	/*
+	 * Collect all the constraints and indexes to drop so we can process them
+	 * in a single call. That way we don't have to worry about dependencies
+	 * among them.
+	 */
+	objects = new_object_addresses();
+
+	/*
+	 * Re-parse the index and constraint definitions, and attach them to the
+	 * appropriate work queue entries. We do this before dropping because in
+	 * the case of a FOREIGN KEY constraint, we might not yet have exclusive
+	 * lock on the table the constraint is attached to, and we need to get
+	 * that before reparsing/dropping.
+	 *
+	 * We can't rely on the output of deparsing to tell us which relation to
+	 * operate on, because concurrent activity might have made the name
+	 * resolve differently. Instead, we've got to use the OID of the
+	 * constraint or index we're processing to figure out which relation to
+	 * operate on.
+	 */
+	forboth(oid_item, tab->changedConstraintOids,
+			def_item, tab->changedConstraintDefs)
+	{
+		Oid oldId = lfirst_oid(oid_item);
+		HeapTuple tup;
+		Form_pg_constraint con;
+		Oid relid;
+		Oid confrelid;
+		char contype;
+		bool conislocal;
+
+		/* Fetch the constraint's pg_constraint entry */
+		tup = SearchSysCache1(CONSTROID, ObjectIdGetDatum(oldId));
+		if (!HeapTupleIsValid(tup)) /* should not happen */
+			elog(ERROR, "cache lookup failed for constraint %u", oldId);
+		con = (Form_pg_constraint) GETSTRUCT(tup);
+		if (OidIsValid(con->conrelid))
+			relid = con->conrelid;
+		else
+		{
+			/* must be a domain constraint */
+			relid = get_typ_typrelid(getBaseType(con->contypid));
+			if (!OidIsValid(relid))
+				elog(ERROR, "could not identify relation associated with constraint %u", oldId);
+		}
+		/* copy out the fields we need before releasing the tuple */
+		confrelid = con->confrelid;
+		contype = con->contype;
+		conislocal = con->conislocal;
+		ReleaseSysCache(tup);
+
+		/* queue the old constraint for the batched drop below */
+		ObjectAddressSet(obj, ConstraintRelationId, oldId);
+		add_exact_object_address(&obj, objects);
+
+		/*
+		 * If the constraint is inherited (only), we don't want to inject a
+		 * new definition here; it'll get recreated when ATAddCheckConstraint
+		 * recurses from adding the parent table's constraint. But we had to
+		 * carry the info this far so that we can drop the constraint below.
+		 */
+		if (!conislocal)
+			continue;
+
+		/*
+		 * When rebuilding an FK constraint that references the table we're
+		 * modifying, we might not yet have any lock on the FK's table, so get
+		 * one now. We'll need AccessExclusiveLock for the DROP CONSTRAINT
+		 * step, so there's no value in asking for anything weaker.
+		 */
+		if (relid != tab->relid && contype == CONSTRAINT_FOREIGN)
+			LockRelationOid(relid, AccessExclusiveLock);
+
+		ATPostAlterTypeParse(oldId, relid, confrelid,
+							 (char *) lfirst(def_item),
+							 wqueue, lockmode, tab->rewrite);
+	}
+	/* Now process standalone indexes the same way */
+	forboth(oid_item, tab->changedIndexOids,
+			def_item, tab->changedIndexDefs)
+	{
+		Oid oldId = lfirst_oid(oid_item);
+		Oid relid;
+
+		relid = IndexGetRelation(oldId, false);
+		ATPostAlterTypeParse(oldId, relid, InvalidOid,
+							 (char *) lfirst(def_item),
+							 wqueue, lockmode, tab->rewrite);
+
+		/* queue the old index for the batched drop below */
+		ObjectAddressSet(obj, RelationRelationId, oldId);
+		add_exact_object_address(&obj, objects);
+	}
+
+	/* add dependencies for new statistics */
+	forboth(oid_item, tab->changedStatisticsOids,
+			def_item, tab->changedStatisticsDefs)
+	{
+		Oid oldId = lfirst_oid(oid_item);
+		Oid relid;
+
+		relid = StatisticsGetRelation(oldId, false);
+		ATPostAlterTypeParse(oldId, relid, InvalidOid,
+							 (char *) lfirst(def_item),
+							 wqueue, lockmode, tab->rewrite);
+
+		/* queue the old statistics object for the batched drop below */
+		ObjectAddressSet(obj, StatisticExtRelationId, oldId);
+		add_exact_object_address(&obj, objects);
+	}
+
+	/*
+	 * Queue up command to restore replica identity index marking
+	 */
+	if (tab->replicaIdentityIndex)
+	{
+		AlterTableCmd *cmd = makeNode(AlterTableCmd);
+		ReplicaIdentityStmt *subcmd = makeNode(ReplicaIdentityStmt);
+
+		subcmd->identity_type = REPLICA_IDENTITY_INDEX;
+		subcmd->name = tab->replicaIdentityIndex;
+		cmd->subtype = AT_ReplicaIdentity;
+		cmd->def = (Node *) subcmd;
+
+		/* do it after indexes and constraints */
+		tab->subcmds[AT_PASS_OLD_CONSTR] =
+			lappend(tab->subcmds[AT_PASS_OLD_CONSTR], cmd);
+	}
+
+	/*
+	 * Queue up command to restore marking of index used for cluster.
+	 */
+	if (tab->clusterOnIndex)
+	{
+		AlterTableCmd *cmd = makeNode(AlterTableCmd);
+
+		cmd->subtype = AT_ClusterOn;
+		cmd->name = tab->clusterOnIndex;
+
+		/* do it after indexes and constraints */
+		tab->subcmds[AT_PASS_OLD_CONSTR] =
+			lappend(tab->subcmds[AT_PASS_OLD_CONSTR], cmd);
+	}
+
+	/*
+	 * It should be okay to use DROP_RESTRICT here, since nothing else should
+	 * be depending on these objects.
+	 */
+	performMultipleDeletions(objects, DROP_RESTRICT, PERFORM_DELETION_INTERNAL);
+
+	free_object_addresses(objects);
+
+	/*
+	 * The objects will get recreated during subsequent passes over the work
+	 * queue.
+	 */
+}
+
+/*
+ * Parse the previously-saved definition string for a constraint, index or
+ * statistics object against the newly-established column data type(s), and
+ * queue up the resulting command parsetrees for execution.
+ *
+ * This might fail if, for example, you have a WHERE clause that uses an
+ * operator that's not available for the new column type.
+ */
+static void
+ATPostAlterTypeParse(Oid oldId, Oid oldRelId, Oid refRelId, char *cmd,
+					 List **wqueue, LOCKMODE lockmode, bool rewrite)
+{
+	List *raw_parsetree_list;
+	List *querytree_list;
+	ListCell *list_item;
+	Relation rel;
+
+	/*
+	 * We expect that we will get only ALTER TABLE and CREATE INDEX
+	 * statements. Hence, there is no need to pass them through
+	 * parse_analyze_*() or the rewriter, but instead we need to pass them
+	 * through parse_utilcmd.c to make them ready for execution.
+	 */
+	raw_parsetree_list = raw_parser(cmd, RAW_PARSE_DEFAULT);
+	querytree_list = NIL;
+	foreach(list_item, raw_parsetree_list)
+	{
+		RawStmt *rs = lfirst_node(RawStmt, list_item);
+		Node *stmt = rs->stmt;
+
+		if (IsA(stmt, IndexStmt))
+			querytree_list = lappend(querytree_list,
+									 transformIndexStmt(oldRelId,
+														(IndexStmt *) stmt,
+														cmd));
+		else if (IsA(stmt, AlterTableStmt))
+		{
+			/* transformAlterTableStmt may generate extra statements */
+			List *beforeStmts;
+			List *afterStmts;
+
+			stmt = (Node *) transformAlterTableStmt(oldRelId,
+													(AlterTableStmt *) stmt,
+													cmd,
+													&beforeStmts,
+													&afterStmts);
+			/* preserve required execution order: before, main, after */
+			querytree_list = list_concat(querytree_list, beforeStmts);
+			querytree_list = lappend(querytree_list, stmt);
+			querytree_list = list_concat(querytree_list, afterStmts);
+		}
+		else if (IsA(stmt, CreateStatsStmt))
+			querytree_list = lappend(querytree_list,
+									 transformStatsStmt(oldRelId,
+														(CreateStatsStmt *) stmt,
+														cmd));
+		else
+			querytree_list = lappend(querytree_list, stmt);
+	}
+
+	/* Caller should already have acquired whatever lock we need. */
+	rel = relation_open(oldRelId, NoLock);
+
+	/*
+	 * Attach each generated command to the proper place in the work queue.
+	 * Note this could result in creation of entirely new work-queue entries.
+	 *
+	 * Also note that we have to tweak the command subtypes, because it turns
+	 * out that re-creation of indexes and constraints has to act a bit
+	 * differently from initial creation.
+	 */
+	foreach(list_item, querytree_list)
+	{
+		Node *stm = (Node *) lfirst(list_item);
+		AlteredTableInfo *tab;
+
+		/* Find or create the work-queue entry for this relation */
+		tab = ATGetQueueEntry(wqueue, rel);
+
+		if (IsA(stm, IndexStmt))
+		{
+			IndexStmt *stmt = (IndexStmt *) stm;
+			AlterTableCmd *newcmd;
+
+			/* without a table rewrite, old index storage may be reusable */
+			if (!rewrite)
+				TryReuseIndex(oldId, stmt);
+			stmt->reset_default_tblspc = true;
+			/* keep the index's comment */
+			stmt->idxcomment = GetComment(oldId, RelationRelationId, 0);
+
+			newcmd = makeNode(AlterTableCmd);
+			newcmd->subtype = AT_ReAddIndex;
+			newcmd->def = (Node *) stmt;
+			tab->subcmds[AT_PASS_OLD_INDEX] =
+				lappend(tab->subcmds[AT_PASS_OLD_INDEX], newcmd);
+		}
+		else if (IsA(stm, AlterTableStmt))
+		{
+			AlterTableStmt *stmt = (AlterTableStmt *) stm;
+			ListCell *lcmd;
+
+			foreach(lcmd, stmt->cmds)
+			{
+				AlterTableCmd *cmd = lfirst_node(AlterTableCmd, lcmd);
+
+				if (cmd->subtype == AT_AddIndex)
+				{
+					/* constraint-backed index: oldId is the constraint OID */
+					IndexStmt *indstmt;
+					Oid indoid;
+
+					indstmt = castNode(IndexStmt, cmd->def);
+					indoid = get_constraint_index(oldId);
+
+					if (!rewrite)
+						TryReuseIndex(indoid, indstmt);
+					/* keep any comment on the index */
+					indstmt->idxcomment = GetComment(indoid,
+													 RelationRelationId, 0);
+					indstmt->reset_default_tblspc = true;
+
+					cmd->subtype = AT_ReAddIndex;
+					tab->subcmds[AT_PASS_OLD_INDEX] =
+						lappend(tab->subcmds[AT_PASS_OLD_INDEX], cmd);
+
+					/* recreate any comment on the constraint */
+					RebuildConstraintComment(tab,
+											 AT_PASS_OLD_INDEX,
+											 oldId,
+											 rel,
+											 NIL,
+											 indstmt->idxname);
+				}
+				else if (cmd->subtype == AT_AddConstraint)
+				{
+					Constraint *con = castNode(Constraint, cmd->def);
+
+					con->old_pktable_oid = refRelId;
+					/* rewriting neither side of a FK */
+					if (con->contype == CONSTR_FOREIGN &&
+						!rewrite && tab->rewrite == 0)
+						TryReuseForeignKey(oldId, con);
+					con->reset_default_tblspc = true;
+					cmd->subtype = AT_ReAddConstraint;
+					tab->subcmds[AT_PASS_OLD_CONSTR] =
+						lappend(tab->subcmds[AT_PASS_OLD_CONSTR], cmd);
+
+					/* recreate any comment on the constraint */
+					RebuildConstraintComment(tab,
+											 AT_PASS_OLD_CONSTR,
+											 oldId,
+											 rel,
+											 NIL,
+											 con->conname);
+				}
+				else if (cmd->subtype == AT_SetNotNull)
+				{
+					/*
+					 * The parser will create AT_SetNotNull subcommands for
+					 * columns of PRIMARY KEY indexes/constraints, but we need
+					 * not do anything with them here, because the columns'
+					 * NOT NULL marks will already have been propagated into
+					 * the new table definition.
+					 */
+				}
+				else
+					elog(ERROR, "unexpected statement subtype: %d",
+						 (int) cmd->subtype);
+			}
+		}
+		else if (IsA(stm, AlterDomainStmt))
+		{
+			AlterDomainStmt *stmt = (AlterDomainStmt *) stm;
+
+			if (stmt->subtype == 'C') /* ADD CONSTRAINT */
+			{
+				Constraint *con = castNode(Constraint, stmt->def);
+				AlterTableCmd *cmd = makeNode(AlterTableCmd);
+
+				cmd->subtype = AT_ReAddDomainConstraint;
+				cmd->def = (Node *) stmt;
+				tab->subcmds[AT_PASS_OLD_CONSTR] =
+					lappend(tab->subcmds[AT_PASS_OLD_CONSTR], cmd);
+
+				/* recreate any comment on the constraint */
+				RebuildConstraintComment(tab,
+										 AT_PASS_OLD_CONSTR,
+										 oldId,
+										 NULL,
+										 stmt->typeName,
+										 con->conname);
+			}
+			else
+				elog(ERROR, "unexpected statement subtype: %d",
+					 (int) stmt->subtype);
+		}
+		else if (IsA(stm, CreateStatsStmt))
+		{
+			CreateStatsStmt *stmt = (CreateStatsStmt *) stm;
+			AlterTableCmd *newcmd;
+
+			/* keep the statistics object's comment */
+			stmt->stxcomment = GetComment(oldId, StatisticExtRelationId, 0);
+
+			newcmd = makeNode(AlterTableCmd);
+			newcmd->subtype = AT_ReAddStatistics;
+			newcmd->def = (Node *) stmt;
+			tab->subcmds[AT_PASS_MISC] =
+				lappend(tab->subcmds[AT_PASS_MISC], newcmd);
+		}
+		else
+			elog(ERROR, "unexpected statement type: %d",
+				 (int) nodeTag(stm));
+	}
+
+	relation_close(rel, NoLock);
+}
+
+/*
+ * Subroutine for ATPostAlterTypeParse() to recreate any existing comment
+ * for a table or domain constraint that is being rebuilt.
+ *
+ * objid is the OID of the constraint.
+ * Pass "rel" for a table constraint, or "domname" (domain's qualified name
+ * as a string list) for a domain constraint.
+ * (We could dig that info, as well as the conname, out of the pg_constraint
+ * entry; but callers already have them so might as well pass them.)
+ */
+static void
+RebuildConstraintComment(AlteredTableInfo *tab, int pass, Oid objid,
+						 Relation rel, List *domname,
+						 const char *conname)
+{
+	char	   *comment_str;
+	CommentStmt *comment_stmt;
+	AlterTableCmd *atcmd;
+
+	/* If the old constraint had no comment, there is nothing to recreate */
+	comment_str = GetComment(objid, ConstraintRelationId, 0);
+	if (comment_str == NULL)
+		return;
+
+	/* Construct a COMMENT ON command, copying all strings for safety */
+	comment_stmt = makeNode(CommentStmt);
+	comment_stmt->comment = comment_str;
+	if (rel)
+	{
+		/* table constraint: identified by schema, table, constraint name */
+		comment_stmt->objtype = OBJECT_TABCONSTRAINT;
+		comment_stmt->object = (Node *)
+			list_make3(makeString(get_namespace_name(RelationGetNamespace(rel))),
+					   makeString(pstrdup(RelationGetRelationName(rel))),
+					   makeString(pstrdup(conname)));
+	}
+	else
+	{
+		/* domain constraint: identified by domain name, constraint name */
+		comment_stmt->objtype = OBJECT_DOMCONSTRAINT;
+		comment_stmt->object = (Node *)
+			list_make2(makeTypeNameFromNameList(copyObject(domname)),
+					   makeString(pstrdup(conname)));
+	}
+
+	/* Queue it as an AT_ReAddComment subcommand in the requested pass */
+	atcmd = makeNode(AlterTableCmd);
+	atcmd->subtype = AT_ReAddComment;
+	atcmd->def = (Node *) comment_stmt;
+	tab->subcmds[pass] = lappend(tab->subcmds[pass], atcmd);
+}
+
+/*
+ * Subroutine for ATPostAlterTypeParse(). Calls out to CheckIndexCompatible()
+ * for the real analysis, then mutates the IndexStmt based on that verdict.
+ */
+static void
+TryReuseIndex(Oid oldId, IndexStmt *stmt)
+{
+	Relation	irel;
+
+	/* Nothing to reuse unless the old index matches the new definition */
+	if (!CheckIndexCompatible(oldId,
+							  stmt->accessMethod,
+							  stmt->indexParams,
+							  stmt->excludeOpNames))
+		return;
+
+	irel = index_open(oldId, NoLock);
+
+	/* If it's a partitioned index, there is no storage to share. */
+	if (irel->rd_rel->relkind != RELKIND_PARTITIONED_INDEX)
+	{
+		/* tell DefineIndex to adopt the old relfilenode */
+		stmt->oldNode = irel->rd_node.relNode;
+		stmt->oldCreateSubid = irel->rd_createSubid;
+		stmt->oldFirstRelfilenodeSubid = irel->rd_firstRelfilenodeSubid;
+	}
+	index_close(irel, NoLock);
+}
+
+/*
+ * Subroutine for ATPostAlterTypeParse().
+ *
+ * Stash the old P-F equality operator into the Constraint node, for possible
+ * use by ATAddForeignKeyConstraint() in determining whether revalidation of
+ * this constraint can be skipped.
+ */
+static void
+TryReuseForeignKey(Oid oldId, Constraint *con)
+{
+	HeapTuple tup;
+	Datum adatum;
+	bool isNull;
+	ArrayType *arr;
+	Oid *rawarr;
+	int numkeys;
+	int i;
+
+	Assert(con->contype == CONSTR_FOREIGN);
+	Assert(con->old_conpfeqop == NIL); /* already prepared this node */
+
+	/* Fetch the old constraint's pg_constraint tuple */
+	tup = SearchSysCache1(CONSTROID, ObjectIdGetDatum(oldId));
+	if (!HeapTupleIsValid(tup)) /* should not happen */
+		elog(ERROR, "cache lookup failed for constraint %u", oldId);
+
+	/* Extract conpfeqop, the per-key PK = FK equality operator array */
+	adatum = SysCacheGetAttr(CONSTROID, tup,
+							 Anum_pg_constraint_conpfeqop, &isNull);
+	if (isNull)
+		elog(ERROR, "null conpfeqop for constraint %u", oldId);
+	arr = DatumGetArrayTypeP(adatum); /* ensure not toasted */
+	numkeys = ARR_DIMS(arr)[0];
+	/* test follows the one in ri_FetchConstraintInfo() */
+	if (ARR_NDIM(arr) != 1 ||
+		ARR_HASNULL(arr) ||
+		ARR_ELEMTYPE(arr) != OIDOID)
+		elog(ERROR, "conpfeqop is not a 1-D Oid array");
+	rawarr = (Oid *) ARR_DATA_PTR(arr);
+
+	/* stash a List of the operator Oids in our Constraint node */
+	for (i = 0; i < numkeys; i++)
+		con->old_conpfeqop = lappend_oid(con->old_conpfeqop, rawarr[i]);
+
+	ReleaseSysCache(tup);
+}
+
+/*
+ * ALTER COLUMN .. OPTIONS ( ... )
+ *
+ * Returns the address of the modified column
+ */
+static ObjectAddress
+ATExecAlterColumnGenericOptions(Relation rel,
+								const char *colName,
+								List *options,
+								LOCKMODE lockmode)
+{
+	Relation ftrel;
+	Relation attrel;
+	ForeignServer *server;
+	ForeignDataWrapper *fdw;
+	HeapTuple tuple;
+	HeapTuple newtuple;
+	bool isnull;
+	Datum repl_val[Natts_pg_attribute];
+	bool repl_null[Natts_pg_attribute];
+	bool repl_repl[Natts_pg_attribute];
+	Datum datum;
+	Form_pg_foreign_table fttableform;
+	Form_pg_attribute atttableform;
+	AttrNumber attnum;
+	ObjectAddress address;
+
+	/* No options given: nothing to do */
+	if (options == NIL)
+		return InvalidObjectAddress;
+
+	/* First, determine FDW validator associated to the foreign table. */
+	ftrel = table_open(ForeignTableRelationId, AccessShareLock);
+	/* wrap key in ObjectIdGetDatum: SearchSysCache1 takes a Datum */
+	tuple = SearchSysCache1(FOREIGNTABLEREL, ObjectIdGetDatum(rel->rd_id));
+	if (!HeapTupleIsValid(tuple))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("foreign table \"%s\" does not exist",
+						RelationGetRelationName(rel))));
+	fttableform = (Form_pg_foreign_table) GETSTRUCT(tuple);
+	server = GetForeignServer(fttableform->ftserver);
+	fdw = GetForeignDataWrapper(server->fdwid);
+
+	table_close(ftrel, AccessShareLock);
+	ReleaseSysCache(tuple);
+
+	/* Look up the target column in pg_attribute */
+	attrel = table_open(AttributeRelationId, RowExclusiveLock);
+	tuple = SearchSysCacheAttName(RelationGetRelid(rel), colName);
+	if (!HeapTupleIsValid(tuple))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_COLUMN),
+				 errmsg("column \"%s\" of relation \"%s\" does not exist",
+						colName, RelationGetRelationName(rel))));
+
+	/* Prevent them from altering a system attribute */
+	atttableform = (Form_pg_attribute) GETSTRUCT(tuple);
+	attnum = atttableform->attnum;
+	if (attnum <= 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot alter system column \"%s\"", colName)));
+
+	/* Initialize buffers for new tuple values */
+	memset(repl_val, 0, sizeof(repl_val));
+	memset(repl_null, false, sizeof(repl_null));
+	memset(repl_repl, false, sizeof(repl_repl));
+
+	/* Extract the current options */
+	datum = SysCacheGetAttr(ATTNAME,
+							tuple,
+							Anum_pg_attribute_attfdwoptions,
+							&isnull);
+	if (isnull)
+		datum = PointerGetDatum(NULL);
+
+	/* Transform the options, letting the FDW's validator check them */
+	datum = transformGenericOptions(AttributeRelationId,
+									datum,
+									options,
+									fdw->fdwvalidator);
+
+	if (PointerIsValid(DatumGetPointer(datum)))
+		repl_val[Anum_pg_attribute_attfdwoptions - 1] = datum;
+	else
+		repl_null[Anum_pg_attribute_attfdwoptions - 1] = true;
+
+	repl_repl[Anum_pg_attribute_attfdwoptions - 1] = true;
+
+	/* Everything looks good - update the tuple */
+
+	newtuple = heap_modify_tuple(tuple, RelationGetDescr(attrel),
+								 repl_val, repl_null, repl_repl);
+
+	CatalogTupleUpdate(attrel, &newtuple->t_self, newtuple);
+
+	InvokeObjectPostAlterHook(RelationRelationId,
+							  RelationGetRelid(rel),
+							  atttableform->attnum);
+	ObjectAddressSubSet(address, RelationRelationId,
+						RelationGetRelid(rel), attnum);
+
+	ReleaseSysCache(tuple);
+
+	table_close(attrel, RowExclusiveLock);
+
+	heap_freetuple(newtuple);
+
+	return address;
+}
+
+/*
+ * ALTER TABLE OWNER
+ *
+ * recursing is true if we are recursing from a table to its indexes,
+ * sequences, or toast table. We don't allow the ownership of those things to
+ * be changed separately from the parent table. Also, we can skip permission
+ * checks (this is necessary not just an optimization, else we'd fail to
+ * handle toast tables properly).
+ *
+ * recursing is also true if ALTER TYPE OWNER is calling us to fix up a
+ * free-standing composite type.
+ */
+void
+ATExecChangeOwner(Oid relationOid, Oid newOwnerId, bool recursing, LOCKMODE lockmode)
+{
+	Relation target_rel;
+	Relation class_rel;
+	HeapTuple tuple;
+	Form_pg_class tuple_class;
+
+	/*
+	 * Get exclusive lock till end of transaction on the target table. Use
+	 * relation_open so that we can work on indexes and sequences.
+	 */
+	target_rel = relation_open(relationOid, lockmode);
+
+	/* Get its pg_class tuple, too */
+	class_rel = table_open(RelationRelationId, RowExclusiveLock);
+
+	tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relationOid));
+	if (!HeapTupleIsValid(tuple))
+		elog(ERROR, "cache lookup failed for relation %u", relationOid);
+	tuple_class = (Form_pg_class) GETSTRUCT(tuple);
+
+	/* Can we change the ownership of this tuple? */
+	switch (tuple_class->relkind)
+	{
+		case RELKIND_RELATION:
+		case RELKIND_VIEW:
+		case RELKIND_MATVIEW:
+		case RELKIND_FOREIGN_TABLE:
+		case RELKIND_PARTITIONED_TABLE:
+			/* ok to change owner */
+			break;
+		case RELKIND_INDEX:
+			if (!recursing)
+			{
+				/*
+				 * Because ALTER INDEX OWNER used to be allowed, and in fact
+				 * is generated by old versions of pg_dump, we give a warning
+				 * and do nothing rather than erroring out. Also, to avoid
+				 * unnecessary chatter while restoring those old dumps, say
+				 * nothing at all if the command would be a no-op anyway.
+				 */
+				if (tuple_class->relowner != newOwnerId)
+					ereport(WARNING,
+							(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+							 errmsg("cannot change owner of index \"%s\"",
+									NameStr(tuple_class->relname)),
+							 errhint("Change the ownership of the index's table, instead.")));
+				/* quick hack to exit via the no-op path */
+				newOwnerId = tuple_class->relowner;
+			}
+			break;
+		case RELKIND_PARTITIONED_INDEX:
+			/* allowed only when recursing from the partitioned table */
+			if (recursing)
+				break;
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("cannot change owner of index \"%s\"",
+							NameStr(tuple_class->relname)),
+					 errhint("Change the ownership of the index's table, instead.")));
+			break;
+		case RELKIND_SEQUENCE:
+			if (!recursing &&
+				tuple_class->relowner != newOwnerId)
+			{
+				/* if it's an owned sequence, disallow changing it by itself */
+				Oid tableId;
+				int32 colId;
+
+				if (sequenceIsOwned(relationOid, DEPENDENCY_AUTO, &tableId, &colId) ||
+					sequenceIsOwned(relationOid, DEPENDENCY_INTERNAL, &tableId, &colId))
+					ereport(ERROR,
+							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+							 errmsg("cannot change owner of sequence \"%s\"",
+									NameStr(tuple_class->relname)),
+							 errdetail("Sequence \"%s\" is linked to table \"%s\".",
+									   NameStr(tuple_class->relname),
+									   get_rel_name(tableId))));
+			}
+			break;
+		case RELKIND_COMPOSITE_TYPE:
+			/* allowed only when called from ALTER TYPE OWNER */
+			if (recursing)
+				break;
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("\"%s\" is a composite type",
+							NameStr(tuple_class->relname)),
+					 errhint("Use ALTER TYPE instead.")));
+			break;
+		case RELKIND_TOASTVALUE:
+			if (recursing)
+				break;
+			/* FALL THRU */
+		default:
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("cannot change owner of relation \"%s\"",
+							NameStr(tuple_class->relname)),
+					 errdetail_relkind_not_supported(tuple_class->relkind)));
+	}
+
+	/*
+	 * If the new owner is the same as the existing owner, consider the
+	 * command to have succeeded. This is for dump restoration purposes.
+	 */
+	if (tuple_class->relowner != newOwnerId)
+	{
+		Datum repl_val[Natts_pg_class];
+		bool repl_null[Natts_pg_class];
+		bool repl_repl[Natts_pg_class];
+		Acl *newAcl;
+		Datum aclDatum;
+		bool isNull;
+		HeapTuple newtuple;
+
+		/* skip permission checks when recursing to index or toast table */
+		if (!recursing)
+		{
+			/* Superusers can always do it */
+			if (!superuser())
+			{
+				Oid namespaceOid = tuple_class->relnamespace;
+				AclResult aclresult;
+
+				/* Otherwise, must be owner of the existing object */
+				if (!pg_class_ownercheck(relationOid, GetUserId()))
+					aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(get_rel_relkind(relationOid)),
+								   RelationGetRelationName(target_rel));
+
+				/* Must be able to become new owner */
+				check_is_member_of_role(GetUserId(), newOwnerId);
+
+				/* New owner must have CREATE privilege on namespace */
+				aclresult = pg_namespace_aclcheck(namespaceOid, newOwnerId,
+												  ACL_CREATE);
+				if (aclresult != ACLCHECK_OK)
+					aclcheck_error(aclresult, OBJECT_SCHEMA,
+								   get_namespace_name(namespaceOid));
+			}
+		}
+
+		memset(repl_null, false, sizeof(repl_null));
+		memset(repl_repl, false, sizeof(repl_repl));
+
+		repl_repl[Anum_pg_class_relowner - 1] = true;
+		repl_val[Anum_pg_class_relowner - 1] = ObjectIdGetDatum(newOwnerId);
+
+		/*
+		 * Determine the modified ACL for the new owner. This is only
+		 * necessary when the ACL is non-null.
+		 */
+		aclDatum = SysCacheGetAttr(RELOID, tuple,
+								   Anum_pg_class_relacl,
+								   &isNull);
+		if (!isNull)
+		{
+			newAcl = aclnewowner(DatumGetAclP(aclDatum),
+								 tuple_class->relowner, newOwnerId);
+			repl_repl[Anum_pg_class_relacl - 1] = true;
+			repl_val[Anum_pg_class_relacl - 1] = PointerGetDatum(newAcl);
+		}
+
+		newtuple = heap_modify_tuple(tuple, RelationGetDescr(class_rel), repl_val, repl_null, repl_repl);
+
+		CatalogTupleUpdate(class_rel, &newtuple->t_self, newtuple);
+
+		heap_freetuple(newtuple);
+
+		/*
+		 * We must similarly update any per-column ACLs to reflect the new
+		 * owner; for neatness reasons that's split out as a subroutine.
+		 */
+		change_owner_fix_column_acls(relationOid,
+									 tuple_class->relowner,
+									 newOwnerId);
+
+		/*
+		 * Update owner dependency reference, if any. A composite type has
+		 * none, because it's tracked for the pg_type entry instead of here;
+		 * indexes and TOAST tables don't have their own entries either.
+		 */
+		if (tuple_class->relkind != RELKIND_COMPOSITE_TYPE &&
+			tuple_class->relkind != RELKIND_INDEX &&
+			tuple_class->relkind != RELKIND_PARTITIONED_INDEX &&
+			tuple_class->relkind != RELKIND_TOASTVALUE)
+			changeDependencyOnOwner(RelationRelationId, relationOid,
+									newOwnerId);
+
+		/*
+		 * Also change the ownership of the table's row type, if it has one
+		 */
+		if (OidIsValid(tuple_class->reltype))
+			AlterTypeOwnerInternal(tuple_class->reltype, newOwnerId);
+
+		/*
+		 * If we are operating on a table or materialized view, also change
+		 * the ownership of any indexes and sequences that belong to the
+		 * relation, as well as its toast table (if it has one).
+		 */
+		if (tuple_class->relkind == RELKIND_RELATION ||
+			tuple_class->relkind == RELKIND_PARTITIONED_TABLE ||
+			tuple_class->relkind == RELKIND_MATVIEW ||
+			tuple_class->relkind == RELKIND_TOASTVALUE)
+		{
+			List *index_oid_list;
+			ListCell *i;
+
+			/* Find all the indexes belonging to this relation */
+			index_oid_list = RelationGetIndexList(target_rel);
+
+			/* For each index, recursively change its ownership */
+			foreach(i, index_oid_list)
+				ATExecChangeOwner(lfirst_oid(i), newOwnerId, true, lockmode);
+
+			list_free(index_oid_list);
+		}
+
+		/* If it has a toast table, recurse to change its ownership */
+		if (tuple_class->reltoastrelid != InvalidOid)
+			ATExecChangeOwner(tuple_class->reltoastrelid, newOwnerId,
+							  true, lockmode);
+
+		/* If it has dependent sequences, recurse to change them too */
+		change_owner_recurse_to_sequences(relationOid, newOwnerId, lockmode);
+	}
+
+	InvokeObjectPostAlterHook(RelationRelationId, relationOid, 0);
+
+	ReleaseSysCache(tuple);
+	table_close(class_rel, RowExclusiveLock);
+	/* hold the target-rel lock till end of transaction */
+	relation_close(target_rel, NoLock);
+}
+
+/*
+ * change_owner_fix_column_acls
+ *
+ * Helper function for ATExecChangeOwner. Scan the columns of the table
+ * and fix any non-null column ACLs to reflect the new owner.
+ */
+static void
+change_owner_fix_column_acls(Oid relationOid, Oid oldOwnerId, Oid newOwnerId)
+{
+	Relation attRelation;
+	SysScanDesc scan;
+	ScanKeyData key[1];
+	HeapTuple attributeTuple;
+
+	/* Scan pg_attribute for all columns of the target relation */
+	attRelation = table_open(AttributeRelationId, RowExclusiveLock);
+	ScanKeyInit(&key[0],
+				Anum_pg_attribute_attrelid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(relationOid));
+	scan = systable_beginscan(attRelation, AttributeRelidNumIndexId,
+							  true, NULL, 1, key);
+	while (HeapTupleIsValid(attributeTuple = systable_getnext(scan)))
+	{
+		Form_pg_attribute att = (Form_pg_attribute) GETSTRUCT(attributeTuple);
+		Datum repl_val[Natts_pg_attribute];
+		bool repl_null[Natts_pg_attribute];
+		bool repl_repl[Natts_pg_attribute];
+		Acl *newAcl;
+		Datum aclDatum;
+		bool isNull;
+		HeapTuple newtuple;
+
+		/* Ignore dropped columns */
+		if (att->attisdropped)
+			continue;
+
+		aclDatum = heap_getattr(attributeTuple,
+								Anum_pg_attribute_attacl,
+								RelationGetDescr(attRelation),
+								&isNull);
+		/* Null ACLs do not require changes */
+		if (isNull)
+			continue;
+
+		memset(repl_null, false, sizeof(repl_null));
+		memset(repl_repl, false, sizeof(repl_repl));
+
+		/* Rewrite the ACL, transferring old owner's entries to new owner */
+		newAcl = aclnewowner(DatumGetAclP(aclDatum),
+							 oldOwnerId, newOwnerId);
+		repl_repl[Anum_pg_attribute_attacl - 1] = true;
+		repl_val[Anum_pg_attribute_attacl - 1] = PointerGetDatum(newAcl);
+
+		newtuple = heap_modify_tuple(attributeTuple,
+									 RelationGetDescr(attRelation),
+									 repl_val, repl_null, repl_repl);
+
+		CatalogTupleUpdate(attRelation, &newtuple->t_self, newtuple);
+
+		heap_freetuple(newtuple);
+	}
+	systable_endscan(scan);
+	table_close(attRelation, RowExclusiveLock);
+}
+
+/*
+ * change_owner_recurse_to_sequences
+ *
+ * Helper function for ATExecChangeOwner. Examines pg_depend searching
+ * for sequences that are dependent on serial columns, and changes their
+ * ownership.
+ */
+static void
+change_owner_recurse_to_sequences(Oid relationOid, Oid newOwnerId, LOCKMODE lockmode)
+{
+	Relation depRel;
+	SysScanDesc scan;
+	ScanKeyData key[2];
+	HeapTuple tup;
+
+	/*
+	 * SERIAL sequences are those having an auto dependency on one of the
+	 * table's columns (we don't care *which* column, exactly).
+	 */
+	depRel = table_open(DependRelationId, AccessShareLock);
+
+	ScanKeyInit(&key[0],
+				Anum_pg_depend_refclassid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(RelationRelationId));
+	ScanKeyInit(&key[1],
+				Anum_pg_depend_refobjid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(relationOid));
+	/* we leave refobjsubid unspecified */
+
+	scan = systable_beginscan(depRel, DependReferenceIndexId, true,
+							  NULL, 2, key);
+
+	while (HeapTupleIsValid(tup = systable_getnext(scan)))
+	{
+		Form_pg_depend depForm = (Form_pg_depend) GETSTRUCT(tup);
+		Relation seqRel;
+
+		/* skip dependencies other than auto dependencies on columns */
+		/* (note: both AUTO and INTERNAL dependency types are accepted) */
+		if (depForm->refobjsubid == 0 ||
+			depForm->classid != RelationRelationId ||
+			depForm->objsubid != 0 ||
+			!(depForm->deptype == DEPENDENCY_AUTO || depForm->deptype == DEPENDENCY_INTERNAL))
+			continue;
+
+		/* Use relation_open just in case it's an index */
+		seqRel = relation_open(depForm->objid, lockmode);
+
+		/* skip non-sequence relations */
+		if (RelationGetForm(seqRel)->relkind != RELKIND_SEQUENCE)
+		{
+			/* No need to keep the lock */
+			relation_close(seqRel, lockmode);
+			continue;
+		}
+
+		/* We don't need to close the sequence while we alter it. */
+		ATExecChangeOwner(depForm->objid, newOwnerId, true, lockmode);
+
+		/* Now we can close it. Keep the lock till end of transaction. */
+		relation_close(seqRel, NoLock);
+	}
+
+	systable_endscan(scan);
+
+	relation_close(depRel, AccessShareLock);
+}
+
+/*
+ * ALTER TABLE CLUSTER ON
+ *
+ * The only thing we have to do is to change the indisclustered bits.
+ *
+ * indexName is looked up in the table's own namespace; an error is raised
+ * if no such index exists there.
+ *
+ * Return the address of the new clustering index.
+ */
+static ObjectAddress
+ATExecClusterOn(Relation rel, const char *indexName, LOCKMODE lockmode)
+{
+ Oid indexOid;
+ ObjectAddress address;
+
+ indexOid = get_relname_relid(indexName, rel->rd_rel->relnamespace);
+
+ if (!OidIsValid(indexOid))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("index \"%s\" for table \"%s\" does not exist",
+ indexName, RelationGetRelationName(rel))));
+
+ /* Check index is valid to cluster on */
+ check_index_is_clusterable(rel, indexOid, lockmode);
+
+ /* And do the work */
+ mark_index_clustered(rel, indexOid, false);
+
+ ObjectAddressSet(address,
+ RelationRelationId, indexOid);
+
+ return address;
+}
+
+/*
+ * ALTER TABLE SET WITHOUT CLUSTER
+ *
+ * We have to find any indexes on the table that have indisclustered bit
+ * set and turn it off.
+ *
+ * Passing InvalidOid tells mark_index_clustered to clear the bit on all
+ * of the table's indexes.
+ */
+static void
+ATExecDropCluster(Relation rel, LOCKMODE lockmode)
+{
+ mark_index_clustered(rel, InvalidOid, false);
+}
+
+/*
+ * Preparation phase for SET ACCESS METHOD
+ *
+ * Check that access method exists. If it is the same as the table's current
+ * access method, it is a no-op. Otherwise, a table rewrite is necessary.
+ */
+static void
+ATPrepSetAccessMethod(AlteredTableInfo *tab, Relation rel, const char *amname)
+{
+ Oid amoid;
+
+ /* Check that the table access method exists (error out if not) */
+ amoid = get_table_am_oid(amname, false);
+
+ /* Same access method as before: nothing to do */
+ if (rel->rd_rel->relam == amoid)
+ return;
+
+ /* Save info for Phase 3 to do the real work */
+ tab->rewrite |= AT_REWRITE_ACCESS_METHOD;
+ tab->newAccessMethod = amoid;
+}
+
+/*
+ * ALTER TABLE SET TABLESPACE
+ *
+ * Preparation phase: resolve and permission-check the target tablespace,
+ * then record it in "tab" for Phase 3 to perform the actual move.
+ */
+static void
+ATPrepSetTableSpace(AlteredTableInfo *tab, Relation rel, const char *tablespacename, LOCKMODE lockmode)
+{
+ Oid tablespaceId;
+
+ /* Check that the tablespace exists (error out if not) */
+ tablespaceId = get_tablespace_oid(tablespacename, false);
+
+ /* Check permissions except when moving to database's default */
+ if (OidIsValid(tablespaceId) && tablespaceId != MyDatabaseTableSpace)
+ {
+ AclResult aclresult;
+
+ aclresult = pg_tablespace_aclcheck(tablespaceId, GetUserId(), ACL_CREATE);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error(aclresult, OBJECT_TABLESPACE, tablespacename);
+ }
+
+ /* Save info for Phase 3 to do the real work */
+ if (OidIsValid(tab->newTableSpace))
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("cannot have multiple SET TABLESPACE subcommands")));
+
+ tab->newTableSpace = tablespaceId;
+}
+
+/*
+ * Set, reset, or replace reloptions.
+ *
+ * "operation" distinguishes SET (merge defList into existing options),
+ * RESET (remove the named options), and REPLACE (discard old options
+ * entirely and install defList). The same treatment is repeated for the
+ * relation's TOAST table, if any, using the "toast" namespace of defList.
+ */
+static void
+ATExecSetRelOptions(Relation rel, List *defList, AlterTableType operation,
+ LOCKMODE lockmode)
+{
+ Oid relid;
+ Relation pgclass;
+ HeapTuple tuple;
+ HeapTuple newtuple;
+ Datum datum;
+ bool isnull;
+ Datum newOptions;
+ Datum repl_val[Natts_pg_class];
+ bool repl_null[Natts_pg_class];
+ bool repl_repl[Natts_pg_class];
+ static char *validnsps[] = HEAP_RELOPT_NAMESPACES;
+
+ if (defList == NIL && operation != AT_ReplaceRelOptions)
+ return; /* nothing to do */
+
+ pgclass = table_open(RelationRelationId, RowExclusiveLock);
+
+ /* Fetch heap tuple */
+ relid = RelationGetRelid(rel);
+ tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
+ if (!HeapTupleIsValid(tuple))
+ elog(ERROR, "cache lookup failed for relation %u", relid);
+
+ if (operation == AT_ReplaceRelOptions)
+ {
+ /*
+ * If we're supposed to replace the reloptions list, we just pretend
+ * there were none before.
+ */
+ datum = (Datum) 0;
+ isnull = true;
+ }
+ else
+ {
+ /* Get the old reloptions */
+ datum = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
+ &isnull);
+ }
+
+ /* Generate new proposed reloptions (text array) */
+ newOptions = transformRelOptions(isnull ? (Datum) 0 : datum,
+ defList, NULL, validnsps, false,
+ operation == AT_ResetRelOptions);
+
+ /* Validate the new options against the relation's kind */
+ switch (rel->rd_rel->relkind)
+ {
+ case RELKIND_RELATION:
+ case RELKIND_TOASTVALUE:
+ case RELKIND_MATVIEW:
+ (void) heap_reloptions(rel->rd_rel->relkind, newOptions, true);
+ break;
+ case RELKIND_PARTITIONED_TABLE:
+ (void) partitioned_table_reloptions(newOptions, true);
+ break;
+ case RELKIND_VIEW:
+ (void) view_reloptions(newOptions, true);
+ break;
+ case RELKIND_INDEX:
+ case RELKIND_PARTITIONED_INDEX:
+ (void) index_reloptions(rel->rd_indam->amoptions, newOptions, true);
+ break;
+ default:
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot set options for relation \"%s\"",
+ RelationGetRelationName(rel)),
+ errdetail_relkind_not_supported(rel->rd_rel->relkind)));
+ break;
+ }
+
+ /* Special-case validation of view options */
+ if (rel->rd_rel->relkind == RELKIND_VIEW)
+ {
+ Query *view_query = get_view_query(rel);
+ List *view_options = untransformRelOptions(newOptions);
+ ListCell *cell;
+ bool check_option = false;
+
+ /* Detect whether the resulting option set includes check_option */
+ foreach(cell, view_options)
+ {
+ DefElem *defel = (DefElem *) lfirst(cell);
+
+ if (strcmp(defel->defname, "check_option") == 0)
+ check_option = true;
+ }
+
+ /*
+ * If the check option is specified, look to see if the view is
+ * actually auto-updatable or not.
+ */
+ if (check_option)
+ {
+ const char *view_updatable_error =
+ view_query_is_auto_updatable(view_query, true);
+
+ if (view_updatable_error)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("WITH CHECK OPTION is supported only on automatically updatable views"),
+ errhint("%s", _(view_updatable_error))));
+ }
+ }
+
+ /*
+ * All we need do here is update the pg_class row; the new options will be
+ * propagated into relcaches during post-commit cache inval.
+ */
+ memset(repl_val, 0, sizeof(repl_val));
+ memset(repl_null, false, sizeof(repl_null));
+ memset(repl_repl, false, sizeof(repl_repl));
+
+ if (newOptions != (Datum) 0)
+ repl_val[Anum_pg_class_reloptions - 1] = newOptions;
+ else
+ repl_null[Anum_pg_class_reloptions - 1] = true;
+
+ repl_repl[Anum_pg_class_reloptions - 1] = true;
+
+ newtuple = heap_modify_tuple(tuple, RelationGetDescr(pgclass),
+ repl_val, repl_null, repl_repl);
+
+ CatalogTupleUpdate(pgclass, &newtuple->t_self, newtuple);
+
+ InvokeObjectPostAlterHook(RelationRelationId, RelationGetRelid(rel), 0);
+
+ heap_freetuple(newtuple);
+
+ ReleaseSysCache(tuple);
+
+ /* repeat the whole exercise for the toast table, if there's one */
+ if (OidIsValid(rel->rd_rel->reltoastrelid))
+ {
+ Relation toastrel;
+ Oid toastid = rel->rd_rel->reltoastrelid;
+
+ toastrel = table_open(toastid, lockmode);
+
+ /* Fetch heap tuple */
+ tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(toastid));
+ if (!HeapTupleIsValid(tuple))
+ elog(ERROR, "cache lookup failed for relation %u", toastid);
+
+ if (operation == AT_ReplaceRelOptions)
+ {
+ /*
+ * If we're supposed to replace the reloptions list, we just
+ * pretend there were none before.
+ */
+ datum = (Datum) 0;
+ isnull = true;
+ }
+ else
+ {
+ /* Get the old reloptions */
+ datum = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
+ &isnull);
+ }
+
+ /* Only options in the "toast" namespace apply here */
+ newOptions = transformRelOptions(isnull ? (Datum) 0 : datum,
+ defList, "toast", validnsps, false,
+ operation == AT_ResetRelOptions);
+
+ (void) heap_reloptions(RELKIND_TOASTVALUE, newOptions, true);
+
+ memset(repl_val, 0, sizeof(repl_val));
+ memset(repl_null, false, sizeof(repl_null));
+ memset(repl_repl, false, sizeof(repl_repl));
+
+ if (newOptions != (Datum) 0)
+ repl_val[Anum_pg_class_reloptions - 1] = newOptions;
+ else
+ repl_null[Anum_pg_class_reloptions - 1] = true;
+
+ repl_repl[Anum_pg_class_reloptions - 1] = true;
+
+ newtuple = heap_modify_tuple(tuple, RelationGetDescr(pgclass),
+ repl_val, repl_null, repl_repl);
+
+ CatalogTupleUpdate(pgclass, &newtuple->t_self, newtuple);
+
+ InvokeObjectPostAlterHookArg(RelationRelationId,
+ RelationGetRelid(toastrel), 0,
+ InvalidOid, true);
+
+ heap_freetuple(newtuple);
+
+ ReleaseSysCache(tuple);
+
+ /* hold the lock on the toast table until end of transaction */
+ table_close(toastrel, NoLock);
+ }
+
+ table_close(pgclass, RowExclusiveLock);
+}
+
+/*
+ * Execute ALTER TABLE SET TABLESPACE for cases where there is no tuple
+ * rewriting to be done, so we just want to copy the data as fast as possible.
+ *
+ * Allocates a new relfilenode in the target tablespace, copies all forks
+ * there, updates pg_class, then recurses to the TOAST table and its
+ * indexes so they move along with the main table.
+ */
+static void
+ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode)
+{
+ Relation rel;
+ Oid reltoastrelid;
+ Oid newrelfilenode;
+ RelFileNode newrnode;
+ List *reltoastidxids = NIL;
+ ListCell *lc;
+
+ /*
+ * Need lock here in case we are recursing to toast table or index
+ */
+ rel = relation_open(tableOid, lockmode);
+
+ /* Check first if relation can be moved to new tablespace */
+ if (!CheckRelationTableSpaceMove(rel, newTableSpace))
+ {
+ /* no-op: still fire the post-alter hook for auditing extensions */
+ InvokeObjectPostAlterHook(RelationRelationId,
+ RelationGetRelid(rel), 0);
+ relation_close(rel, NoLock);
+ return;
+ }
+
+ reltoastrelid = rel->rd_rel->reltoastrelid;
+ /* Fetch the list of indexes on toast relation if necessary */
+ if (OidIsValid(reltoastrelid))
+ {
+ Relation toastRel = relation_open(reltoastrelid, lockmode);
+
+ reltoastidxids = RelationGetIndexList(toastRel);
+ relation_close(toastRel, lockmode);
+ }
+
+ /*
+ * Relfilenodes are not unique in databases across tablespaces, so we need
+ * to allocate a new one in the new tablespace.
+ */
+ newrelfilenode = GetNewRelFileNode(newTableSpace, NULL,
+ rel->rd_rel->relpersistence);
+
+ /* Open old and new relation */
+ newrnode = rel->rd_node;
+ newrnode.relNode = newrelfilenode;
+ newrnode.spcNode = newTableSpace;
+
+ /* hand off to AM to actually create the new filenode and copy the data */
+ if (rel->rd_rel->relkind == RELKIND_INDEX)
+ {
+ index_copy_data(rel, newrnode);
+ }
+ else
+ {
+ Assert(RELKIND_HAS_TABLE_AM(rel->rd_rel->relkind));
+ table_relation_copy_data(rel, &newrnode);
+ }
+
+ /*
+ * Update the pg_class row.
+ *
+ * NB: This wouldn't work if ATExecSetTableSpace() were allowed to be
+ * executed on pg_class or its indexes (the above copy wouldn't contain
+ * the updated pg_class entry), but that's forbidden with
+ * CheckRelationTableSpaceMove().
+ */
+ SetRelationTableSpace(rel, newTableSpace, newrelfilenode);
+
+ InvokeObjectPostAlterHook(RelationRelationId, RelationGetRelid(rel), 0);
+
+ RelationAssumeNewRelfilenode(rel);
+
+ relation_close(rel, NoLock);
+
+ /* Make sure the reltablespace change is visible */
+ CommandCounterIncrement();
+
+ /* Move associated toast relation and/or indexes, too */
+ if (OidIsValid(reltoastrelid))
+ ATExecSetTableSpace(reltoastrelid, newTableSpace, lockmode);
+ foreach(lc, reltoastidxids)
+ ATExecSetTableSpace(lfirst_oid(lc), newTableSpace, lockmode);
+
+ /* Clean up */
+ list_free(reltoastidxids);
+}
+
+/*
+ * Special handling of ALTER TABLE SET TABLESPACE for relations with no
+ * storage that have an interest in preserving tablespace.
+ *
+ * Since these have no storage the tablespace can be updated with a simple
+ * metadata only operation to update the tablespace.
+ */
+static void
+ATExecSetTableSpaceNoStorage(Relation rel, Oid newTableSpace)
+{
+ /*
+ * Shouldn't be called on relations having storage; these are processed in
+ * phase 3.
+ */
+ Assert(!RELKIND_HAS_STORAGE(rel->rd_rel->relkind));
+
+ /* check if relation can be moved to its new tablespace */
+ if (!CheckRelationTableSpaceMove(rel, newTableSpace))
+ {
+ /* no-op: still fire the post-alter hook for auditing extensions */
+ InvokeObjectPostAlterHook(RelationRelationId,
+ RelationGetRelid(rel),
+ 0);
+ return;
+ }
+
+ /* Update can be done, so change reltablespace */
+ SetRelationTableSpace(rel, newTableSpace, InvalidOid);
+
+ InvokeObjectPostAlterHook(RelationRelationId, RelationGetRelid(rel), 0);
+
+ /* Make sure the reltablespace change is visible */
+ CommandCounterIncrement();
+}
+
+/*
+ * Alter Table ALL ... SET TABLESPACE
+ *
+ * Allows a user to move all objects of some type in a given tablespace in the
+ * current database to another tablespace. Objects can be chosen based on the
+ * owner of the object also, to allow users to move only their objects.
+ * The user must have CREATE rights on the new tablespace, as usual. The main
+ * permissions handling is done by the lower-level table move function.
+ *
+ * All to-be-moved objects are locked first. If NOWAIT is specified and the
+ * lock can't be acquired then we ereport(ERROR).
+ *
+ * Returns the OID of the destination tablespace (InvalidOid when it is the
+ * database's default tablespace).
+ */
+Oid
+AlterTableMoveAll(AlterTableMoveAllStmt *stmt)
+{
+ List *relations = NIL;
+ ListCell *l;
+ ScanKeyData key[1];
+ Relation rel;
+ TableScanDesc scan;
+ HeapTuple tuple;
+ Oid orig_tablespaceoid;
+ Oid new_tablespaceoid;
+ List *role_oids = roleSpecsToIds(stmt->roles);
+
+ /* Ensure we were not asked to move something we can't */
+ if (stmt->objtype != OBJECT_TABLE && stmt->objtype != OBJECT_INDEX &&
+ stmt->objtype != OBJECT_MATVIEW)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("only tables, indexes, and materialized views exist in tablespaces")));
+
+ /* Get the orig and new tablespace OIDs */
+ orig_tablespaceoid = get_tablespace_oid(stmt->orig_tablespacename, false);
+ new_tablespaceoid = get_tablespace_oid(stmt->new_tablespacename, false);
+
+ /* Can't move shared relations in to or out of pg_global */
+ /* This is also checked by ATExecSetTableSpace, but nice to stop earlier */
+ if (orig_tablespaceoid == GLOBALTABLESPACE_OID ||
+ new_tablespaceoid == GLOBALTABLESPACE_OID)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cannot move relations in to or out of pg_global tablespace")));
+
+ /*
+ * Must have CREATE rights on the new tablespace, unless it is the
+ * database default tablespace (which all users implicitly have CREATE
+ * rights on).
+ */
+ if (OidIsValid(new_tablespaceoid) && new_tablespaceoid != MyDatabaseTableSpace)
+ {
+ AclResult aclresult;
+
+ aclresult = pg_tablespace_aclcheck(new_tablespaceoid, GetUserId(),
+ ACL_CREATE);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error(aclresult, OBJECT_TABLESPACE,
+ get_tablespace_name(new_tablespaceoid));
+ }
+
+ /*
+ * Now that the checks are done, check if we should set either to
+ * InvalidOid because it is our database's default tablespace.
+ * (pg_class.reltablespace stores 0 for the database default.)
+ */
+ if (orig_tablespaceoid == MyDatabaseTableSpace)
+ orig_tablespaceoid = InvalidOid;
+
+ if (new_tablespaceoid == MyDatabaseTableSpace)
+ new_tablespaceoid = InvalidOid;
+
+ /* no-op */
+ if (orig_tablespaceoid == new_tablespaceoid)
+ return new_tablespaceoid;
+
+ /*
+ * Walk the list of objects in the tablespace and move them. This will
+ * only find objects in our database, of course.
+ */
+ ScanKeyInit(&key[0],
+ Anum_pg_class_reltablespace,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(orig_tablespaceoid));
+
+ rel = table_open(RelationRelationId, AccessShareLock);
+ scan = table_beginscan_catalog(rel, 1, key);
+ while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+ {
+ Form_pg_class relForm = (Form_pg_class) GETSTRUCT(tuple);
+ Oid relOid = relForm->oid;
+
+ /*
+ * Do not move objects in pg_catalog as part of this, if an admin
+ * really wishes to do so, they can issue the individual ALTER
+ * commands directly.
+ *
+ * Also, explicitly avoid any shared tables, temp tables, or TOAST
+ * (TOAST will be moved with the main table).
+ */
+ if (IsCatalogNamespace(relForm->relnamespace) ||
+ relForm->relisshared ||
+ isAnyTempNamespace(relForm->relnamespace) ||
+ IsToastNamespace(relForm->relnamespace))
+ continue;
+
+ /* Only move the object type requested */
+ if ((stmt->objtype == OBJECT_TABLE &&
+ relForm->relkind != RELKIND_RELATION &&
+ relForm->relkind != RELKIND_PARTITIONED_TABLE) ||
+ (stmt->objtype == OBJECT_INDEX &&
+ relForm->relkind != RELKIND_INDEX &&
+ relForm->relkind != RELKIND_PARTITIONED_INDEX) ||
+ (stmt->objtype == OBJECT_MATVIEW &&
+ relForm->relkind != RELKIND_MATVIEW))
+ continue;
+
+ /* Check if we are only moving objects owned by certain roles */
+ if (role_oids != NIL && !list_member_oid(role_oids, relForm->relowner))
+ continue;
+
+ /*
+ * Handle permissions-checking here since we are locking the tables
+ * and also to avoid doing a bunch of work only to fail part-way. Note
+ * that permissions will also be checked by AlterTableInternal().
+ *
+ * Caller must be considered an owner on the table to move it.
+ */
+ if (!pg_class_ownercheck(relOid, GetUserId()))
+ aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(get_rel_relkind(relOid)),
+ NameStr(relForm->relname));
+
+ if (stmt->nowait &&
+ !ConditionalLockRelationOid(relOid, AccessExclusiveLock))
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_IN_USE),
+ errmsg("aborting because lock on relation \"%s.%s\" is not available",
+ get_namespace_name(relForm->relnamespace),
+ NameStr(relForm->relname))));
+ else
+ LockRelationOid(relOid, AccessExclusiveLock);
+
+ /* Add to our list of objects to move */
+ relations = lappend_oid(relations, relOid);
+ }
+
+ table_endscan(scan);
+ table_close(rel, AccessShareLock);
+
+ if (relations == NIL)
+ ereport(NOTICE,
+ (errcode(ERRCODE_NO_DATA_FOUND),
+ errmsg("no matching relations in tablespace \"%s\" found",
+ orig_tablespaceoid == InvalidOid ? "(database default)" :
+ get_tablespace_name(orig_tablespaceoid))));
+
+ /* Everything is locked, loop through and move all of the relations. */
+ foreach(l, relations)
+ {
+ List *cmds = NIL;
+ AlterTableCmd *cmd = makeNode(AlterTableCmd);
+
+ cmd->subtype = AT_SetTableSpace;
+ cmd->name = stmt->new_tablespacename;
+
+ cmds = lappend(cmds, cmd);
+
+ EventTriggerAlterTableStart((Node *) stmt);
+ /* OID is set by AlterTableInternal */
+ AlterTableInternal(lfirst_oid(l), cmds, false);
+ EventTriggerAlterTableEnd();
+ }
+
+ return new_tablespaceoid;
+}
+
+/*
+ * Copy an index's physical storage into a new relfilenode location
+ * (helper for ATExecSetTableSpace).
+ */
+static void
+index_copy_data(Relation rel, RelFileNode newrnode)
+{
+ SMgrRelation dstrel;
+
+ dstrel = smgropen(newrnode, rel->rd_backend);
+
+ /*
+ * Since we copy the file directly without looking at the shared buffers,
+ * we'd better first flush out any pages of the source relation that are
+ * in shared buffers. We assume no new changes will be made while we are
+ * holding exclusive lock on the rel.
+ */
+ FlushRelationBuffers(rel);
+
+ /*
+ * Create and copy all forks of the relation, and schedule unlinking of
+ * old physical files.
+ *
+ * NOTE: any conflict in relfilenode value will be caught in
+ * RelationCreateStorage().
+ */
+ RelationCreateStorage(newrnode, rel->rd_rel->relpersistence, true);
+
+ /* copy main fork */
+ RelationCopyStorage(RelationGetSmgr(rel), dstrel, MAIN_FORKNUM,
+ rel->rd_rel->relpersistence);
+
+ /* copy those extra forks that exist */
+ for (ForkNumber forkNum = MAIN_FORKNUM + 1;
+ forkNum <= MAX_FORKNUM; forkNum++)
+ {
+ if (smgrexists(RelationGetSmgr(rel), forkNum))
+ {
+ smgrcreate(dstrel, forkNum, false);
+
+ /*
+ * WAL log creation if the relation is persistent, or this is the
+ * init fork of an unlogged relation.
+ */
+ if (RelationIsPermanent(rel) ||
+ (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
+ forkNum == INIT_FORKNUM))
+ log_smgrcreate(&newrnode, forkNum);
+ RelationCopyStorage(RelationGetSmgr(rel), dstrel, forkNum,
+ rel->rd_rel->relpersistence);
+ }
+ }
+
+ /* drop old relation, and close new one */
+ RelationDropStorage(rel);
+ smgrclose(dstrel);
+}
+
+/*
+ * ALTER TABLE ENABLE/DISABLE TRIGGER
+ *
+ * We just pass this off to trigger.c.
+ *
+ * fires_when selects the new pg_trigger.tgenabled state; skip_system
+ * protects RI triggers; recurse handles partitions/children.
+ */
+static void
+ATExecEnableDisableTrigger(Relation rel, const char *trigname,
+ char fires_when, bool skip_system, bool recurse,
+ LOCKMODE lockmode)
+{
+ EnableDisableTriggerNew2(rel, trigname, InvalidOid,
+ fires_when, skip_system, recurse,
+ lockmode);
+}
+
+/*
+ * ALTER TABLE ENABLE/DISABLE RULE
+ *
+ * We just pass this off to rewriteDefine.c.
+ *
+ * fires_when selects the new pg_rewrite.ev_enabled state.
+ */
+static void
+ATExecEnableDisableRule(Relation rel, const char *rulename,
+ char fires_when, LOCKMODE lockmode)
+{
+ EnableDisableRule(rel, rulename, fires_when);
+}
+
+/*
+ * ALTER TABLE INHERIT
+ *
+ * Add a parent to the child's parents. This verifies that all the columns and
+ * check constraints of the parent appear in the child and that they have the
+ * same data types and expressions.
+ *
+ * Preparation phase: reject child relations that can never take part in
+ * plain inheritance (typed tables, partitions, partitioned tables).
+ */
+static void
+ATPrepAddInherit(Relation child_rel)
+{
+ if (child_rel->rd_rel->reloftype)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot change inheritance of typed table")));
+
+ if (child_rel->rd_rel->relispartition)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot change inheritance of a partition")));
+
+ if (child_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot change inheritance of partitioned table")));
+}
+
+/*
+ * Execution phase of ALTER TABLE INHERIT: validate the proposed parent and
+ * create the inheritance link.
+ *
+ * Return the address of the new parent relation.
+ */
+static ObjectAddress
+ATExecAddInherit(Relation child_rel, RangeVar *parent, LOCKMODE lockmode)
+{
+ Relation parent_rel;
+ List *children;
+ ObjectAddress address;
+ const char *trigger_name;
+
+ /*
+ * A self-exclusive lock is needed here. See the similar case in
+ * MergeAttributes() for a full explanation.
+ */
+ parent_rel = table_openrv(parent, ShareUpdateExclusiveLock);
+
+ /*
+ * Must be owner of both parent and child -- child was checked by
+ * ATSimplePermissions call in ATPrepCmd
+ */
+ ATSimplePermissions(AT_AddInherit, parent_rel, ATT_TABLE | ATT_FOREIGN_TABLE);
+
+ /* Permanent rels cannot inherit from temporary ones */
+ if (parent_rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
+ child_rel->rd_rel->relpersistence != RELPERSISTENCE_TEMP)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot inherit from temporary relation \"%s\"",
+ RelationGetRelationName(parent_rel))));
+
+ /* If parent rel is temp, it must belong to this session */
+ if (parent_rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
+ !parent_rel->rd_islocaltemp)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot inherit from temporary relation of another session")));
+
+ /* Ditto for the child */
+ if (child_rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
+ !child_rel->rd_islocaltemp)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot inherit to temporary relation of another session")));
+
+ /* Prevent partitioned tables from becoming inheritance parents */
+ if (parent_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot inherit from partitioned table \"%s\"",
+ parent->relname)));
+
+ /* Likewise for partitions */
+ if (parent_rel->rd_rel->relispartition)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot inherit from a partition")));
+
+ /*
+ * Prevent circularity by seeing if proposed parent inherits from child.
+ * (In particular, this disallows making a rel inherit from itself.)
+ *
+ * This is not completely bulletproof because of race conditions: in
+ * multi-level inheritance trees, someone else could concurrently be
+ * making another inheritance link that closes the loop but does not join
+ * either of the rels we have locked. Preventing that seems to require
+ * exclusive locks on the entire inheritance tree, which is a cure worse
+ * than the disease. find_all_inheritors() will cope with circularity
+ * anyway, so don't sweat it too much.
+ *
+ * We use weakest lock we can on child's children, namely AccessShareLock.
+ */
+ children = find_all_inheritors(RelationGetRelid(child_rel),
+ AccessShareLock, NULL);
+
+ if (list_member_oid(children, RelationGetRelid(parent_rel)))
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_TABLE),
+ errmsg("circular inheritance not allowed"),
+ errdetail("\"%s\" is already a child of \"%s\".",
+ parent->relname,
+ RelationGetRelationName(child_rel))));
+
+ /*
+ * If child_rel has row-level triggers with transition tables, we
+ * currently don't allow it to become an inheritance child. See also
+ * prohibitions in ATExecAttachPartition() and CreateTrigger().
+ */
+ trigger_name = FindTriggerIncompatibleWithInheritance(child_rel->trigdesc);
+ if (trigger_name != NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("trigger \"%s\" prevents table \"%s\" from becoming an inheritance child",
+ trigger_name, RelationGetRelationName(child_rel)),
+ errdetail("ROW triggers with transition tables are not supported in inheritance hierarchies.")));
+
+ /* OK to create inheritance */
+ CreateInheritance(child_rel, parent_rel);
+
+ ObjectAddressSet(address, RelationRelationId,
+ RelationGetRelid(parent_rel));
+
+ /* keep our lock on the parent relation until commit */
+ table_close(parent_rel, NoLock);
+
+ return address;
+}
+
+/*
+ * CreateInheritance
+ * Catalog manipulation portion of creating inheritance between a child
+ * table and a parent table.
+ *
+ * Common to ATExecAddInherit() and ATExecAttachPartition().
+ *
+ * Caller is expected to have validated the relations and to hold suitable
+ * locks on both; this routine only does the catalog work.
+ */
+static void
+CreateInheritance(Relation child_rel, Relation parent_rel)
+{
+ Relation catalogRelation;
+ SysScanDesc scan;
+ ScanKeyData key;
+ HeapTuple inheritsTuple;
+ int32 inhseqno;
+
+ /* Note: get RowExclusiveLock because we will write pg_inherits below. */
+ catalogRelation = table_open(InheritsRelationId, RowExclusiveLock);
+
+ /*
+ * Check for duplicates in the list of parents, and determine the highest
+ * inhseqno already present; we'll use the next one for the new parent.
+ * Also, if proposed child is a partition, it cannot already be
+ * inheriting.
+ *
+ * Note: we do not reject the case where the child already inherits from
+ * the parent indirectly; CREATE TABLE doesn't reject comparable cases.
+ */
+ ScanKeyInit(&key,
+ Anum_pg_inherits_inhrelid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(RelationGetRelid(child_rel)));
+ scan = systable_beginscan(catalogRelation, InheritsRelidSeqnoIndexId,
+ true, NULL, 1, &key);
+
+ /* inhseqno sequences start at 1 */
+ inhseqno = 0;
+ while (HeapTupleIsValid(inheritsTuple = systable_getnext(scan)))
+ {
+ Form_pg_inherits inh = (Form_pg_inherits) GETSTRUCT(inheritsTuple);
+
+ if (inh->inhparent == RelationGetRelid(parent_rel))
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_TABLE),
+ errmsg("relation \"%s\" would be inherited from more than once",
+ RelationGetRelationName(parent_rel))));
+
+ if (inh->inhseqno > inhseqno)
+ inhseqno = inh->inhseqno;
+ }
+ systable_endscan(scan);
+
+ /* Match up the columns and bump attinhcount as needed */
+ MergeAttributesIntoExisting(child_rel, parent_rel);
+
+ /* Match up the constraints and bump coninhcount as needed */
+ MergeConstraintsIntoExisting(child_rel, parent_rel);
+
+ /*
+ * OK, it looks valid. Make the catalog entries that show inheritance.
+ */
+ StoreCatalogInheritance1(RelationGetRelid(child_rel),
+ RelationGetRelid(parent_rel),
+ inhseqno + 1,
+ catalogRelation,
+ parent_rel->rd_rel->relkind ==
+ RELKIND_PARTITIONED_TABLE);
+
+ /* Now we're done with pg_inherits */
+ table_close(catalogRelation, RowExclusiveLock);
+}
+
+/*
+ * Obtain the source-text form of the constraint expression for a check
+ * constraint, given its pg_constraint tuple
+ *
+ * Returns a palloc'd C string; errors out if conbin is unexpectedly null.
+ */
+static char *
+decompile_conbin(HeapTuple contup, TupleDesc tupdesc)
+{
+ Form_pg_constraint con;
+ bool isnull;
+ Datum attr;
+ Datum expr;
+
+ con = (Form_pg_constraint) GETSTRUCT(contup);
+ attr = heap_getattr(contup, Anum_pg_constraint_conbin, tupdesc, &isnull);
+ if (isnull)
+ elog(ERROR, "null conbin for constraint %u", con->oid);
+
+ /* deparse the stored expression relative to the constrained relation */
+ expr = DirectFunctionCall2(pg_get_expr, attr,
+ ObjectIdGetDatum(con->conrelid));
+ return TextDatumGetCString(expr);
+}
+
+/*
+ * Determine whether two check constraints are functionally equivalent
+ *
+ * The test we apply is to see whether they reverse-compile to the same
+ * source string. This insulates us from issues like whether attributes
+ * have the same physical column numbers in parent and child relations.
+ *
+ * Deferrability flags must match as well as the deparsed expressions.
+ */
+static bool
+constraints_equivalent(HeapTuple a, HeapTuple b, TupleDesc tupleDesc)
+{
+ Form_pg_constraint acon = (Form_pg_constraint) GETSTRUCT(a);
+ Form_pg_constraint bcon = (Form_pg_constraint) GETSTRUCT(b);
+
+ if (acon->condeferrable != bcon->condeferrable ||
+ acon->condeferred != bcon->condeferred ||
+ strcmp(decompile_conbin(a, tupleDesc),
+ decompile_conbin(b, tupleDesc)) != 0)
+ return false;
+ else
+ return true;
+}
+
+/*
+ * Check columns in child table match up with columns in parent, and increment
+ * their attinhcount.
+ *
+ * Called by CreateInheritance
+ *
+ * Currently all parent columns must be found in child. Missing columns are an
+ * error. One day we might consider creating new columns like CREATE TABLE
+ * does. However, that is widely unpopular --- in the common use case of
+ * partitioned tables it's a foot-gun.
+ *
+ * The data type must match exactly. If the parent column is NOT NULL then
+ * the child must be as well. Defaults are not compared, however.
+ */
static void
MergeAttributesIntoExisting(Relation child_rel, Relation parent_rel)
{
	Relation	attrrel;		/* pg_attribute, opened for tuple updates */
	AttrNumber	parent_attno;
	int			parent_natts;
	TupleDesc	tupleDesc;		/* the parent's tuple descriptor */
	HeapTuple	tuple;
	bool		child_is_partition = false;

	attrrel = table_open(AttributeRelationId, RowExclusiveLock);

	tupleDesc = RelationGetDescr(parent_rel);
	parent_natts = tupleDesc->natts;

	/* If parent_rel is a partitioned table, child_rel must be a partition */
	if (parent_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
		child_is_partition = true;

	/*
	 * Walk all of the parent's columns and match each against the child by
	 * name; any mismatch in type/typmod/collation (or a missing child
	 * column) is an error.
	 */
	for (parent_attno = 1; parent_attno <= parent_natts; parent_attno++)
	{
		Form_pg_attribute attribute = TupleDescAttr(tupleDesc,
													parent_attno - 1);
		char	   *attributeName = NameStr(attribute->attname);

		/* Ignore dropped columns in the parent. */
		if (attribute->attisdropped)
			continue;

		/* Find same column in child (matching on column name). */
		tuple = SearchSysCacheCopyAttName(RelationGetRelid(child_rel),
										  attributeName);
		if (HeapTupleIsValid(tuple))
		{
			/* Check they are same type, typmod, and collation */
			Form_pg_attribute childatt = (Form_pg_attribute) GETSTRUCT(tuple);

			if (attribute->atttypid != childatt->atttypid ||
				attribute->atttypmod != childatt->atttypmod)
				ereport(ERROR,
						(errcode(ERRCODE_DATATYPE_MISMATCH),
						 errmsg("child table \"%s\" has different type for column \"%s\"",
								RelationGetRelationName(child_rel),
								attributeName)));

			if (attribute->attcollation != childatt->attcollation)
				ereport(ERROR,
						(errcode(ERRCODE_COLLATION_MISMATCH),
						 errmsg("child table \"%s\" has different collation for column \"%s\"",
								RelationGetRelationName(child_rel),
								attributeName)));

			/*
			 * Check child doesn't discard NOT NULL property.  (Other
			 * constraints are checked elsewhere.)
			 */
			if (attribute->attnotnull && !childatt->attnotnull)
				ereport(ERROR,
						(errcode(ERRCODE_DATATYPE_MISMATCH),
						 errmsg("column \"%s\" in child table must be marked NOT NULL",
								attributeName)));

			/*
			 * If parent column is generated, child column must be, too.
			 */
			if (attribute->attgenerated && !childatt->attgenerated)
				ereport(ERROR,
						(errcode(ERRCODE_DATATYPE_MISMATCH),
						 errmsg("column \"%s\" in child table must be a generated column",
								attributeName)));

			/*
			 * Check that both generation expressions match.
			 *
			 * The test we apply is to see whether they reverse-compile to the
			 * same source string.  This insulates us from issues like whether
			 * attributes have the same physical column numbers in parent and
			 * child relations.  (See also constraints_equivalent().)
			 */
			if (attribute->attgenerated && childatt->attgenerated)
			{
				TupleConstr *child_constr = child_rel->rd_att->constr;
				TupleConstr *parent_constr = parent_rel->rd_att->constr;
				char	   *child_expr = NULL;
				char	   *parent_expr = NULL;

				/* generated columns guarantee defval entries exist */
				Assert(child_constr != NULL);
				Assert(parent_constr != NULL);

				/* Deparse the child's generation expression for this column */
				for (int i = 0; i < child_constr->num_defval; i++)
				{
					if (child_constr->defval[i].adnum == childatt->attnum)
					{
						child_expr =
							TextDatumGetCString(DirectFunctionCall2(pg_get_expr,
																	CStringGetTextDatum(child_constr->defval[i].adbin),
																	ObjectIdGetDatum(child_rel->rd_id)));
						break;
					}
				}
				Assert(child_expr != NULL);

				/* ... and likewise the parent's */
				for (int i = 0; i < parent_constr->num_defval; i++)
				{
					if (parent_constr->defval[i].adnum == attribute->attnum)
					{
						parent_expr =
							TextDatumGetCString(DirectFunctionCall2(pg_get_expr,
																	CStringGetTextDatum(parent_constr->defval[i].adbin),
																	ObjectIdGetDatum(parent_rel->rd_id)));
						break;
					}
				}
				Assert(parent_expr != NULL);

				if (strcmp(child_expr, parent_expr) != 0)
					ereport(ERROR,
							(errcode(ERRCODE_DATATYPE_MISMATCH),
							 errmsg("column \"%s\" in child table has a conflicting generation expression",
									attributeName)));
			}

			/*
			 * OK, bump the child column's inheritance count.  (If we fail
			 * later on, this change will just roll back.)
			 */
			childatt->attinhcount++;

			/*
			 * In case of partitions, we must enforce that value of attislocal
			 * is same in all partitions.  (Note: there are only inherited
			 * attributes in partitions)
			 */
			if (child_is_partition)
			{
				Assert(childatt->attinhcount == 1);
				childatt->attislocal = false;
			}

			/* write the modified copy back to pg_attribute */
			CatalogTupleUpdate(attrrel, &tuple->t_self, tuple);
			heap_freetuple(tuple);
		}
		else
		{
			ereport(ERROR,
					(errcode(ERRCODE_DATATYPE_MISMATCH),
					 errmsg("child table is missing column \"%s\"",
							attributeName)));
		}
	}

	table_close(attrrel, RowExclusiveLock);
}
+
/*
 * Check constraints in child table match up with constraints in parent,
 * and increment their coninhcount.
 *
 * Constraints that are marked ONLY in the parent are ignored.
 *
 * Called by CreateInheritance
 *
 * Currently all constraints in parent must be present in the child. One day we
 * may consider adding new constraints like CREATE TABLE does.
 *
 * XXX This is O(N^2) which may be an issue with tables with hundreds of
 * constraints. As long as tables have more like 10 constraints it shouldn't be
 * a problem though. Even 100 constraints ought not be the end of the world.
 *
 * XXX See MergeWithExistingConstraint too if you change this code.
 */
static void
MergeConstraintsIntoExisting(Relation child_rel, Relation parent_rel)
{
	Relation	catalog_relation;	/* pg_constraint */
	TupleDesc	tuple_desc;
	SysScanDesc parent_scan;
	ScanKeyData parent_key;
	HeapTuple	parent_tuple;
	bool		child_is_partition = false;

	catalog_relation = table_open(ConstraintRelationId, RowExclusiveLock);
	tuple_desc = RelationGetDescr(catalog_relation);

	/* If parent_rel is a partitioned table, child_rel must be a partition */
	if (parent_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
		child_is_partition = true;

	/* Outer loop scans through the parent's constraint definitions */
	ScanKeyInit(&parent_key,
				Anum_pg_constraint_conrelid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(RelationGetRelid(parent_rel)));
	parent_scan = systable_beginscan(catalog_relation, ConstraintRelidTypidNameIndexId,
									 true, NULL, 1, &parent_key);

	while (HeapTupleIsValid(parent_tuple = systable_getnext(parent_scan)))
	{
		Form_pg_constraint parent_con = (Form_pg_constraint) GETSTRUCT(parent_tuple);
		SysScanDesc child_scan;
		ScanKeyData child_key;
		HeapTuple	child_tuple;
		bool		found = false;

		/* only CHECK constraints participate in inheritance here */
		if (parent_con->contype != CONSTRAINT_CHECK)
			continue;

		/* if the parent's constraint is marked NO INHERIT, it's not inherited */
		if (parent_con->connoinherit)
			continue;

		/*
		 * Search for a child constraint matching this one (inner scan over
		 * the child's pg_constraint rows; see O(N^2) note in the header).
		 */
		ScanKeyInit(&child_key,
					Anum_pg_constraint_conrelid,
					BTEqualStrategyNumber, F_OIDEQ,
					ObjectIdGetDatum(RelationGetRelid(child_rel)));
		child_scan = systable_beginscan(catalog_relation, ConstraintRelidTypidNameIndexId,
										true, NULL, 1, &child_key);

		while (HeapTupleIsValid(child_tuple = systable_getnext(child_scan)))
		{
			Form_pg_constraint child_con = (Form_pg_constraint) GETSTRUCT(child_tuple);
			HeapTuple	child_copy;

			if (child_con->contype != CONSTRAINT_CHECK)
				continue;

			/* matching is by constraint name */
			if (strcmp(NameStr(parent_con->conname),
					   NameStr(child_con->conname)) != 0)
				continue;

			/* same name but different expression: that's an error */
			if (!constraints_equivalent(parent_tuple, child_tuple, tuple_desc))
				ereport(ERROR,
						(errcode(ERRCODE_DATATYPE_MISMATCH),
						 errmsg("child table \"%s\" has different definition for check constraint \"%s\"",
								RelationGetRelationName(child_rel),
								NameStr(parent_con->conname))));

			/* If the child constraint is "no inherit" then cannot merge */
			if (child_con->connoinherit)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
						 errmsg("constraint \"%s\" conflicts with non-inherited constraint on child table \"%s\"",
								NameStr(child_con->conname),
								RelationGetRelationName(child_rel))));

			/*
			 * If the child constraint is "not valid" then cannot merge with a
			 * valid parent constraint
			 */
			if (parent_con->convalidated && !child_con->convalidated)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
						 errmsg("constraint \"%s\" conflicts with NOT VALID constraint on child table \"%s\"",
								NameStr(child_con->conname),
								RelationGetRelationName(child_rel))));

			/*
			 * OK, bump the child constraint's inheritance count.  (If we fail
			 * later on, this change will just roll back.)
			 */
			child_copy = heap_copytuple(child_tuple);
			child_con = (Form_pg_constraint) GETSTRUCT(child_copy);
			child_con->coninhcount++;

			/*
			 * In case of partitions, an inherited constraint must be
			 * inherited only once since it cannot have multiple parents and
			 * it is never considered local.
			 */
			if (child_is_partition)
			{
				Assert(child_con->coninhcount == 1);
				child_con->conislocal = false;
			}

			CatalogTupleUpdate(catalog_relation, &child_copy->t_self, child_copy);
			heap_freetuple(child_copy);

			found = true;
			break;
		}

		systable_endscan(child_scan);

		if (!found)
			ereport(ERROR,
					(errcode(ERRCODE_DATATYPE_MISMATCH),
					 errmsg("child table is missing constraint \"%s\"",
							NameStr(parent_con->conname))));
	}

	systable_endscan(parent_scan);
	table_close(catalog_relation, RowExclusiveLock);
}
+
+/*
+ * ALTER TABLE NO INHERIT
+ *
+ * Return value is the address of the relation that is no longer parent.
+ */
+static ObjectAddress
+ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode)
+{
+ ObjectAddress address;
+ Relation parent_rel;
+
+ if (rel->rd_rel->relispartition)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot change inheritance of a partition")));
+
+ /*
+ * AccessShareLock on the parent is probably enough, seeing that DROP
+ * TABLE doesn't lock parent tables at all. We need some lock since we'll
+ * be inspecting the parent's schema.
+ */
+ parent_rel = table_openrv(parent, AccessShareLock);
+
+ /*
+ * We don't bother to check ownership of the parent table --- ownership of
+ * the child is presumed enough rights.
+ */
+
+ /* Off to RemoveInheritance() where most of the work happens */
+ RemoveInheritance(rel, parent_rel, false);
+
+ ObjectAddressSet(address, RelationRelationId,
+ RelationGetRelid(parent_rel));
+
+ /* keep our lock on the parent relation until commit */
+ table_close(parent_rel, NoLock);
+
+ return address;
+}
+
+/*
+ * MarkInheritDetached
+ *
+ * Set inhdetachpending for a partition, for ATExecDetachPartition
+ * in concurrent mode. While at it, verify that no other partition is
+ * already pending detach.
+ */
+static void
+MarkInheritDetached(Relation child_rel, Relation parent_rel)
+{
+ Relation catalogRelation;
+ SysScanDesc scan;
+ ScanKeyData key;
+ HeapTuple inheritsTuple;
+ bool found = false;
+
+ Assert(parent_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
+
+ /*
+ * Find pg_inherits entries by inhparent. (We need to scan them all in
+ * order to verify that no other partition is pending detach.)
+ */
+ catalogRelation = table_open(InheritsRelationId, RowExclusiveLock);
+ ScanKeyInit(&key,
+ Anum_pg_inherits_inhparent,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(RelationGetRelid(parent_rel)));
+ scan = systable_beginscan(catalogRelation, InheritsParentIndexId,
+ true, NULL, 1, &key);
+
+ while (HeapTupleIsValid(inheritsTuple = systable_getnext(scan)))
+ {
+ Form_pg_inherits inhForm;
+
+ inhForm = (Form_pg_inherits) GETSTRUCT(inheritsTuple);
+ if (inhForm->inhdetachpending)
+ ereport(ERROR,
+ errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("partition \"%s\" already pending detach in partitioned table \"%s.%s\"",
+ get_rel_name(inhForm->inhrelid),
+ get_namespace_name(parent_rel->rd_rel->relnamespace),
+ RelationGetRelationName(parent_rel)),
+ errhint("Use ALTER TABLE ... DETACH PARTITION ... FINALIZE to complete the pending detach operation."));
+
+ if (inhForm->inhrelid == RelationGetRelid(child_rel))
+ {
+ HeapTuple newtup;
+
+ newtup = heap_copytuple(inheritsTuple);
+ ((Form_pg_inherits) GETSTRUCT(newtup))->inhdetachpending = true;
+
+ CatalogTupleUpdate(catalogRelation,
+ &inheritsTuple->t_self,
+ newtup);
+ found = true;
+ heap_freetuple(newtup);
+ /* keep looking, to ensure we catch others pending detach */
+ }
+ }
+
+ /* Done */
+ systable_endscan(scan);
+ table_close(catalogRelation, RowExclusiveLock);
+
+ if (!found)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_TABLE),
+ errmsg("relation \"%s\" is not a partition of relation \"%s\"",
+ RelationGetRelationName(child_rel),
+ RelationGetRelationName(parent_rel))));
+}
+
/*
 * RemoveInheritance
 *
 * Drop a parent from the child's parents. This just adjusts the attinhcount
 * and attislocal of the columns and removes the pg_inherit and pg_depend
 * entries.  expect_detached is passed down to DeleteInheritsTuple, q.v..
 *
 * If attinhcount goes to 0 then attislocal gets set to true.  If it goes back
 * up attislocal stays true, which means if a child is ever removed from a
 * parent then its columns will never be automatically dropped which may
 * surprise.  But at least we'll never surprise by dropping columns someone
 * isn't expecting to be dropped which would actually mean data loss.
 *
 * coninhcount and conislocal for inherited constraints are adjusted in
 * exactly the same way.
 *
 * Common to ATExecDropInherit() and ATExecDetachPartition().
 */
static void
RemoveInheritance(Relation child_rel, Relation parent_rel, bool expect_detached)
{
	Relation	catalogRelation;	/* reused for pg_attribute, then pg_constraint */
	SysScanDesc scan;
	ScanKeyData key[3];
	HeapTuple	attributeTuple,
				constraintTuple;
	List	   *connames;		/* names of the parent's CHECK constraints */
	bool		found;
	bool		child_is_partition = false;

	/* If parent_rel is a partitioned table, child_rel must be a partition */
	if (parent_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
		child_is_partition = true;

	/* Remove the pg_inherits row first; failure means they weren't related */
	found = DeleteInheritsTuple(RelationGetRelid(child_rel),
								RelationGetRelid(parent_rel),
								expect_detached,
								RelationGetRelationName(child_rel));
	if (!found)
	{
		if (child_is_partition)
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_TABLE),
					 errmsg("relation \"%s\" is not a partition of relation \"%s\"",
							RelationGetRelationName(child_rel),
							RelationGetRelationName(parent_rel))));
		else
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_TABLE),
					 errmsg("relation \"%s\" is not a parent of relation \"%s\"",
							RelationGetRelationName(parent_rel),
							RelationGetRelationName(child_rel))));
	}

	/*
	 * Search through child columns looking for ones matching parent rel
	 */
	catalogRelation = table_open(AttributeRelationId, RowExclusiveLock);
	ScanKeyInit(&key[0],
				Anum_pg_attribute_attrelid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(RelationGetRelid(child_rel)));
	scan = systable_beginscan(catalogRelation, AttributeRelidNumIndexId,
							  true, NULL, 1, key);
	while (HeapTupleIsValid(attributeTuple = systable_getnext(scan)))
	{
		Form_pg_attribute att = (Form_pg_attribute) GETSTRUCT(attributeTuple);

		/* Ignore if dropped or not inherited */
		if (att->attisdropped)
			continue;
		if (att->attinhcount <= 0)
			continue;

		/* a same-named column in the parent means this one was inherited */
		if (SearchSysCacheExistsAttName(RelationGetRelid(parent_rel),
										NameStr(att->attname)))
		{
			/* Decrement inhcount and possibly set islocal to true */
			HeapTuple	copyTuple = heap_copytuple(attributeTuple);
			Form_pg_attribute copy_att = (Form_pg_attribute) GETSTRUCT(copyTuple);

			copy_att->attinhcount--;
			if (copy_att->attinhcount == 0)
				copy_att->attislocal = true;

			CatalogTupleUpdate(catalogRelation, &copyTuple->t_self, copyTuple);
			heap_freetuple(copyTuple);
		}
	}
	systable_endscan(scan);
	table_close(catalogRelation, RowExclusiveLock);

	/*
	 * Likewise, find inherited check constraints and disinherit them. To do
	 * this, we first need a list of the names of the parent's check
	 * constraints.  (We cheat a bit by only checking for name matches,
	 * assuming that the expressions will match.)
	 */
	catalogRelation = table_open(ConstraintRelationId, RowExclusiveLock);
	ScanKeyInit(&key[0],
				Anum_pg_constraint_conrelid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(RelationGetRelid(parent_rel)));
	scan = systable_beginscan(catalogRelation, ConstraintRelidTypidNameIndexId,
							  true, NULL, 1, key);

	connames = NIL;

	while (HeapTupleIsValid(constraintTuple = systable_getnext(scan)))
	{
		Form_pg_constraint con = (Form_pg_constraint) GETSTRUCT(constraintTuple);

		if (con->contype == CONSTRAINT_CHECK)
			connames = lappend(connames, pstrdup(NameStr(con->conname)));
	}

	systable_endscan(scan);

	/* Now scan the child's constraints */
	ScanKeyInit(&key[0],
				Anum_pg_constraint_conrelid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(RelationGetRelid(child_rel)));
	scan = systable_beginscan(catalogRelation, ConstraintRelidTypidNameIndexId,
							  true, NULL, 1, key);

	while (HeapTupleIsValid(constraintTuple = systable_getnext(scan)))
	{
		Form_pg_constraint con = (Form_pg_constraint) GETSTRUCT(constraintTuple);
		bool		match;
		ListCell   *lc;

		if (con->contype != CONSTRAINT_CHECK)
			continue;

		/* does the child constraint's name appear in the parent's list? */
		match = false;
		foreach(lc, connames)
		{
			if (strcmp(NameStr(con->conname), (char *) lfirst(lc)) == 0)
			{
				match = true;
				break;
			}
		}

		if (match)
		{
			/* Decrement inhcount and possibly set islocal to true */
			HeapTuple	copyTuple = heap_copytuple(constraintTuple);
			Form_pg_constraint copy_con = (Form_pg_constraint) GETSTRUCT(copyTuple);

			if (copy_con->coninhcount <= 0) /* shouldn't happen */
				elog(ERROR, "relation %u has non-inherited constraint \"%s\"",
					 RelationGetRelid(child_rel), NameStr(copy_con->conname));

			copy_con->coninhcount--;
			if (copy_con->coninhcount == 0)
				copy_con->conislocal = true;

			CatalogTupleUpdate(catalogRelation, &copyTuple->t_self, copyTuple);
			heap_freetuple(copyTuple);
		}
	}

	systable_endscan(scan);
	table_close(catalogRelation, RowExclusiveLock);

	/* Finally, remove the pg_depend link recorded at inheritance creation */
	drop_parent_dependency(RelationGetRelid(child_rel),
						   RelationRelationId,
						   RelationGetRelid(parent_rel),
						   child_dependency_type(child_is_partition));

	/*
	 * Post alter hook of this inherits. Since object_access_hook doesn't take
	 * multiple object identifiers, we relay oid of parent relation using
	 * auxiliary_id argument.
	 */
	InvokeObjectPostAlterHookArg(InheritsRelationId,
								 RelationGetRelid(child_rel), 0,
								 RelationGetRelid(parent_rel), false);
}
+
+/*
+ * Drop the dependency created by StoreCatalogInheritance1 (CREATE TABLE
+ * INHERITS/ALTER TABLE INHERIT -- refclassid will be RelationRelationId) or
+ * heap_create_with_catalog (CREATE TABLE OF/ALTER TABLE OF -- refclassid will
+ * be TypeRelationId). There's no convenient way to do this, so go trawling
+ * through pg_depend.
+ */
+static void
+drop_parent_dependency(Oid relid, Oid refclassid, Oid refobjid,
+ DependencyType deptype)
+{
+ Relation catalogRelation;
+ SysScanDesc scan;
+ ScanKeyData key[3];
+ HeapTuple depTuple;
+
+ catalogRelation = table_open(DependRelationId, RowExclusiveLock);
+
+ ScanKeyInit(&key[0],
+ Anum_pg_depend_classid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(RelationRelationId));
+ ScanKeyInit(&key[1],
+ Anum_pg_depend_objid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(relid));
+ ScanKeyInit(&key[2],
+ Anum_pg_depend_objsubid,
+ BTEqualStrategyNumber, F_INT4EQ,
+ Int32GetDatum(0));
+
+ scan = systable_beginscan(catalogRelation, DependDependerIndexId, true,
+ NULL, 3, key);
+
+ while (HeapTupleIsValid(depTuple = systable_getnext(scan)))
+ {
+ Form_pg_depend dep = (Form_pg_depend) GETSTRUCT(depTuple);
+
+ if (dep->refclassid == refclassid &&
+ dep->refobjid == refobjid &&
+ dep->refobjsubid == 0 &&
+ dep->deptype == deptype)
+ CatalogTupleDelete(catalogRelation, &depTuple->t_self);
+ }
+
+ systable_endscan(scan);
+ table_close(catalogRelation, RowExclusiveLock);
+}
+
/*
 * ALTER TABLE OF
 *
 * Attach a table to a composite type, as though it had been created with CREATE
 * TABLE OF.  All attname, atttypid, atttypmod and attcollation must match.  The
 * subject table must not have inheritance parents.  These restrictions ensure
 * that you cannot create a configuration impossible with CREATE TABLE OF alone.
 *
 * The address of the type is returned.
 */
static ObjectAddress
ATExecAddOf(Relation rel, const TypeName *ofTypename, LOCKMODE lockmode)
{
	Oid			relid = RelationGetRelid(rel);
	Type		typetuple;
	Form_pg_type typeform;
	Oid			typeid;
	Relation	inheritsRelation,
				relationRelation;
	SysScanDesc scan;
	ScanKeyData key;
	AttrNumber	table_attno,
				type_attno;
	TupleDesc	typeTupleDesc,
				tableTupleDesc;
	ObjectAddress tableobj,
				typeobj;
	HeapTuple	classtuple;

	/* Validate the type. */
	typetuple = typenameType(NULL, ofTypename, NULL);
	check_of_type(typetuple);
	typeform = (Form_pg_type) GETSTRUCT(typetuple);
	typeid = typeform->oid;

	/* Fail if the table has any inheritance parents. */
	inheritsRelation = table_open(InheritsRelationId, AccessShareLock);
	ScanKeyInit(&key,
				Anum_pg_inherits_inhrelid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(relid));
	scan = systable_beginscan(inheritsRelation, InheritsRelidSeqnoIndexId,
							  true, NULL, 1, &key);
	if (HeapTupleIsValid(systable_getnext(scan)))
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("typed tables cannot inherit")));
	systable_endscan(scan);
	table_close(inheritsRelation, AccessShareLock);

	/*
	 * Check the tuple descriptors for compatibility.  Unlike inheritance, we
	 * require that the order also match.  However, attnotnull need not match.
	 *
	 * The loop advances two cursors in lockstep: type_attno over the type's
	 * attributes and table_attno over the table's, skipping dropped columns
	 * on both sides.
	 */
	typeTupleDesc = lookup_rowtype_tupdesc(typeid, -1);
	tableTupleDesc = RelationGetDescr(rel);
	table_attno = 1;
	for (type_attno = 1; type_attno <= typeTupleDesc->natts; type_attno++)
	{
		Form_pg_attribute type_attr,
					table_attr;
		const char *type_attname,
				   *table_attname;

		/* Get the next non-dropped type attribute. */
		type_attr = TupleDescAttr(typeTupleDesc, type_attno - 1);
		if (type_attr->attisdropped)
			continue;
		type_attname = NameStr(type_attr->attname);

		/* Get the next non-dropped table attribute. */
		do
		{
			if (table_attno > tableTupleDesc->natts)
				ereport(ERROR,
						(errcode(ERRCODE_DATATYPE_MISMATCH),
						 errmsg("table is missing column \"%s\"",
								type_attname)));
			table_attr = TupleDescAttr(tableTupleDesc, table_attno - 1);
			table_attno++;
		} while (table_attr->attisdropped);
		table_attname = NameStr(table_attr->attname);

		/* Compare name. */
		if (strncmp(table_attname, type_attname, NAMEDATALEN) != 0)
			ereport(ERROR,
					(errcode(ERRCODE_DATATYPE_MISMATCH),
					 errmsg("table has column \"%s\" where type requires \"%s\"",
							table_attname, type_attname)));

		/* Compare type. */
		if (table_attr->atttypid != type_attr->atttypid ||
			table_attr->atttypmod != type_attr->atttypmod ||
			table_attr->attcollation != type_attr->attcollation)
			ereport(ERROR,
					(errcode(ERRCODE_DATATYPE_MISMATCH),
					 errmsg("table \"%s\" has different type for column \"%s\"",
							RelationGetRelationName(rel), type_attname)));
	}
	ReleaseTupleDesc(typeTupleDesc);

	/* Any remaining columns at the end of the table had better be dropped. */
	for (; table_attno <= tableTupleDesc->natts; table_attno++)
	{
		Form_pg_attribute table_attr = TupleDescAttr(tableTupleDesc,
													 table_attno - 1);

		if (!table_attr->attisdropped)
			ereport(ERROR,
					(errcode(ERRCODE_DATATYPE_MISMATCH),
					 errmsg("table has extra column \"%s\"",
							NameStr(table_attr->attname))));
	}

	/* If the table was already typed, drop the existing dependency. */
	if (rel->rd_rel->reloftype)
		drop_parent_dependency(relid, TypeRelationId, rel->rd_rel->reloftype,
							   DEPENDENCY_NORMAL);

	/* Record a dependency on the new type. */
	tableobj.classId = RelationRelationId;
	tableobj.objectId = relid;
	tableobj.objectSubId = 0;
	typeobj.classId = TypeRelationId;
	typeobj.objectId = typeid;
	typeobj.objectSubId = 0;
	recordDependencyOn(&tableobj, &typeobj, DEPENDENCY_NORMAL);

	/* Update pg_class.reloftype */
	relationRelation = table_open(RelationRelationId, RowExclusiveLock);
	classtuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
	if (!HeapTupleIsValid(classtuple))
		elog(ERROR, "cache lookup failed for relation %u", relid);
	((Form_pg_class) GETSTRUCT(classtuple))->reloftype = typeid;
	CatalogTupleUpdate(relationRelation, &classtuple->t_self, classtuple);

	InvokeObjectPostAlterHook(RelationRelationId, relid, 0);

	heap_freetuple(classtuple);
	table_close(relationRelation, RowExclusiveLock);

	ReleaseSysCache(typetuple);

	return typeobj;
}
+
+/*
+ * ALTER TABLE NOT OF
+ *
+ * Detach a typed table from its originating type. Just clear reloftype and
+ * remove the dependency.
+ */
+static void
+ATExecDropOf(Relation rel, LOCKMODE lockmode)
+{
+ Oid relid = RelationGetRelid(rel);
+ Relation relationRelation;
+ HeapTuple tuple;
+
+ if (!OidIsValid(rel->rd_rel->reloftype))
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("\"%s\" is not a typed table",
+ RelationGetRelationName(rel))));
+
+ /*
+ * We don't bother to check ownership of the type --- ownership of the
+ * table is presumed enough rights. No lock required on the type, either.
+ */
+
+ drop_parent_dependency(relid, TypeRelationId, rel->rd_rel->reloftype,
+ DEPENDENCY_NORMAL);
+
+ /* Clear pg_class.reloftype */
+ relationRelation = table_open(RelationRelationId, RowExclusiveLock);
+ tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
+ if (!HeapTupleIsValid(tuple))
+ elog(ERROR, "cache lookup failed for relation %u", relid);
+ ((Form_pg_class) GETSTRUCT(tuple))->reloftype = InvalidOid;
+ CatalogTupleUpdate(relationRelation, &tuple->t_self, tuple);
+
+ InvokeObjectPostAlterHook(RelationRelationId, relid, 0);
+
+ heap_freetuple(tuple);
+ table_close(relationRelation, RowExclusiveLock);
+}
+
/*
 * relation_mark_replica_identity: Update a table's replica identity
 *
 * Iff ri_type = REPLICA_IDENTITY_INDEX, indexOid must be the Oid of a suitable
 * index. Otherwise, it must be InvalidOid.
 *
 * Caller had better hold an exclusive lock on the relation, as the results
 * of running two of these concurrently wouldn't be pretty.
 */
static void
relation_mark_replica_identity(Relation rel, char ri_type, Oid indexOid,
							   bool is_internal)
{
	Relation	pg_index;
	Relation	pg_class;
	HeapTuple	pg_class_tuple;
	HeapTuple	pg_index_tuple;
	Form_pg_class pg_class_form;
	Form_pg_index pg_index_form;
	ListCell   *index;

	/*
	 * Check whether relreplident has changed, and update it if so.
	 */
	pg_class = table_open(RelationRelationId, RowExclusiveLock);
	pg_class_tuple = SearchSysCacheCopy1(RELOID,
										 ObjectIdGetDatum(RelationGetRelid(rel)));
	if (!HeapTupleIsValid(pg_class_tuple))
		elog(ERROR, "cache lookup failed for relation \"%s\"",
			 RelationGetRelationName(rel));
	pg_class_form = (Form_pg_class) GETSTRUCT(pg_class_tuple);
	if (pg_class_form->relreplident != ri_type)
	{
		pg_class_form->relreplident = ri_type;
		CatalogTupleUpdate(pg_class, &pg_class_tuple->t_self, pg_class_tuple);
	}
	table_close(pg_class, RowExclusiveLock);
	heap_freetuple(pg_class_tuple);

	/*
	 * Update the per-index indisreplident flags correctly: set the flag only
	 * on indexOid (if any) and clear it on every other index of the table.
	 */
	pg_index = table_open(IndexRelationId, RowExclusiveLock);
	foreach(index, RelationGetIndexList(rel))
	{
		Oid			thisIndexOid = lfirst_oid(index);
		bool		dirty = false;	/* did we modify this index's tuple? */

		pg_index_tuple = SearchSysCacheCopy1(INDEXRELID,
											 ObjectIdGetDatum(thisIndexOid));
		if (!HeapTupleIsValid(pg_index_tuple))
			elog(ERROR, "cache lookup failed for index %u", thisIndexOid);
		pg_index_form = (Form_pg_index) GETSTRUCT(pg_index_tuple);

		if (thisIndexOid == indexOid)
		{
			/* Set the bit if not already set. */
			if (!pg_index_form->indisreplident)
			{
				dirty = true;
				pg_index_form->indisreplident = true;
			}
		}
		else
		{
			/* Unset the bit if set. */
			if (pg_index_form->indisreplident)
			{
				dirty = true;
				pg_index_form->indisreplident = false;
			}
		}

		if (dirty)
		{
			CatalogTupleUpdate(pg_index, &pg_index_tuple->t_self, pg_index_tuple);
			InvokeObjectPostAlterHookArg(IndexRelationId, thisIndexOid, 0,
										 InvalidOid, is_internal);

			/*
			 * Invalidate the relcache for the table, so that after we commit
			 * all sessions will refresh the table's replica identity index
			 * before attempting any UPDATE or DELETE on the table.  (If we
			 * changed the table's pg_class row above, then a relcache inval
			 * is already queued due to that; but we might not have.)
			 */
			CacheInvalidateRelcache(rel);
		}
		heap_freetuple(pg_index_tuple);
	}

	table_close(pg_index, RowExclusiveLock);
}
+
+/*
+ * ALTER TABLE <name> REPLICA IDENTITY ...
+ */
+static void
+ATExecReplicaIdentity(Relation rel, ReplicaIdentityStmt *stmt, LOCKMODE lockmode)
+{
+ Oid indexOid;
+ Relation indexRel;
+ int key;
+
+ if (stmt->identity_type == REPLICA_IDENTITY_DEFAULT)
+ {
+ relation_mark_replica_identity(rel, stmt->identity_type, InvalidOid, true);
+ return;
+ }
+ else if (stmt->identity_type == REPLICA_IDENTITY_FULL)
+ {
+ relation_mark_replica_identity(rel, stmt->identity_type, InvalidOid, true);
+ return;
+ }
+ else if (stmt->identity_type == REPLICA_IDENTITY_NOTHING)
+ {
+ relation_mark_replica_identity(rel, stmt->identity_type, InvalidOid, true);
+ return;
+ }
+ else if (stmt->identity_type == REPLICA_IDENTITY_INDEX)
+ {
+ /* fallthrough */ ;
+ }
+ else
+ elog(ERROR, "unexpected identity type %u", stmt->identity_type);
+
+ /* Check that the index exists */
+ indexOid = get_relname_relid(stmt->name, rel->rd_rel->relnamespace);
+ if (!OidIsValid(indexOid))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("index \"%s\" for table \"%s\" does not exist",
+ stmt->name, RelationGetRelationName(rel))));
+
+ indexRel = index_open(indexOid, ShareLock);
+
+ /* Check that the index is on the relation we're altering. */
+ if (indexRel->rd_index == NULL ||
+ indexRel->rd_index->indrelid != RelationGetRelid(rel))
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("\"%s\" is not an index for table \"%s\"",
+ RelationGetRelationName(indexRel),
+ RelationGetRelationName(rel))));
+ /* The AM must support uniqueness, and the index must in fact be unique. */
+ if (!indexRel->rd_indam->amcanunique ||
+ !indexRel->rd_index->indisunique)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot use non-unique index \"%s\" as replica identity",
+ RelationGetRelationName(indexRel))));
+ /* Deferred indexes are not guaranteed to be always unique. */
+ if (!indexRel->rd_index->indimmediate)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot use non-immediate index \"%s\" as replica identity",
+ RelationGetRelationName(indexRel))));
+ /* Expression indexes aren't supported. */
+ if (RelationGetIndexExpressions(indexRel) != NIL)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot use expression index \"%s\" as replica identity",
+ RelationGetRelationName(indexRel))));
+ /* Predicate indexes aren't supported. */
+ if (RelationGetIndexPredicate(indexRel) != NIL)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot use partial index \"%s\" as replica identity",
+ RelationGetRelationName(indexRel))));
+
+ /* Check index for nullable columns. */
+ for (key = 0; key < IndexRelationGetNumberOfKeyAttributes(indexRel); key++)
+ {
+ int16 attno = indexRel->rd_index->indkey.values[key];
+ Form_pg_attribute attr;
+
+ /*
+ * Reject any other system columns. (Going forward, we'll disallow
+ * indexes containing such columns in the first place, but they might
+ * exist in older branches.)
+ */
+ if (attno <= 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+ errmsg("index \"%s\" cannot be used as replica identity because column %d is a system column",
+ RelationGetRelationName(indexRel), attno)));
+
+ attr = TupleDescAttr(rel->rd_att, attno - 1);
+ if (!attr->attnotnull)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("index \"%s\" cannot be used as replica identity because column \"%s\" is nullable",
+ RelationGetRelationName(indexRel),
+ NameStr(attr->attname))));
+ }
+
+ /* This index is suitable for use as a replica identity. Mark it. */
+ relation_mark_replica_identity(rel, stmt->identity_type, indexOid, true);
+
+ index_close(indexRel, NoLock);
+}
+
+/*
+ * ALTER TABLE ENABLE/DISABLE ROW LEVEL SECURITY
+ */
+static void
+ATExecSetRowSecurity(Relation rel, bool rls)
+{
+ Relation pg_class;
+ Oid relid;
+ HeapTuple tuple;
+
+ relid = RelationGetRelid(rel);
+
+ /* Pull the record for this relation and update it */
+ pg_class = table_open(RelationRelationId, RowExclusiveLock);
+
+ tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
+
+ if (!HeapTupleIsValid(tuple))
+ elog(ERROR, "cache lookup failed for relation %u", relid);
+
+ ((Form_pg_class) GETSTRUCT(tuple))->relrowsecurity = rls;
+ CatalogTupleUpdate(pg_class, &tuple->t_self, tuple);
+
+ table_close(pg_class, RowExclusiveLock);
+ heap_freetuple(tuple);
+}
+
+/*
+ * ALTER TABLE FORCE/NO FORCE ROW LEVEL SECURITY
+ */
+static void
+ATExecForceNoForceRowSecurity(Relation rel, bool force_rls)
+{
+ Relation pg_class;
+ Oid relid;
+ HeapTuple tuple;
+
+ relid = RelationGetRelid(rel);
+
+ pg_class = table_open(RelationRelationId, RowExclusiveLock);
+
+ tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
+
+ if (!HeapTupleIsValid(tuple))
+ elog(ERROR, "cache lookup failed for relation %u", relid);
+
+ ((Form_pg_class) GETSTRUCT(tuple))->relforcerowsecurity = force_rls;
+ CatalogTupleUpdate(pg_class, &tuple->t_self, tuple);
+
+ table_close(pg_class, RowExclusiveLock);
+ heap_freetuple(tuple);
+}
+
/*
 * ALTER FOREIGN TABLE <name> OPTIONS (...)
 */
static void
ATExecGenericOptions(Relation rel, List *options)
{
	Relation	ftrel;			/* pg_foreign_table */
	ForeignServer *server;
	ForeignDataWrapper *fdw;
	HeapTuple	tuple;
	bool		isnull;
	Datum		repl_val[Natts_pg_foreign_table];
	bool		repl_null[Natts_pg_foreign_table];
	bool		repl_repl[Natts_pg_foreign_table];
	Datum		datum;
	Form_pg_foreign_table tableform;

	/* Nothing to do if no options were given */
	if (options == NIL)
		return;

	ftrel = table_open(ForeignTableRelationId, RowExclusiveLock);

	tuple = SearchSysCacheCopy1(FOREIGNTABLEREL, rel->rd_id);
	if (!HeapTupleIsValid(tuple))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("foreign table \"%s\" does not exist",
						RelationGetRelationName(rel))));
	tableform = (Form_pg_foreign_table) GETSTRUCT(tuple);
	/* look up the FDW so its validator can vet the new options */
	server = GetForeignServer(tableform->ftserver);
	fdw = GetForeignDataWrapper(server->fdwid);

	memset(repl_val, 0, sizeof(repl_val));
	memset(repl_null, false, sizeof(repl_null));
	memset(repl_repl, false, sizeof(repl_repl));

	/* Extract the current options */
	datum = SysCacheGetAttr(FOREIGNTABLEREL,
							tuple,
							Anum_pg_foreign_table_ftoptions,
							&isnull);
	if (isnull)
		datum = PointerGetDatum(NULL);

	/* Transform the options */
	datum = transformGenericOptions(ForeignTableRelationId,
									datum,
									options,
									fdw->fdwvalidator);

	/* an empty result means the ftoptions column becomes NULL */
	if (PointerIsValid(DatumGetPointer(datum)))
		repl_val[Anum_pg_foreign_table_ftoptions - 1] = datum;
	else
		repl_null[Anum_pg_foreign_table_ftoptions - 1] = true;

	repl_repl[Anum_pg_foreign_table_ftoptions - 1] = true;

	/* Everything looks good - update the tuple */

	tuple = heap_modify_tuple(tuple, RelationGetDescr(ftrel),
							  repl_val, repl_null, repl_repl);

	CatalogTupleUpdate(ftrel, &tuple->t_self, tuple);

	/*
	 * Invalidate relcache so that all sessions will refresh any cached plans
	 * that might depend on the old options.
	 */
	CacheInvalidateRelcache(rel);

	InvokeObjectPostAlterHook(ForeignTableRelationId,
							  RelationGetRelid(rel), 0);

	table_close(ftrel, RowExclusiveLock);

	heap_freetuple(tuple);
}
+
/*
 * ALTER TABLE ALTER COLUMN SET COMPRESSION
 *
 * Sets pg_attribute.attcompression for the named column to the requested
 * method, and propagates the change to simple index columns on it.
 * (The "tab" work-queue entry is not used by this routine.)
 *
 * Return value is the address of the modified column
 */
static ObjectAddress
ATExecSetCompression(AlteredTableInfo *tab,
					 Relation rel,
					 const char *column,
					 Node *newValue,
					 LOCKMODE lockmode)
{
	Relation	attrel;
	HeapTuple	tuple;
	Form_pg_attribute atttableform;
	AttrNumber	attnum;
	char	   *compression;
	char		cmethod;
	ObjectAddress address;

	/* newValue is a String node carrying the compression method name */
	Assert(IsA(newValue, String));
	compression = strVal(newValue);

	attrel = table_open(AttributeRelationId, RowExclusiveLock);

	/* copy the cache entry so we can scribble on it below */
	tuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), column);
	if (!HeapTupleIsValid(tuple))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_COLUMN),
				 errmsg("column \"%s\" of relation \"%s\" does not exist",
						column, RelationGetRelationName(rel))));

	/* prevent them from altering a system attribute */
	atttableform = (Form_pg_attribute) GETSTRUCT(tuple);
	attnum = atttableform->attnum;
	if (attnum <= 0)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot alter system column \"%s\"", column)));

	/*
	 * Check that column type is compressible, then get the attribute
	 * compression method code
	 */
	cmethod = GetAttributeCompression(atttableform->atttypid, compression);

	/* update pg_attribute entry */
	atttableform->attcompression = cmethod;
	CatalogTupleUpdate(attrel, &tuple->t_self, tuple);

	InvokeObjectPostAlterHook(RelationRelationId,
							  RelationGetRelid(rel),
							  attnum);

	/*
	 * Apply the change to indexes as well (only for simple index columns,
	 * matching behavior of index.c ConstructTupleDescriptor()).
	 */
	SetIndexStorageProperties(rel, attrel, attnum,
							  false, 0,
							  true, cmethod,
							  lockmode);

	heap_freetuple(tuple);

	table_close(attrel, RowExclusiveLock);

	/* make changes visible */
	CommandCounterIncrement();

	ObjectAddressSubSet(address, RelationRelationId,
						RelationGetRelid(rel), attnum);
	return address;
}
+
+
/*
 * Preparation phase for SET LOGGED/UNLOGGED
 *
 * This verifies that we're not trying to change a temp table.  Also,
 * existing foreign key constraints are checked to avoid ending up with
 * permanent tables referencing unlogged tables.
 *
 * Return value is false if the operation is a no-op (in which case the
 * checks are skipped), otherwise true.
 */
static bool
ATPrepChangePersistence(Relation rel, bool toLogged)
{
	Relation	pg_constraint;
	HeapTuple	tuple;
	SysScanDesc scan;
	ScanKeyData skey[1];

	/*
	 * Disallow changing status for a temp table.  Also verify whether we can
	 * get away with doing nothing; in such cases we don't need to run the
	 * checks below, either.
	 */
	switch (rel->rd_rel->relpersistence)
	{
		case RELPERSISTENCE_TEMP:
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
					 errmsg("cannot change logged status of table \"%s\" because it is temporary",
							RelationGetRelationName(rel)),
					 errtable(rel)));
			break;
		case RELPERSISTENCE_PERMANENT:
			if (toLogged)
				/* nothing to do */
				return false;
			break;
		case RELPERSISTENCE_UNLOGGED:
			if (!toLogged)
				/* nothing to do */
				return false;
			break;
	}

	/*
	 * Check that the table is not part any publication when changing to
	 * UNLOGGED as UNLOGGED tables can't be published.
	 */
	if (!toLogged &&
		list_length(GetRelationPublications(RelationGetRelid(rel))) > 0)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("cannot change table \"%s\" to unlogged because it is part of a publication",
						RelationGetRelationName(rel)),
				 errdetail("Unlogged relations cannot be replicated.")));

	/*
	 * Check existing foreign key constraints to preserve the invariant that
	 * permanent tables cannot reference unlogged ones.  Self-referencing
	 * foreign keys can safely be ignored.
	 */
	pg_constraint = table_open(ConstraintRelationId, AccessShareLock);

	/*
	 * Scan conrelid if changing to permanent, else confrelid.  This also
	 * determines whether a useful index exists: conrelid is covered by
	 * ConstraintRelidTypidNameIndexId, while for confrelid we pass
	 * InvalidOid and fall back to a sequential scan.
	 */
	ScanKeyInit(&skey[0],
				toLogged ? Anum_pg_constraint_conrelid :
				Anum_pg_constraint_confrelid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(RelationGetRelid(rel)));
	scan = systable_beginscan(pg_constraint,
							  toLogged ? ConstraintRelidTypidNameIndexId : InvalidOid,
							  true, NULL, 1, skey);

	while (HeapTupleIsValid(tuple = systable_getnext(scan)))
	{
		Form_pg_constraint con = (Form_pg_constraint) GETSTRUCT(tuple);

		if (con->contype == CONSTRAINT_FOREIGN)
		{
			Oid			foreignrelid;
			Relation	foreignrel;

			/* the opposite end of what we used as scankey */
			foreignrelid = toLogged ? con->confrelid : con->conrelid;

			/* ignore if self-referencing */
			if (RelationGetRelid(rel) == foreignrelid)
				continue;

			foreignrel = relation_open(foreignrelid, AccessShareLock);

			if (toLogged)
			{
				/* would leave a permanent table referencing an unlogged one */
				if (!RelationIsPermanent(foreignrel))
					ereport(ERROR,
							(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
							 errmsg("could not change table \"%s\" to logged because it references unlogged table \"%s\"",
									RelationGetRelationName(rel),
									RelationGetRelationName(foreignrel)),
							 errtableconstraint(rel, NameStr(con->conname))));
			}
			else
			{
				/* would leave an unlogged table referenced by a permanent one */
				if (RelationIsPermanent(foreignrel))
					ereport(ERROR,
							(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
							 errmsg("could not change table \"%s\" to unlogged because it references logged table \"%s\"",
									RelationGetRelationName(rel),
									RelationGetRelationName(foreignrel)),
							 errtableconstraint(rel, NameStr(con->conname))));
			}

			relation_close(foreignrel, AccessShareLock);
		}
	}

	systable_endscan(scan);

	table_close(pg_constraint, AccessShareLock);

	return true;
}
+
/*
 * Execute ALTER TABLE SET SCHEMA
 *
 * Returns the object address of the moved relation, or InvalidObjectAddress
 * when the relation is missing and stmt->missing_ok is set.  If oldschema is
 * non-NULL, *oldschema receives the OID of the namespace the relation was in.
 */
ObjectAddress
AlterTableNamespace(AlterObjectSchemaStmt *stmt, Oid *oldschema)
{
	Relation	rel;
	Oid			relid;
	Oid			oldNspOid;
	Oid			nspOid;
	RangeVar   *newrv;
	ObjectAddresses *objsMoved;
	ObjectAddress myself;

	/* Look up and lock the relation; permissions are checked in the callback */
	relid = RangeVarGetRelidExtended(stmt->relation, AccessExclusiveLock,
									 stmt->missing_ok ? RVR_MISSING_OK : 0,
									 RangeVarCallbackForAlterRelation,
									 (void *) stmt);

	if (!OidIsValid(relid))
	{
		ereport(NOTICE,
				(errmsg("relation \"%s\" does not exist, skipping",
						stmt->relation->relname)));
		return InvalidObjectAddress;
	}

	/* already locked above, so no further lock needed here */
	rel = relation_open(relid, NoLock);

	oldNspOid = RelationGetNamespace(rel);

	/* If it's an owned sequence, disallow moving it by itself. */
	if (rel->rd_rel->relkind == RELKIND_SEQUENCE)
	{
		Oid			tableId;
		int32		colId;

		if (sequenceIsOwned(relid, DEPENDENCY_AUTO, &tableId, &colId) ||
			sequenceIsOwned(relid, DEPENDENCY_INTERNAL, &tableId, &colId))
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("cannot move an owned sequence into another schema"),
					 errdetail("Sequence \"%s\" is linked to table \"%s\".",
							   RelationGetRelationName(rel),
							   get_rel_name(tableId))));
	}

	/* Get and lock schema OID and check its permissions. */
	newrv = makeRangeVar(stmt->newschema, RelationGetRelationName(rel), -1);
	nspOid = RangeVarGetAndCheckCreationNamespace(newrv, NoLock, NULL);

	/* common checks on switching namespaces */
	CheckSetNamespace(oldNspOid, nspOid);

	objsMoved = new_object_addresses();
	AlterTableNamespaceInternal(rel, oldNspOid, nspOid, objsMoved);
	free_object_addresses(objsMoved);

	ObjectAddressSet(myself, RelationRelationId, relid);

	if (oldschema)
		*oldschema = oldNspOid;

	/* close rel, but keep lock until commit */
	relation_close(rel, NoLock);

	return myself;
}
+
/*
 * The guts of relocating a table or materialized view to another namespace:
 * besides moving the relation itself, its dependent objects are relocated to
 * the new schema.
 *
 * objsMoved tracks objects already relocated, so repeated visits (e.g. via
 * multiple dependency paths) do not move anything twice.
 */
void
AlterTableNamespaceInternal(Relation rel, Oid oldNspOid, Oid nspOid,
							ObjectAddresses *objsMoved)
{
	Relation	classRel;

	Assert(objsMoved != NULL);

	/* OK, modify the pg_class row and pg_depend entry */
	classRel = table_open(RelationRelationId, RowExclusiveLock);

	AlterRelationNamespaceInternal(classRel, RelationGetRelid(rel), oldNspOid,
								   nspOid, true, objsMoved);

	/* Fix the table's row type too, if it has one */
	if (OidIsValid(rel->rd_rel->reltype))
		AlterTypeNamespaceInternal(rel->rd_rel->reltype,
								   nspOid, false, false, objsMoved);

	/*
	 * Fix other dependent stuff: indexes, owned sequences, and constraints
	 * exist only for plain/partitioned tables and matviews.
	 */
	if (rel->rd_rel->relkind == RELKIND_RELATION ||
		rel->rd_rel->relkind == RELKIND_MATVIEW ||
		rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
	{
		AlterIndexNamespaces(classRel, rel, oldNspOid, nspOid, objsMoved);
		AlterSeqNamespaces(classRel, rel, oldNspOid, nspOid,
						   objsMoved, AccessExclusiveLock);
		AlterConstraintNamespaces(RelationGetRelid(rel), oldNspOid, nspOid,
								  false, objsMoved);
	}

	table_close(classRel, RowExclusiveLock);
}
+
/*
 * The guts of relocating a relation to another namespace: fix the pg_class
 * entry, and the pg_depend entry if any.  Caller must already have
 * opened and write-locked pg_class.
 *
 * hasDependEntry tells whether the relation has its own pg_depend entry on
 * the namespace (true for tables, false e.g. for indexes).  objsMoved is
 * consulted and updated so the same relation is never moved twice.
 */
void
AlterRelationNamespaceInternal(Relation classRel, Oid relOid,
							   Oid oldNspOid, Oid newNspOid,
							   bool hasDependEntry,
							   ObjectAddresses *objsMoved)
{
	HeapTuple	classTup;
	Form_pg_class classForm;
	ObjectAddress thisobj;
	bool		already_done = false;

	classTup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relOid));
	if (!HeapTupleIsValid(classTup))
		elog(ERROR, "cache lookup failed for relation %u", relOid);
	classForm = (Form_pg_class) GETSTRUCT(classTup);

	Assert(classForm->relnamespace == oldNspOid);

	thisobj.classId = RelationRelationId;
	thisobj.objectId = relOid;
	thisobj.objectSubId = 0;

	/*
	 * If the object has already been moved, don't move it again.  If it's
	 * already in the right place, don't move it, but still fire the object
	 * access hook.
	 */
	already_done = object_address_present(&thisobj, objsMoved);
	if (!already_done && oldNspOid != newNspOid)
	{
		/* check for duplicate name (more friendly than unique-index failure) */
		if (get_relname_relid(NameStr(classForm->relname),
							  newNspOid) != InvalidOid)
			ereport(ERROR,
					(errcode(ERRCODE_DUPLICATE_TABLE),
					 errmsg("relation \"%s\" already exists in schema \"%s\"",
							NameStr(classForm->relname),
							get_namespace_name(newNspOid))));

		/* classTup is a copy, so OK to scribble on */
		classForm->relnamespace = newNspOid;

		CatalogTupleUpdate(classRel, &classTup->t_self, classTup);

		/* Update dependency on schema if caller said so */
		if (hasDependEntry &&
			changeDependencyFor(RelationRelationId,
								relOid,
								NamespaceRelationId,
								oldNspOid,
								newNspOid) != 1)
			elog(ERROR, "failed to change schema dependency for relation \"%s\"",
				 NameStr(classForm->relname));
	}
	if (!already_done)
	{
		/* record the move and notify any object-access hook exactly once */
		add_exact_object_address(&thisobj, objsMoved);

		InvokeObjectPostAlterHook(RelationRelationId, relOid, 0);
	}

	heap_freetuple(classTup);
}
+
+/*
+ * Move all indexes for the specified relation to another namespace.
+ *
+ * Note: we assume adequate permission checking was done by the caller,
+ * and that the caller has a suitable lock on the owning relation.
+ */
+static void
+AlterIndexNamespaces(Relation classRel, Relation rel,
+ Oid oldNspOid, Oid newNspOid, ObjectAddresses *objsMoved)
+{
+ List *indexList;
+ ListCell *l;
+
+ indexList = RelationGetIndexList(rel);
+
+ foreach(l, indexList)
+ {
+ Oid indexOid = lfirst_oid(l);
+ ObjectAddress thisobj;
+
+ thisobj.classId = RelationRelationId;
+ thisobj.objectId = indexOid;
+ thisobj.objectSubId = 0;
+
+ /*
+ * Note: currently, the index will not have its own dependency on the
+ * namespace, so we don't need to do changeDependencyFor(). There's no
+ * row type in pg_type, either.
+ *
+ * XXX this objsMoved test may be pointless -- surely we have a single
+ * dependency link from a relation to each index?
+ */
+ if (!object_address_present(&thisobj, objsMoved))
+ {
+ AlterRelationNamespaceInternal(classRel, indexOid,
+ oldNspOid, newNspOid,
+ false, objsMoved);
+ add_exact_object_address(&thisobj, objsMoved);
+ }
+ }
+
+ list_free(indexList);
+}
+
/*
 * Move all identity and SERIAL-column sequences of the specified relation to another
 * namespace.
 *
 * Note: we assume adequate permission checking was done by the caller,
 * and that the caller has a suitable lock on the owning relation.
 */
static void
AlterSeqNamespaces(Relation classRel, Relation rel,
				   Oid oldNspOid, Oid newNspOid, ObjectAddresses *objsMoved,
				   LOCKMODE lockmode)
{
	Relation	depRel;
	SysScanDesc scan;
	ScanKeyData key[2];
	HeapTuple	tup;

	/*
	 * SERIAL sequences are those having an auto dependency on one of the
	 * table's columns (we don't care *which* column, exactly).
	 * Identity sequences have an internal dependency instead, so both
	 * deptypes are accepted in the loop below.
	 */
	depRel = table_open(DependRelationId, AccessShareLock);

	ScanKeyInit(&key[0],
				Anum_pg_depend_refclassid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(RelationRelationId));
	ScanKeyInit(&key[1],
				Anum_pg_depend_refobjid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(RelationGetRelid(rel)));
	/* we leave refobjsubid unspecified */

	scan = systable_beginscan(depRel, DependReferenceIndexId, true,
							  NULL, 2, key);

	while (HeapTupleIsValid(tup = systable_getnext(scan)))
	{
		Form_pg_depend depForm = (Form_pg_depend) GETSTRUCT(tup);
		Relation	seqRel;

		/* skip dependencies other than auto dependencies on columns */
		if (depForm->refobjsubid == 0 ||
			depForm->classid != RelationRelationId ||
			depForm->objsubid != 0 ||
			!(depForm->deptype == DEPENDENCY_AUTO || depForm->deptype == DEPENDENCY_INTERNAL))
			continue;

		/* Use relation_open just in case it's an index */
		seqRel = relation_open(depForm->objid, lockmode);

		/* skip non-sequence relations */
		if (RelationGetForm(seqRel)->relkind != RELKIND_SEQUENCE)
		{
			/* No need to keep the lock */
			relation_close(seqRel, lockmode);
			continue;
		}

		/* Fix the pg_class and pg_depend entries */
		AlterRelationNamespaceInternal(classRel, depForm->objid,
									   oldNspOid, newNspOid,
									   true, objsMoved);

		/*
		 * Sequences used to have entries in pg_type, but no longer do. If we
		 * ever re-instate that, we'll need to move the pg_type entry to the
		 * new namespace, too (using AlterTypeNamespaceInternal).
		 */
		Assert(RelationGetForm(seqRel)->reltype == InvalidOid);

		/* Now we can close it.  Keep the lock till end of transaction. */
		relation_close(seqRel, NoLock);
	}

	systable_endscan(scan);

	relation_close(depRel, AccessShareLock);
}
+
+
+/*
+ * This code supports
+ * CREATE TEMP TABLE ... ON COMMIT { DROP | PRESERVE ROWS | DELETE ROWS }
+ *
+ * Because we only support this for TEMP tables, it's sufficient to remember
+ * the state in a backend-local data structure.
+ */
+
+/*
+ * Register a newly-created relation's ON COMMIT action.
+ */
+void
+register_on_commit_action(Oid relid, OnCommitAction action)
+{
+ OnCommitItem *oc;
+ MemoryContext oldcxt;
+
+ /*
+ * We needn't bother registering the relation unless there is an ON COMMIT
+ * action we need to take.
+ */
+ if (action == ONCOMMIT_NOOP || action == ONCOMMIT_PRESERVE_ROWS)
+ return;
+
+ oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
+
+ oc = (OnCommitItem *) palloc(sizeof(OnCommitItem));
+ oc->relid = relid;
+ oc->oncommit = action;
+ oc->creating_subid = GetCurrentSubTransactionId();
+ oc->deleting_subid = InvalidSubTransactionId;
+
+ /*
+ * We use lcons() here so that ON COMMIT actions are processed in reverse
+ * order of registration. That might not be essential but it seems
+ * reasonable.
+ */
+ on_commits = lcons(oc, on_commits);
+
+ MemoryContextSwitchTo(oldcxt);
+}
+
+/*
+ * Unregister any ON COMMIT action when a relation is deleted.
+ *
+ * Actually, we only mark the OnCommitItem entry as to be deleted after commit.
+ */
+void
+remove_on_commit_action(Oid relid)
+{
+ ListCell *l;
+
+ foreach(l, on_commits)
+ {
+ OnCommitItem *oc = (OnCommitItem *) lfirst(l);
+
+ if (oc->relid == relid)
+ {
+ oc->deleting_subid = GetCurrentSubTransactionId();
+ break;
+ }
+ }
+}
+
/*
 * Perform ON COMMIT actions.
 *
 * This is invoked just before actually committing, since it's possible
 * to encounter errors.
 *
 * Scans the backend-local on_commits list, truncating ON COMMIT DELETE ROWS
 * tables and dropping ON COMMIT DROP tables in a single batched deletion.
 */
void
PreCommit_on_commit_actions(void)
{
	ListCell   *l;
	List	   *oids_to_truncate = NIL;
	List	   *oids_to_drop = NIL;

	foreach(l, on_commits)
	{
		OnCommitItem *oc = (OnCommitItem *) lfirst(l);

		/* Ignore entry if already dropped in this xact */
		if (oc->deleting_subid != InvalidSubTransactionId)
			continue;

		switch (oc->oncommit)
		{
			case ONCOMMIT_NOOP:
			case ONCOMMIT_PRESERVE_ROWS:
				/* Do nothing (there shouldn't be such entries, actually) */
				break;
			case ONCOMMIT_DELETE_ROWS:

				/*
				 * If this transaction hasn't accessed any temporary
				 * relations, we can skip truncating ON COMMIT DELETE ROWS
				 * tables, as they must still be empty.
				 */
				if ((MyXactFlags & XACT_FLAGS_ACCESSEDTEMPNAMESPACE))
					oids_to_truncate = lappend_oid(oids_to_truncate, oc->relid);
				break;
			case ONCOMMIT_DROP:
				oids_to_drop = lappend_oid(oids_to_drop, oc->relid);
				break;
		}
	}

	/*
	 * Truncate relations before dropping so that all dependencies between
	 * relations are removed after they are worked on.  Doing it like this
	 * might be a waste as it is possible that a relation being truncated will
	 * be dropped anyway due to its parent being dropped, but this makes the
	 * code more robust because of not having to re-check that the relation
	 * exists at truncation time.
	 */
	if (oids_to_truncate != NIL)
		heap_truncate(oids_to_truncate);

	if (oids_to_drop != NIL)
	{
		ObjectAddresses *targetObjects = new_object_addresses();
		ListCell   *l;

		/* Collect all doomed relations into one deletion request */
		foreach(l, oids_to_drop)
		{
			ObjectAddress object;

			object.classId = RelationRelationId;
			object.objectId = lfirst_oid(l);
			object.objectSubId = 0;

			Assert(!object_address_present(&object, targetObjects));

			add_exact_object_address(&object, targetObjects);
		}

		/*
		 * Object deletion might involve toast table access (to clean up
		 * toasted catalog entries), so ensure we have a valid snapshot.
		 */
		PushActiveSnapshot(GetTransactionSnapshot());

		/*
		 * Since this is an automatic drop, rather than one directly initiated
		 * by the user, we pass the PERFORM_DELETION_INTERNAL flag.
		 */
		performMultipleDeletions(targetObjects, DROP_CASCADE,
								 PERFORM_DELETION_INTERNAL | PERFORM_DELETION_QUIETLY);

		PopActiveSnapshot();

#ifdef USE_ASSERT_CHECKING

		/*
		 * Note that table deletion will call remove_on_commit_action, so the
		 * entry should get marked as deleted.
		 */
		foreach(l, on_commits)
		{
			OnCommitItem *oc = (OnCommitItem *) lfirst(l);

			if (oc->oncommit != ONCOMMIT_DROP)
				continue;

			Assert(oc->deleting_subid != InvalidSubTransactionId);
		}
#endif
	}
}
+
+/*
+ * Post-commit or post-abort cleanup for ON COMMIT management.
+ *
+ * All we do here is remove no-longer-needed OnCommitItem entries.
+ *
+ * During commit, remove entries that were deleted during this transaction;
+ * during abort, remove those created during this transaction.
+ */
+void
+AtEOXact_on_commit_actions(bool isCommit)
+{
+ ListCell *cur_item;
+
+ foreach(cur_item, on_commits)
+ {
+ OnCommitItem *oc = (OnCommitItem *) lfirst(cur_item);
+
+ if (isCommit ? oc->deleting_subid != InvalidSubTransactionId :
+ oc->creating_subid != InvalidSubTransactionId)
+ {
+ /* cur_item must be removed */
+ on_commits = foreach_delete_current(on_commits, cur_item);
+ pfree(oc);
+ }
+ else
+ {
+ /* cur_item must be preserved */
+ oc->creating_subid = InvalidSubTransactionId;
+ oc->deleting_subid = InvalidSubTransactionId;
+ }
+ }
+}
+
+/*
+ * Post-subcommit or post-subabort cleanup for ON COMMIT management.
+ *
+ * During subabort, we can immediately remove entries created during this
+ * subtransaction. During subcommit, just relabel entries marked during
+ * this subtransaction as being the parent's responsibility.
+ */
+void
+AtEOSubXact_on_commit_actions(bool isCommit, SubTransactionId mySubid,
+ SubTransactionId parentSubid)
+{
+ ListCell *cur_item;
+
+ foreach(cur_item, on_commits)
+ {
+ OnCommitItem *oc = (OnCommitItem *) lfirst(cur_item);
+
+ if (!isCommit && oc->creating_subid == mySubid)
+ {
+ /* cur_item must be removed */
+ on_commits = foreach_delete_current(on_commits, cur_item);
+ pfree(oc);
+ }
+ else
+ {
+ /* cur_item must be preserved */
+ if (oc->creating_subid == mySubid)
+ oc->creating_subid = parentSubid;
+ if (oc->deleting_subid == mySubid)
+ oc->deleting_subid = isCommit ? parentSubid : InvalidSubTransactionId;
+ }
+ }
+}
+
+/*
+ * This is intended as a callback for RangeVarGetRelidExtended(). It allows
+ * the relation to be locked only if (1) it's a plain or partitioned table,
+ * materialized view, or TOAST table and (2) the current user is the owner (or
+ * the superuser). This meets the permission-checking needs of CLUSTER,
+ * REINDEX TABLE, and REFRESH MATERIALIZED VIEW; we expose it here so that it
+ * can be used by all.
+ */
+void
+RangeVarCallbackOwnsTable(const RangeVar *relation,
+ Oid relId, Oid oldRelId, void *arg)
+{
+ char relkind;
+
+ /* Nothing to do if the relation was not found. */
+ if (!OidIsValid(relId))
+ return;
+
+ /*
+ * If the relation does exist, check whether it's an index. But note that
+ * the relation might have been dropped between the time we did the name
+ * lookup and now. In that case, there's nothing to do.
+ */
+ relkind = get_rel_relkind(relId);
+ if (!relkind)
+ return;
+ if (relkind != RELKIND_RELATION && relkind != RELKIND_TOASTVALUE &&
+ relkind != RELKIND_MATVIEW && relkind != RELKIND_PARTITIONED_TABLE)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("\"%s\" is not a table or materialized view", relation->relname)));
+
+ /* Check permissions */
+ if (!pg_class_ownercheck(relId, GetUserId()))
+ aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(get_rel_relkind(relId)), relation->relname);
+}
+
+/*
+ * Callback to RangeVarGetRelidExtended() for TRUNCATE processing.
+ */
+static void
+RangeVarCallbackForTruncate(const RangeVar *relation,
+ Oid relId, Oid oldRelId, void *arg)
+{
+ HeapTuple tuple;
+
+ /* Nothing to do if the relation was not found. */
+ if (!OidIsValid(relId))
+ return;
+
+ tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relId));
+ if (!HeapTupleIsValid(tuple)) /* should not happen */
+ elog(ERROR, "cache lookup failed for relation %u", relId);
+
+ truncate_check_rel(relId, (Form_pg_class) GETSTRUCT(tuple));
+ truncate_check_perms(relId, (Form_pg_class) GETSTRUCT(tuple));
+
+ ReleaseSysCache(tuple);
+}
+
+/*
+ * Callback to RangeVarGetRelidExtended(), similar to
+ * RangeVarCallbackOwnsTable() but without checks on the type of the relation.
+ */
+void
+RangeVarCallbackOwnsRelation(const RangeVar *relation,
+ Oid relId, Oid oldRelId, void *arg)
+{
+ HeapTuple tuple;
+
+ /* Nothing to do if the relation was not found. */
+ if (!OidIsValid(relId))
+ return;
+
+ tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relId));
+ if (!HeapTupleIsValid(tuple)) /* should not happen */
+ elog(ERROR, "cache lookup failed for relation %u", relId);
+
+ if (!pg_class_ownercheck(relId, GetUserId()))
+ aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(get_rel_relkind(relId)),
+ relation->relname);
+
+ if (!allowSystemTableMods &&
+ IsSystemClass(relId, (Form_pg_class) GETSTRUCT(tuple)))
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("permission denied: \"%s\" is a system catalog",
+ relation->relname)));
+
+ ReleaseSysCache(tuple);
+}
+
/*
 * Common RangeVarGetRelid callback for rename, set schema, and alter table
 * processing.
 *
 * "arg" is the statement parse tree (RenameStmt, AlterObjectSchemaStmt, or
 * AlterTableStmt); it supplies the relation type the user named, which is
 * cross-checked against the actual relkind, and drives extra permission
 * checks for RENAME.
 */
static void
RangeVarCallbackForAlterRelation(const RangeVar *rv, Oid relid, Oid oldrelid,
								 void *arg)
{
	Node	   *stmt = (Node *) arg;
	ObjectType	reltype;
	HeapTuple	tuple;
	Form_pg_class classform;
	AclResult	aclresult;
	char		relkind;

	tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
	if (!HeapTupleIsValid(tuple))
		return;					/* concurrently dropped */
	classform = (Form_pg_class) GETSTRUCT(tuple);
	relkind = classform->relkind;

	/* Must own relation. */
	if (!pg_class_ownercheck(relid, GetUserId()))
		aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(get_rel_relkind(relid)), rv->relname);

	/* No system table modifications unless explicitly allowed. */
	if (!allowSystemTableMods && IsSystemClass(relid, classform))
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied: \"%s\" is a system catalog",
						rv->relname)));

	/*
	 * Extract the specified relation type from the statement parse tree.
	 *
	 * Also, for ALTER .. RENAME, check permissions: the user must (still)
	 * have CREATE rights on the containing namespace.
	 */
	if (IsA(stmt, RenameStmt))
	{
		aclresult = pg_namespace_aclcheck(classform->relnamespace,
										  GetUserId(), ACL_CREATE);
		if (aclresult != ACLCHECK_OK)
			aclcheck_error(aclresult, OBJECT_SCHEMA,
						   get_namespace_name(classform->relnamespace));
		reltype = ((RenameStmt *) stmt)->renameType;
	}
	else if (IsA(stmt, AlterObjectSchemaStmt))
		reltype = ((AlterObjectSchemaStmt *) stmt)->objectType;

	else if (IsA(stmt, AlterTableStmt))
		reltype = ((AlterTableStmt *) stmt)->objtype;
	else
	{
		elog(ERROR, "unrecognized node type: %d", (int) nodeTag(stmt));
		reltype = OBJECT_TABLE; /* placate compiler */
	}

	/*
	 * For compatibility with prior releases, we allow ALTER TABLE to be used
	 * with most other types of relations (but not composite types). We allow
	 * similar flexibility for ALTER INDEX in the case of RENAME, but not
	 * otherwise.  Otherwise, the user must select the correct form of the
	 * command for the relation at issue.
	 */
	if (reltype == OBJECT_SEQUENCE && relkind != RELKIND_SEQUENCE)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("\"%s\" is not a sequence", rv->relname)));

	if (reltype == OBJECT_VIEW && relkind != RELKIND_VIEW)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("\"%s\" is not a view", rv->relname)));

	if (reltype == OBJECT_MATVIEW && relkind != RELKIND_MATVIEW)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("\"%s\" is not a materialized view", rv->relname)));

	if (reltype == OBJECT_FOREIGN_TABLE && relkind != RELKIND_FOREIGN_TABLE)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("\"%s\" is not a foreign table", rv->relname)));

	if (reltype == OBJECT_TYPE && relkind != RELKIND_COMPOSITE_TYPE)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("\"%s\" is not a composite type", rv->relname)));

	if (reltype == OBJECT_INDEX && relkind != RELKIND_INDEX &&
		relkind != RELKIND_PARTITIONED_INDEX
		&& !IsA(stmt, RenameStmt))
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("\"%s\" is not an index", rv->relname)));

	/*
	 * Don't allow ALTER TABLE on composite types. We want people to use ALTER
	 * TYPE for that.
	 */
	if (reltype != OBJECT_TYPE && relkind == RELKIND_COMPOSITE_TYPE)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("\"%s\" is a composite type", rv->relname),
				 errhint("Use ALTER TYPE instead.")));

	/*
	 * Don't allow ALTER TABLE .. SET SCHEMA on relations that can't be moved
	 * to a different schema, such as indexes and TOAST tables.
	 */
	if (IsA(stmt, AlterObjectSchemaStmt))
	{
		if (relkind == RELKIND_INDEX || relkind == RELKIND_PARTITIONED_INDEX)
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change schema of index \"%s\"",
							rv->relname),
					 errhint("Change the schema of the table instead.")));
		else if (relkind == RELKIND_COMPOSITE_TYPE)
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change schema of composite type \"%s\"",
							rv->relname),
					 errhint("Use ALTER TYPE instead.")));
		else if (relkind == RELKIND_TOASTVALUE)
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change schema of TOAST table \"%s\"",
							rv->relname),
					 errhint("Change the schema of the table instead.")));
	}

	ReleaseSysCache(tuple);
}
+
+/*
+ * Transform any expressions present in the partition key
+ *
+ * Returns a transformed PartitionSpec, as well as the strategy code
+ */
+static PartitionSpec *
+transformPartitionSpec(Relation rel, PartitionSpec *partspec, char *strategy)
+{
+ PartitionSpec *newspec;
+ ParseState *pstate;
+ ParseNamespaceItem *nsitem;
+ ListCell *l;
+
+ newspec = makeNode(PartitionSpec);
+
+ newspec->strategy = partspec->strategy;
+ newspec->partParams = NIL;
+ newspec->location = partspec->location;
+
+ /* Parse partitioning strategy name */
+ if (pg_strcasecmp(partspec->strategy, "hash") == 0)
+ *strategy = PARTITION_STRATEGY_HASH;
+ else if (pg_strcasecmp(partspec->strategy, "list") == 0)
+ *strategy = PARTITION_STRATEGY_LIST;
+ else if (pg_strcasecmp(partspec->strategy, "range") == 0)
+ *strategy = PARTITION_STRATEGY_RANGE;
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unrecognized partitioning strategy \"%s\"",
+ partspec->strategy)));
+
+ /* Check valid number of columns for strategy */
+ if (*strategy == PARTITION_STRATEGY_LIST &&
+ list_length(partspec->partParams) != 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("cannot use \"list\" partition strategy with more than one column")));
+
+ /*
+ * Create a dummy ParseState and insert the target relation as its sole
+ * rangetable entry. We need a ParseState for transformExpr.
+ */
+ pstate = make_parsestate(NULL);
+ nsitem = addRangeTableEntryForRelation(pstate, rel, AccessShareLock,
+ NULL, false, true);
+ addNSItemToQuery(pstate, nsitem, true, true, true);
+
+ /* take care of any partition expressions */
+ foreach(l, partspec->partParams)
+ {
+ PartitionElem *pelem = lfirst_node(PartitionElem, l);
+
+ if (pelem->expr)
+ {
+ /* Copy, to avoid scribbling on the input */
+ pelem = copyObject(pelem);
+
+ /* Now do parse transformation of the expression */
+ pelem->expr = transformExpr(pstate, pelem->expr,
+ EXPR_KIND_PARTITION_EXPRESSION);
+
+ /* we have to fix its collations too */
+ assign_expr_collations(pstate, pelem->expr);
+ }
+
+ newspec->partParams = lappend(newspec->partParams, pelem);
+ }
+
+ return newspec;
+}
+
+/*
+ * Compute per-partition-column information from a list of PartitionElems.
+ * Expressions in the PartitionElems must be parse-analyzed already.
+ *
+ * Results are returned through caller-provided storage: partattrs[i]
+ * receives the attribute number of the i'th key column (0 marks an
+ * expression column), *partexprs accumulates the expression trees for
+ * expression columns, and partopclass[i] / partcollation[i] receive the
+ * operator class and collation OIDs.  "strategy" determines whether we
+ * look up btree or hash operator classes.  pstate is used only for error
+ * cursor positions.
+ */
+static void
+ComputePartitionAttrs(ParseState *pstate, Relation rel, List *partParams, AttrNumber *partattrs,
+					  List **partexprs, Oid *partopclass, Oid *partcollation,
+					  char strategy)
+{
+	int			attn;
+	ListCell   *lc;
+	Oid			am_oid;
+
+	/* attn indexes the output arrays, in partParams order */
+	attn = 0;
+	foreach(lc, partParams)
+	{
+		PartitionElem *pelem = lfirst_node(PartitionElem, lc);
+		Oid			atttype;
+		Oid			attcollation;
+
+		if (pelem->name != NULL)
+		{
+			/* Simple attribute reference */
+			HeapTuple	atttuple;
+			Form_pg_attribute attform;
+
+			atttuple = SearchSysCacheAttName(RelationGetRelid(rel),
+											 pelem->name);
+			if (!HeapTupleIsValid(atttuple))
+				ereport(ERROR,
+						(errcode(ERRCODE_UNDEFINED_COLUMN),
+						 errmsg("column \"%s\" named in partition key does not exist",
+								pelem->name),
+						 parser_errposition(pstate, pelem->location)));
+			attform = (Form_pg_attribute) GETSTRUCT(atttuple);
+
+			if (attform->attnum <= 0)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+						 errmsg("cannot use system column \"%s\" in partition key",
+								pelem->name),
+						 parser_errposition(pstate, pelem->location)));
+
+			/*
+			 * Generated columns cannot work: They are computed after BEFORE
+			 * triggers, but partition routing is done before all triggers.
+			 */
+			if (attform->attgenerated)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+						 errmsg("cannot use generated column in partition key"),
+						 errdetail("Column \"%s\" is a generated column.",
+								   pelem->name),
+						 parser_errposition(pstate, pelem->location)));
+
+			/* Copy out what we need before releasing the syscache entry */
+			partattrs[attn] = attform->attnum;
+			atttype = attform->atttypid;
+			attcollation = attform->attcollation;
+			ReleaseSysCache(atttuple);
+		}
+		else
+		{
+			/* Expression */
+			Node	   *expr = pelem->expr;
+			char		partattname[16];
+
+			Assert(expr != NULL);
+			atttype = exprType(expr);
+			attcollation = exprCollation(expr);
+
+			/*
+			 * The expression must be of a storable type (e.g., not RECORD).
+			 * The test is the same as for whether a table column is of a safe
+			 * type (which is why we needn't check for the non-expression
+			 * case).
+			 */
+			snprintf(partattname, sizeof(partattname), "%d", attn + 1);
+			CheckAttributeType(partattname,
+							   atttype, attcollation,
+							   NIL, CHKATYPE_IS_PARTKEY);
+
+			/*
+			 * Strip any top-level COLLATE clause.  This ensures that we treat
+			 * "x COLLATE y" and "(x COLLATE y)" alike.
+			 */
+			while (IsA(expr, CollateExpr))
+				expr = (Node *) ((CollateExpr *) expr)->arg;
+
+			if (IsA(expr, Var) &&
+				((Var *) expr)->varattno > 0)
+			{
+				/*
+				 * User wrote "(column)" or "(column COLLATE something)".
+				 * Treat it like simple attribute anyway.
+				 */
+				partattrs[attn] = ((Var *) expr)->varattno;
+			}
+			else
+			{
+				Bitmapset  *expr_attrs = NULL;
+				int			i;
+
+				partattrs[attn] = 0;	/* marks the column as expression */
+				*partexprs = lappend(*partexprs, expr);
+
+				/*
+				 * Try to simplify the expression before checking for
+				 * mutability.  The main practical value of doing it in this
+				 * order is that an inline-able SQL-language function will be
+				 * accepted if its expansion is immutable, whether or not the
+				 * function itself is marked immutable.
+				 *
+				 * Note that expression_planner does not change the passed in
+				 * expression destructively and we have already saved the
+				 * expression to be stored into the catalog above.
+				 */
+				expr = (Node *) expression_planner((Expr *) expr);
+
+				/*
+				 * Partition expression cannot contain mutable functions,
+				 * because a given row must always map to the same partition
+				 * as long as there is no change in the partition boundary
+				 * structure.
+				 */
+				if (contain_mutable_functions(expr))
+					ereport(ERROR,
+							(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+							 errmsg("functions in partition key expression must be marked IMMUTABLE")));
+
+				/*
+				 * transformPartitionSpec() should have already rejected
+				 * subqueries, aggregates, window functions, and SRFs, based
+				 * on the EXPR_KIND_ for partition expressions.
+				 */
+
+				/*
+				 * Cannot allow system column references, since that would
+				 * make partition routing impossible: their values won't be
+				 * known yet when we need to do that.
+				 */
+				pull_varattnos(expr, 1, &expr_attrs);
+				for (i = FirstLowInvalidHeapAttributeNumber; i < 0; i++)
+				{
+					if (bms_is_member(i - FirstLowInvalidHeapAttributeNumber,
+									  expr_attrs))
+						ereport(ERROR,
+								(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+								 errmsg("partition key expressions cannot contain system column references")));
+				}
+
+				/*
+				 * Generated columns cannot work: They are computed after
+				 * BEFORE triggers, but partition routing is done before all
+				 * triggers.
+				 */
+				i = -1;
+				while ((i = bms_next_member(expr_attrs, i)) >= 0)
+				{
+					AttrNumber	attno = i + FirstLowInvalidHeapAttributeNumber;
+
+					if (attno > 0 &&
+						TupleDescAttr(RelationGetDescr(rel), attno - 1)->attgenerated)
+						ereport(ERROR,
+								(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+								 errmsg("cannot use generated column in partition key"),
+								 errdetail("Column \"%s\" is a generated column.",
+										   get_attname(RelationGetRelid(rel), attno, false)),
+								 parser_errposition(pstate, pelem->location)));
+				}
+
+				/*
+				 * While it is not exactly *wrong* for a partition expression
+				 * to be a constant, it seems better to reject such keys.
+				 */
+				if (IsA(expr, Const))
+					ereport(ERROR,
+							(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+							 errmsg("cannot use constant expression as partition key")));
+			}
+		}
+
+		/*
+		 * Apply collation override if any
+		 */
+		if (pelem->collation)
+			attcollation = get_collation_oid(pelem->collation, false);
+
+		/*
+		 * Check we have a collation iff it's a collatable type.  The only
+		 * expected failures here are (1) COLLATE applied to a noncollatable
+		 * type, or (2) partition expression had an unresolved collation.  But
+		 * we might as well code this to be a complete consistency check.
+		 */
+		if (type_is_collatable(atttype))
+		{
+			if (!OidIsValid(attcollation))
+				ereport(ERROR,
+						(errcode(ERRCODE_INDETERMINATE_COLLATION),
+						 errmsg("could not determine which collation to use for partition expression"),
+						 errhint("Use the COLLATE clause to set the collation explicitly.")));
+		}
+		else
+		{
+			if (OidIsValid(attcollation))
+				ereport(ERROR,
+						(errcode(ERRCODE_DATATYPE_MISMATCH),
+						 errmsg("collations are not supported by type %s",
+								format_type_be(atttype))));
+		}
+
+		partcollation[attn] = attcollation;
+
+		/*
+		 * Identify the appropriate operator class.  For list and range
+		 * partitioning, we use a btree operator class; hash partitioning uses
+		 * a hash operator class.
+		 */
+		if (strategy == PARTITION_STRATEGY_HASH)
+			am_oid = HASH_AM_OID;
+		else
+			am_oid = BTREE_AM_OID;
+
+		if (!pelem->opclass)
+		{
+			partopclass[attn] = GetDefaultOpClass(atttype, am_oid);
+
+			if (!OidIsValid(partopclass[attn]))
+			{
+				if (strategy == PARTITION_STRATEGY_HASH)
+					ereport(ERROR,
+							(errcode(ERRCODE_UNDEFINED_OBJECT),
+							 errmsg("data type %s has no default operator class for access method \"%s\"",
+									format_type_be(atttype), "hash"),
+							 errhint("You must specify a hash operator class or define a default hash operator class for the data type.")));
+				else
+					ereport(ERROR,
+							(errcode(ERRCODE_UNDEFINED_OBJECT),
+							 errmsg("data type %s has no default operator class for access method \"%s\"",
+									format_type_be(atttype), "btree"),
+							 errhint("You must specify a btree operator class or define a default btree operator class for the data type.")));
+			}
+		}
+		else
+			partopclass[attn] = ResolveOpClass(pelem->opclass,
+											   atttype,
+											   am_oid == HASH_AM_OID ? "hash" : "btree",
+											   am_oid);
+
+		attn++;
+	}
+}
+
+/*
+ * PartConstraintImpliedByRelConstraint
+ *		Do scanrel's existing constraints imply the partition constraint?
+ *
+ * "Existing constraints" means scanrel's CHECK constraints plus its
+ * column-level NOT NULL constraints, the latter converted here into
+ * explicit IS NOT NULL clauses so the generic proof machinery can use
+ * them.  partConstraint is the partition constraint in implicit-AND form.
+ */
+bool
+PartConstraintImpliedByRelConstraint(Relation scanrel,
+									 List *partConstraint)
+{
+	TupleDesc	tupdesc = RelationGetDescr(scanrel);
+	List	   *notnullTests = NIL;
+
+	if (tupdesc->constr && tupdesc->constr->has_not_null)
+	{
+		for (int attidx = 0; attidx < tupdesc->natts; attidx++)
+		{
+			Form_pg_attribute att = TupleDescAttr(tupdesc, attidx);
+			NullTest   *ntest;
+
+			/* Only live, NOT NULL columns contribute a clause */
+			if (att->attisdropped || !att->attnotnull)
+				continue;
+
+			ntest = makeNode(NullTest);
+			ntest->arg = (Expr *) makeVar(1,
+										  attidx + 1,
+										  att->atttypid,
+										  att->atttypmod,
+										  att->attcollation,
+										  0);
+			ntest->nulltesttype = IS_NOT_NULL;
+
+			/*
+			 * argisrow=false is correct even for a composite column, because
+			 * attnotnull does not represent a SQL-spec IS NOT NULL test in
+			 * such a case, just IS DISTINCT FROM NULL.
+			 */
+			ntest->argisrow = false;
+			ntest->location = -1;
+
+			notnullTests = lappend(notnullTests, ntest);
+		}
+	}
+
+	return ConstraintImpliedByRelConstraint(scanrel, partConstraint,
+											notnullTests);
+}
+
+/*
+ * ConstraintImpliedByRelConstraint
+ *		Do scanrel's existing constraints imply the given constraint?
+ *
+ * testConstraint is the condition to prove.  provenConstraint is a list of
+ * clauses the caller asserts are true; scanrel's validated CHECK
+ * constraints are added to that set here.  Both lists must be in
+ * implicit-AND form, contain only immutable clauses, and reference only
+ * Vars with varno = 1.
+ */
+bool
+ConstraintImpliedByRelConstraint(Relation scanrel, List *testConstraint, List *provenConstraint)
+{
+	TupleConstr *constr = RelationGetDescr(scanrel)->constr;
+	List	   *knownClauses = list_copy(provenConstraint);
+	int			ncheck = (constr != NULL) ? constr->num_check : 0;
+
+	for (int idx = 0; idx < ncheck; idx++)
+	{
+		Node	   *checkExpr;
+
+		/*
+		 * A constraint that hasn't been fully validated yet proves nothing,
+		 * so skip it.
+		 */
+		if (!constr->check[idx].ccvalid)
+			continue;
+
+		checkExpr = stringToNode(constr->check[idx].ccbin);
+
+		/*
+		 * Const-simplify and canonicalize each expression.  This is
+		 * necessary because we will be comparing it to similarly-processed
+		 * partition constraint expressions, and might otherwise fail to
+		 * detect valid matches.
+		 */
+		checkExpr = eval_const_expressions(NULL, checkExpr);
+		checkExpr = (Node *) canonicalize_qual((Expr *) checkExpr, true);
+
+		knownClauses = list_concat(knownClauses,
+								   make_ands_implicit((Expr *) checkExpr));
+	}
+
+	/*
+	 * Attempt the proof.  Since we are comparing CHECK constraints, weak
+	 * implication is what we need: assume knownClauses is not-false and try
+	 * to prove the same for testConstraint.
+	 *
+	 * Note that predicate_implied_by assumes its first argument is known
+	 * immutable.  That should always be true for both NOT NULL and partition
+	 * constraints, so we don't test it here.
+	 */
+	return predicate_implied_by(testConstraint, knownClauses, true);
+}
+
+/*
+ * QueuePartitionConstraintValidation
+ *
+ * Add an entry to wqueue to have the given partition constraint validated by
+ * Phase 3, for the given relation, and all its children.
+ *
+ * We first verify whether the given constraint is implied by pre-existing
+ * relation constraints; if it is, there's no need to scan the table to
+ * validate, so don't queue in that case.
+ *
+ * partConstraint is expected to be a one-element list holding the
+ * explicit-AND form of the constraint (NOTE(review): linitial() below
+ * relies on that shape — confirm against callers).  validate_default is
+ * true when revalidating the default partition's constraint; it only
+ * affects messaging and the flag recorded in the work-queue entry.
+ */
+static void
+QueuePartitionConstraintValidation(List **wqueue, Relation scanrel,
+								   List *partConstraint,
+								   bool validate_default)
+{
+	/*
+	 * Based on the table's existing constraints, determine whether or not we
+	 * may skip scanning the table.
+	 */
+	if (PartConstraintImpliedByRelConstraint(scanrel, partConstraint))
+	{
+		if (!validate_default)
+			ereport(DEBUG1,
+					(errmsg_internal("partition constraint for table \"%s\" is implied by existing constraints",
+									 RelationGetRelationName(scanrel))));
+		else
+			ereport(DEBUG1,
+					(errmsg_internal("updated partition constraint for default partition \"%s\" is implied by existing constraints",
+									 RelationGetRelationName(scanrel))));
+		return;
+	}
+
+	/*
+	 * Constraints proved insufficient. For plain relations, queue a
+	 * validation item now; for partitioned tables, recurse to process each
+	 * partition.
+	 */
+	if (scanrel->rd_rel->relkind == RELKIND_RELATION)
+	{
+		AlteredTableInfo *tab;
+
+		/* Grab a work queue entry. */
+		tab = ATGetQueueEntry(wqueue, scanrel);
+		Assert(tab->partition_constraint == NULL);
+		tab->partition_constraint = (Expr *) linitial(partConstraint);
+		tab->validate_default = validate_default;
+	}
+	else if (scanrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+	{
+		PartitionDesc partdesc = RelationGetPartitionDesc(scanrel, true);
+		int			i;
+
+		for (i = 0; i < partdesc->nparts; i++)
+		{
+			Relation	part_rel;
+			List	   *thisPartConstraint;
+
+			/*
+			 * This is the minimum lock we need to prevent deadlocks.
+			 */
+			part_rel = table_open(partdesc->oids[i], AccessExclusiveLock);
+
+			/*
+			 * Adjust the constraint for scanrel so that it matches this
+			 * partition's attribute numbers.
+			 */
+			thisPartConstraint =
+				map_partition_varattnos(partConstraint, 1,
+										part_rel, scanrel);
+
+			QueuePartitionConstraintValidation(wqueue, part_rel,
+											   thisPartConstraint,
+											   validate_default);
+			table_close(part_rel, NoLock);	/* keep lock till commit */
+		}
+	}
+}
+
+/*
+ * ALTER TABLE <name> ATTACH PARTITION <partition-name> FOR VALUES
+ *
+ * Return the address of the newly attached partition.
+ *
+ * rel is the partitioned parent; cmd carries the partition's name and
+ * bound specification.  Work that requires a Phase-3 table scan
+ * (partition-constraint validation, FK verification) is queued onto
+ * wqueue rather than performed here.
+ */
+static ObjectAddress
+ATExecAttachPartition(List **wqueue, Relation rel, PartitionCmd *cmd,
+					  AlterTableUtilityContext *context)
+{
+	Relation	attachrel,
+				catalog;
+	List	   *attachrel_children;
+	List	   *partConstraint;
+	SysScanDesc scan;
+	ScanKeyData skey;
+	AttrNumber	attno;
+	int			natts;
+	TupleDesc	tupleDesc;
+	ObjectAddress address;
+	const char *trigger_name;
+	Oid			defaultPartOid;
+	List	   *partBoundConstraint;
+	ParseState *pstate = make_parsestate(NULL);
+
+	pstate->p_sourcetext = context->queryString;
+
+	/*
+	 * We must lock the default partition if one exists, because attaching a
+	 * new partition will change its partition constraint.
+	 */
+	defaultPartOid =
+		get_default_oid_from_partdesc(RelationGetPartitionDesc(rel, true));
+	if (OidIsValid(defaultPartOid))
+		LockRelationOid(defaultPartOid, AccessExclusiveLock);
+
+	attachrel = table_openrv(cmd->name, AccessExclusiveLock);
+
+	/*
+	 * XXX I think it'd be a good idea to grab locks on all tables referenced
+	 * by FKs at this point also.
+	 */
+
+	/*
+	 * Must be owner of both parent and source table -- parent was checked by
+	 * ATSimplePermissions call in ATPrepCmd
+	 */
+	ATSimplePermissions(AT_AttachPartition, attachrel, ATT_TABLE | ATT_FOREIGN_TABLE);
+
+	/* A partition can only have one parent */
+	if (attachrel->rd_rel->relispartition)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("\"%s\" is already a partition",
+						RelationGetRelationName(attachrel))));
+
+	if (OidIsValid(attachrel->rd_rel->reloftype))
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("cannot attach a typed table as partition")));
+
+	/*
+	 * Table being attached should not already be part of inheritance; either
+	 * as a child table...
+	 */
+	catalog = table_open(InheritsRelationId, AccessShareLock);
+	ScanKeyInit(&skey,
+				Anum_pg_inherits_inhrelid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(RelationGetRelid(attachrel)));
+	scan = systable_beginscan(catalog, InheritsRelidSeqnoIndexId, true,
+							  NULL, 1, &skey);
+	if (HeapTupleIsValid(systable_getnext(scan)))
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("cannot attach inheritance child as partition")));
+	systable_endscan(scan);
+
+	/* ...or as a parent table (except the case when it is partitioned) */
+	ScanKeyInit(&skey,
+				Anum_pg_inherits_inhparent,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(RelationGetRelid(attachrel)));
+	scan = systable_beginscan(catalog, InheritsParentIndexId, true, NULL,
+							  1, &skey);
+	if (HeapTupleIsValid(systable_getnext(scan)) &&
+		attachrel->rd_rel->relkind == RELKIND_RELATION)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("cannot attach inheritance parent as partition")));
+	systable_endscan(scan);
+	table_close(catalog, AccessShareLock);
+
+	/*
+	 * Prevent circularity by seeing if rel is a partition of attachrel. (In
+	 * particular, this disallows making a rel a partition of itself.)
+	 *
+	 * We do that by checking if rel is a member of the list of attachrel's
+	 * partitions provided the latter is partitioned at all.  We want to avoid
+	 * having to construct this list again, so we request the strongest lock
+	 * on all partitions.  We need the strongest lock, because we may decide
+	 * to scan them if we find out that the table being attached (or its leaf
+	 * partitions) may contain rows that violate the partition constraint. If
+	 * the table has a constraint that would prevent such rows, which by
+	 * definition is present in all the partitions, we need not scan the
+	 * table, nor its partitions.  But we cannot risk a deadlock by taking a
+	 * weaker lock now and the stronger one only when needed.
+	 */
+	attachrel_children = find_all_inheritors(RelationGetRelid(attachrel),
+											 AccessExclusiveLock, NULL);
+	if (list_member_oid(attachrel_children, RelationGetRelid(rel)))
+		ereport(ERROR,
+				(errcode(ERRCODE_DUPLICATE_TABLE),
+				 errmsg("circular inheritance not allowed"),
+				 errdetail("\"%s\" is already a child of \"%s\".",
+						   RelationGetRelationName(rel),
+						   RelationGetRelationName(attachrel))));
+
+	/* If the parent is permanent, so must be all of its partitions. */
+	if (rel->rd_rel->relpersistence != RELPERSISTENCE_TEMP &&
+		attachrel->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("cannot attach a temporary relation as partition of permanent relation \"%s\"",
+						RelationGetRelationName(rel))));
+
+	/* Temp parent cannot have a partition that is itself not a temp */
+	if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
+		attachrel->rd_rel->relpersistence != RELPERSISTENCE_TEMP)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("cannot attach a permanent relation as partition of temporary relation \"%s\"",
+						RelationGetRelationName(rel))));
+
+	/* If the parent is temp, it must belong to this session */
+	if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
+		!rel->rd_islocaltemp)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("cannot attach as partition of temporary relation of another session")));
+
+	/* Ditto for the partition */
+	if (attachrel->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
+		!attachrel->rd_islocaltemp)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("cannot attach temporary relation of another session as partition")));
+
+	/* Check if there are any columns in attachrel that aren't in the parent */
+	tupleDesc = RelationGetDescr(attachrel);
+	natts = tupleDesc->natts;
+	for (attno = 1; attno <= natts; attno++)
+	{
+		Form_pg_attribute attribute = TupleDescAttr(tupleDesc, attno - 1);
+		char	   *attributeName = NameStr(attribute->attname);
+
+		/* Ignore dropped */
+		if (attribute->attisdropped)
+			continue;
+
+		/* Try to find the column in parent (matching on column name) */
+		if (!SearchSysCacheExists2(ATTNAME,
+								   ObjectIdGetDatum(RelationGetRelid(rel)),
+								   CStringGetDatum(attributeName)))
+			ereport(ERROR,
+					(errcode(ERRCODE_DATATYPE_MISMATCH),
+					 errmsg("table \"%s\" contains column \"%s\" not found in parent \"%s\"",
+							RelationGetRelationName(attachrel), attributeName,
+							RelationGetRelationName(rel)),
+					 errdetail("The new partition may contain only the columns present in parent.")));
+	}
+
+	/*
+	 * If child_rel has row-level triggers with transition tables, we
+	 * currently don't allow it to become a partition.  See also prohibitions
+	 * in ATExecAddInherit() and CreateTrigger().
+	 */
+	trigger_name = FindTriggerIncompatibleWithInheritance(attachrel->trigdesc);
+	if (trigger_name != NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("trigger \"%s\" prevents table \"%s\" from becoming a partition",
+						trigger_name, RelationGetRelationName(attachrel)),
+				 errdetail("ROW triggers with transition tables are not supported on partitions.")));
+
+	/*
+	 * Check that the new partition's bound is valid and does not overlap any
+	 * of existing partitions of the parent - note that it does not return on
+	 * error.
+	 */
+	check_new_partition_bound(RelationGetRelationName(attachrel), rel,
+							  cmd->bound, pstate);
+
+	/* OK to create inheritance.  Rest of the checks performed there */
+	CreateInheritance(attachrel, rel);
+
+	/* Update the pg_class entry. */
+	StorePartitionBound(attachrel, rel, cmd->bound);
+
+	/* Ensure there exists a correct set of indexes in the partition. */
+	AttachPartitionEnsureIndexes(rel, attachrel);
+
+	/* and triggers */
+	CloneRowTriggersToPartition(rel, attachrel);
+
+	/*
+	 * Clone foreign key constraints.  Callee is responsible for setting up
+	 * for phase 3 constraint verification.
+	 */
+	CloneForeignKeyConstraints(wqueue, rel, attachrel);
+
+	/*
+	 * Generate partition constraint from the partition bound specification.
+	 * If the parent itself is a partition, make sure to include its
+	 * constraint as well.
+	 */
+	partBoundConstraint = get_qual_from_partbound(rel, cmd->bound);
+	partConstraint = list_concat(partBoundConstraint,
+								 RelationGetPartitionQual(rel));
+
+	/* Skip validation if there are no constraints to validate. */
+	if (partConstraint)
+	{
+		/*
+		 * Run the partition quals through const-simplification similar to
+		 * check constraints.  We skip canonicalize_qual, though, because
+		 * partition quals should be in canonical form already.
+		 */
+		partConstraint =
+			(List *) eval_const_expressions(NULL,
+											(Node *) partConstraint);
+
+		/* XXX this sure looks wrong */
+		partConstraint = list_make1(make_ands_explicit(partConstraint));
+
+		/*
+		 * Adjust the generated constraint to match this partition's attribute
+		 * numbers.
+		 */
+		partConstraint = map_partition_varattnos(partConstraint, 1, attachrel,
+												 rel);
+
+		/* Validate partition constraints against the table being attached. */
+		QueuePartitionConstraintValidation(wqueue, attachrel, partConstraint,
+										   false);
+	}
+
+	/*
+	 * If we're attaching a partition other than the default partition and a
+	 * default one exists, then that partition's partition constraint changes,
+	 * so add an entry to the work queue to validate it, too.  (We must not do
+	 * this when the partition being attached is the default one; we already
+	 * did it above!)
+	 */
+	if (OidIsValid(defaultPartOid))
+	{
+		Relation	defaultrel;
+		List	   *defPartConstraint;
+
+		Assert(!cmd->bound->is_default);
+
+		/* we already hold a lock on the default partition */
+		defaultrel = table_open(defaultPartOid, NoLock);
+		/* derive the default partition's new constraint from the new bound */
+		defPartConstraint =
+			get_proposed_default_constraint(partBoundConstraint);
+
+		/*
+		 * Map the Vars in the constraint expression from rel's attnos to
+		 * defaultrel's.
+		 */
+		defPartConstraint =
+			map_partition_varattnos(defPartConstraint,
+									1, defaultrel, rel);
+		QueuePartitionConstraintValidation(wqueue, defaultrel,
+										   defPartConstraint, true);
+
+		/* keep our lock until commit. */
+		table_close(defaultrel, NoLock);
+	}
+
+	ObjectAddressSet(address, RelationRelationId, RelationGetRelid(attachrel));
+
+	/*
+	 * If the partition we just attached is partitioned itself, invalidate
+	 * relcache for all descendent partitions too to ensure that their
+	 * rd_partcheck expression trees are rebuilt; partitions already locked at
+	 * the beginning of this function.
+	 */
+	if (attachrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+	{
+		ListCell   *l;
+
+		foreach(l, attachrel_children)
+		{
+			CacheInvalidateRelcacheByRelid(lfirst_oid(l));
+		}
+	}
+
+	/* keep our lock until commit */
+	table_close(attachrel, NoLock);
+
+	return address;
+}
+
+/*
+ * AttachPartitionEnsureIndexes
+ *		subroutine for ATExecAttachPartition to create/match indexes
+ *
+ * Enforce the indexing rule for partitioned tables during ALTER TABLE / ATTACH
+ * PARTITION: every partition must have an index attached to each index on the
+ * partitioned table.
+ *
+ * For each partitioned index on rel, either an existing compatible index on
+ * attachrel is attached to it, or a new index is created.  If attachrel is a
+ * foreign table, no indexes are created; instead we error out if rel has any
+ * unique or primary-key indexes.  All transient allocations are made in a
+ * private memory context that is deleted before returning.
+ */
+static void
+AttachPartitionEnsureIndexes(Relation rel, Relation attachrel)
+{
+	List	   *idxes;
+	List	   *attachRelIdxs;
+	Relation   *attachrelIdxRels;
+	IndexInfo **attachInfos;
+	int			i;
+	ListCell   *cell;
+	MemoryContext cxt;
+	MemoryContext oldcxt;
+
+	cxt = AllocSetContextCreate(CurrentMemoryContext,
+								"AttachPartitionEnsureIndexes",
+								ALLOCSET_DEFAULT_SIZES);
+	oldcxt = MemoryContextSwitchTo(cxt);
+
+	idxes = RelationGetIndexList(rel);
+	attachRelIdxs = RelationGetIndexList(attachrel);
+	attachrelIdxRels = palloc(sizeof(Relation) * list_length(attachRelIdxs));
+	attachInfos = palloc(sizeof(IndexInfo *) * list_length(attachRelIdxs));
+
+	/* Build arrays of all existing indexes and their IndexInfos */
+	i = 0;
+	foreach(cell, attachRelIdxs)
+	{
+		Oid			cldIdxId = lfirst_oid(cell);
+
+		attachrelIdxRels[i] = index_open(cldIdxId, AccessShareLock);
+		attachInfos[i] = BuildIndexInfo(attachrelIdxRels[i]);
+		i++;
+	}
+
+	/*
+	 * If we're attaching a foreign table, we must fail if any of the indexes
+	 * is a constraint index; otherwise, there's nothing to do here.  Do this
+	 * before starting work, to avoid wasting the effort of building a few
+	 * non-unique indexes before coming across a unique one.
+	 */
+	if (attachrel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+	{
+		foreach(cell, idxes)
+		{
+			Oid			idx = lfirst_oid(cell);
+			Relation	idxRel = index_open(idx, AccessShareLock);
+
+			if (idxRel->rd_index->indisunique ||
+				idxRel->rd_index->indisprimary)
+				ereport(ERROR,
+						(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+						 errmsg("cannot attach foreign table \"%s\" as partition of partitioned table \"%s\"",
+								RelationGetRelationName(attachrel),
+								RelationGetRelationName(rel)),
+						 errdetail("Partitioned table \"%s\" contains unique indexes.",
+								   RelationGetRelationName(rel))));
+			index_close(idxRel, AccessShareLock);
+		}
+
+		/* foreign tables get no indexes; skip straight to cleanup */
+		goto out;
+	}
+
+	/*
+	 * For each index on the partitioned table, find a matching one in the
+	 * partition-to-be; if one is not found, create one.
+	 */
+	foreach(cell, idxes)
+	{
+		Oid			idx = lfirst_oid(cell);
+		Relation	idxRel = index_open(idx, AccessShareLock);
+		IndexInfo  *info;
+		AttrMap    *attmap;
+		bool		found = false;
+		Oid			constraintOid;
+
+		/*
+		 * Ignore indexes in the partitioned table other than partitioned
+		 * indexes.
+		 */
+		if (idxRel->rd_rel->relkind != RELKIND_PARTITIONED_INDEX)
+		{
+			index_close(idxRel, AccessShareLock);
+			continue;
+		}
+
+		/* construct an indexinfo to compare existing indexes against */
+		info = BuildIndexInfo(idxRel);
+		attmap = build_attrmap_by_name(RelationGetDescr(attachrel),
+									   RelationGetDescr(rel));
+		constraintOid = get_relation_idx_constraint_oid(RelationGetRelid(rel), idx);
+
+		/*
+		 * Scan the list of existing indexes in the partition-to-be, and mark
+		 * the first matching, valid, unattached one we find, if any, as
+		 * partition of the parent index.  If we find one, we're done.
+		 */
+		for (i = 0; i < list_length(attachRelIdxs); i++)
+		{
+			Oid			cldIdxId = RelationGetRelid(attachrelIdxRels[i]);
+			Oid			cldConstrOid = InvalidOid;
+
+			/* does this index have a parent?  if so, can't use it */
+			if (attachrelIdxRels[i]->rd_rel->relispartition)
+				continue;
+
+			/* If this index is invalid, can't use it */
+			if (!attachrelIdxRels[i]->rd_index->indisvalid)
+				continue;
+
+			if (CompareIndexInfo(attachInfos[i], info,
+								 attachrelIdxRels[i]->rd_indcollation,
+								 idxRel->rd_indcollation,
+								 attachrelIdxRels[i]->rd_opfamily,
+								 idxRel->rd_opfamily,
+								 attmap))
+			{
+				/*
+				 * If this index is being created in the parent because of a
+				 * constraint, then the child needs to have a constraint also,
+				 * so look for one.  If there is no such constraint, this
+				 * index is no good, so keep looking.
+				 */
+				if (OidIsValid(constraintOid))
+				{
+					cldConstrOid =
+						get_relation_idx_constraint_oid(RelationGetRelid(attachrel),
+														cldIdxId);
+					/* no dice */
+					if (!OidIsValid(cldConstrOid))
+						continue;
+				}
+
+				/* bingo. */
+				IndexSetParentIndex(attachrelIdxRels[i], idx);
+				if (OidIsValid(constraintOid))
+					ConstraintSetParentConstraint(cldConstrOid, constraintOid,
+												  RelationGetRelid(attachrel));
+				found = true;
+
+				/* make the catalog updates visible to later iterations */
+				CommandCounterIncrement();
+				break;
+			}
+		}
+
+		/*
+		 * If no suitable index was found in the partition-to-be, create one
+		 * now.
+		 */
+		if (!found)
+		{
+			IndexStmt  *stmt;
+			Oid			constraintOid;
+
+			stmt = generateClonedIndexStmt(NULL,
+										   idxRel, attmap,
+										   &constraintOid);
+			DefineIndex(RelationGetRelid(attachrel), stmt, InvalidOid,
+						RelationGetRelid(idxRel),
+						constraintOid,
+						true, false, false, false, false);
+		}
+
+		index_close(idxRel, AccessShareLock);
+	}
+
+out:
+	/* Clean up. */
+	for (i = 0; i < list_length(attachRelIdxs); i++)
+		index_close(attachrelIdxRels[i], AccessShareLock);
+	MemoryContextSwitchTo(oldcxt);
+	MemoryContextDelete(cxt);
+}
+
+/*
+ * CloneRowTriggersToPartition
+ *		subroutine for ATExecAttachPartition/DefineRelation to create row
+ *		triggers on partitions
+ *
+ * Scans pg_trigger for the parent's row-level triggers and recreates each
+ * one on the partition, mapping any WHEN clause to the partition's
+ * attribute numbers.  Statement-level and internal triggers are skipped.
+ */
+static void
+CloneRowTriggersToPartition(Relation parent, Relation partition)
+{
+	Relation	pg_trigger;
+	ScanKeyData key;
+	SysScanDesc scan;
+	HeapTuple	tuple;
+	MemoryContext perTupCxt;
+
+	ScanKeyInit(&key, Anum_pg_trigger_tgrelid, BTEqualStrategyNumber,
+				F_OIDEQ, ObjectIdGetDatum(RelationGetRelid(parent)));
+	pg_trigger = table_open(TriggerRelationId, RowExclusiveLock);
+	scan = systable_beginscan(pg_trigger, TriggerRelidNameIndexId,
+							  true, NULL, 1, &key);
+
+	/* short-lived context, reset after each trigger is recreated */
+	perTupCxt = AllocSetContextCreate(CurrentMemoryContext,
+									  "clone trig", ALLOCSET_SMALL_SIZES);
+
+	while (HeapTupleIsValid(tuple = systable_getnext(scan)))
+	{
+		Form_pg_trigger trigForm = (Form_pg_trigger) GETSTRUCT(tuple);
+		CreateTrigStmt *trigStmt;
+		Node	   *qual = NULL;
+		Datum		value;
+		bool		isnull;
+		List	   *cols = NIL;
+		List	   *trigargs = NIL;
+		MemoryContext oldcxt;
+
+		/*
+		 * Ignore statement-level triggers; those are not cloned.
+		 */
+		if (!TRIGGER_FOR_ROW(trigForm->tgtype))
+			continue;
+
+		/*
+		 * Don't clone internal triggers, because the constraint cloning code
+		 * will.
+		 */
+		if (trigForm->tgisinternal)
+			continue;
+
+		/*
+		 * Complain if we find an unexpected trigger type.
+		 */
+		if (!TRIGGER_FOR_BEFORE(trigForm->tgtype) &&
+			!TRIGGER_FOR_AFTER(trigForm->tgtype))
+			elog(ERROR, "unexpected trigger \"%s\" found",
+				 NameStr(trigForm->tgname));
+
+		/* Use short-lived context for CREATE TRIGGER */
+		oldcxt = MemoryContextSwitchTo(perTupCxt);
+
+		/*
+		 * If there is a WHEN clause, generate a 'cooked' version of it that's
+		 * appropriate for the partition.
+		 */
+		value = heap_getattr(tuple, Anum_pg_trigger_tgqual,
+							 RelationGetDescr(pg_trigger), &isnull);
+		if (!isnull)
+		{
+			/* map both OLD and NEW references to the partition's attnos */
+			qual = stringToNode(TextDatumGetCString(value));
+			qual = (Node *) map_partition_varattnos((List *) qual, PRS2_OLD_VARNO,
+													partition, parent);
+			qual = (Node *) map_partition_varattnos((List *) qual, PRS2_NEW_VARNO,
+													partition, parent);
+		}
+
+		/*
+		 * If there is a column list, transform it to a list of column names.
+		 * Note we don't need to map this list in any way ...
+		 */
+		if (trigForm->tgattr.dim1 > 0)
+		{
+			int			i;
+
+			for (i = 0; i < trigForm->tgattr.dim1; i++)
+			{
+				Form_pg_attribute col;
+
+				col = TupleDescAttr(parent->rd_att,
+									trigForm->tgattr.values[i] - 1);
+				cols = lappend(cols,
+							   makeString(pstrdup(NameStr(col->attname))));
+			}
+		}
+
+		/* Reconstruct trigger arguments list. */
+		if (trigForm->tgnargs > 0)
+		{
+			char	   *p;
+
+			value = heap_getattr(tuple, Anum_pg_trigger_tgargs,
+								 RelationGetDescr(pg_trigger), &isnull);
+			if (isnull)
+				elog(ERROR, "tgargs is null for trigger \"%s\" in partition \"%s\"",
+					 NameStr(trigForm->tgname), RelationGetRelationName(partition));
+
+			/* tgargs is a bytea of NUL-separated argument strings */
+			p = (char *) VARDATA_ANY(DatumGetByteaPP(value));
+
+			for (int i = 0; i < trigForm->tgnargs; i++)
+			{
+				trigargs = lappend(trigargs, makeString(pstrdup(p)));
+				p += strlen(p) + 1;
+			}
+		}
+
+		trigStmt = makeNode(CreateTrigStmt);
+		trigStmt->replace = false;
+		trigStmt->isconstraint = OidIsValid(trigForm->tgconstraint);
+		trigStmt->trigname = NameStr(trigForm->tgname);
+		trigStmt->relation = NULL;
+		trigStmt->funcname = NULL;	/* passed separately */
+		trigStmt->args = trigargs;
+		trigStmt->row = true;
+		trigStmt->timing = trigForm->tgtype & TRIGGER_TYPE_TIMING_MASK;
+		trigStmt->events = trigForm->tgtype & TRIGGER_TYPE_EVENT_MASK;
+		trigStmt->columns = cols;
+		trigStmt->whenClause = NULL;	/* passed separately */
+		trigStmt->transitionRels = NIL; /* not supported at present */
+		trigStmt->deferrable = trigForm->tgdeferrable;
+		trigStmt->initdeferred = trigForm->tginitdeferred;
+		trigStmt->constrrel = NULL; /* passed separately */
+
+		/*
+		 * NOTE(review): the trailing bool args appear to be isInternal=false
+		 * and in_partition=true -- confirm against CreateTriggerFiringOn's
+		 * declaration.
+		 */
+		CreateTriggerFiringOn(trigStmt, NULL, RelationGetRelid(partition),
+							  trigForm->tgconstrrelid, InvalidOid, InvalidOid,
+							  trigForm->tgfoid, trigForm->oid, qual,
+							  false, true, trigForm->tgenabled);
+
+		MemoryContextSwitchTo(oldcxt);
+		MemoryContextReset(perTupCxt);
+	}
+
+	MemoryContextDelete(perTupCxt);
+
+	systable_endscan(scan);
+	table_close(pg_trigger, RowExclusiveLock);
+}
+
+/*
+ * ALTER TABLE DETACH PARTITION
+ *
+ * Return the address of the relation that is no longer a partition of rel.
+ *
+ * If concurrent mode is requested, we run in two transactions. A side-
+ * effect is that this command cannot run in a multi-part ALTER TABLE.
+ * Currently, that's enforced by the grammar.
+ *
+ * The strategy for concurrency is to first modify the partition's
+ * pg_inherits catalog row to make it visible to everyone that the
+ * partition is detached, lock the partition against writes, and commit
+ * the transaction; anyone who requests the partition descriptor from
+ * that point onwards has to ignore such a partition. In a second
+ * transaction, we wait until all transactions that could have seen the
+ * partition as attached are gone, then we remove the rest of partition
+ * metadata (pg_inherits and pg_class.relpartbound).
+ */
+static ObjectAddress
+ATExecDetachPartition(List **wqueue, AlteredTableInfo *tab, Relation rel,
+					  RangeVar *name, bool concurrent)
+{
+	Relation	partRel;
+	ObjectAddress address;
+	Oid			defaultPartOid;
+
+	/*
+	 * We must lock the default partition, because detaching this partition
+	 * will change its partition constraint.
+	 */
+	defaultPartOid =
+		get_default_oid_from_partdesc(RelationGetPartitionDesc(rel, true));
+	if (OidIsValid(defaultPartOid))
+	{
+		/*
+		 * Concurrent detaching when a default partition exists is not
+		 * supported. The main problem is that the default partition
+		 * constraint would change. And there's a definitional problem: what
+		 * should happen to the tuples that are being inserted that belong to
+		 * the partition being detached? Putting them on the partition being
+		 * detached would be wrong, since they'd become "lost" after the
+		 * detaching completes but we cannot put them in the default partition
+		 * either until we alter its partition constraint.
+		 *
+		 * I think we could solve this problem if we effected the constraint
+		 * change before committing the first transaction. But the lock would
+		 * have to remain AEL and it would cause concurrent query planning to
+		 * be blocked, so changing it that way would be even worse.
+		 */
+		if (concurrent)
+			ereport(ERROR,
+					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+					 errmsg("cannot detach partitions concurrently when a default partition exists")));
+		LockRelationOid(defaultPartOid, AccessExclusiveLock);
+	}
+
+	/*
+	 * In concurrent mode, the partition is locked with share-update-exclusive
+	 * in the first transaction. This allows concurrent transactions to be
+	 * doing DML to the partition.
+	 */
+	partRel = table_openrv(name, concurrent ? ShareUpdateExclusiveLock :
+						   AccessExclusiveLock);
+
+	/*
+	 * Check inheritance conditions and either delete the pg_inherits row (in
+	 * non-concurrent mode) or just set the inhdetachpending flag.
+	 */
+	if (!concurrent)
+		RemoveInheritance(partRel, rel, false);
+	else
+		MarkInheritDetached(partRel, rel);
+
+	/*
+	 * Ensure that foreign keys still hold after this detach. This keeps
+	 * locks on the referencing tables, which prevents concurrent transactions
+	 * from adding rows that we wouldn't see. For this to work in concurrent
+	 * mode, it is critical that the partition appears as no longer attached
+	 * for the RI queries as soon as the first transaction commits.
+	 */
+	ATDetachCheckNoForeignKeyRefs(partRel);
+
+	/*
+	 * Concurrent mode has to work harder; first we add a new constraint to
+	 * the partition that matches the partition constraint. Then we close our
+	 * existing transaction, and in a new one wait for all processes to catch
+	 * up on the catalog updates we've done so far; at that point we can
+	 * complete the operation.
+	 */
+	if (concurrent)
+	{
+		Oid			partrelid,
+					parentrelid;
+		LOCKTAG		tag;
+		char	   *parentrelname;
+		char	   *partrelname;
+
+		/*
+		 * Add a new constraint to the partition being detached, which
+		 * supplants the partition constraint (unless there is one already).
+		 */
+		DetachAddConstraintIfNeeded(wqueue, partRel);
+
+		/*
+		 * We're almost done now; the only traces that remain are the
+		 * pg_inherits tuple and the partition's relpartbounds. Before we can
+		 * remove those, we need to wait until all transactions that know that
+		 * this is a partition are gone.
+		 */
+
+		/*
+		 * Remember relation OIDs to re-acquire them later; and relation names
+		 * too, for error messages if something is dropped in between.
+		 * PortalContext is used so the names survive the transaction switch.
+		 */
+		partrelid = RelationGetRelid(partRel);
+		parentrelid = RelationGetRelid(rel);
+		parentrelname = MemoryContextStrdup(PortalContext,
+											RelationGetRelationName(rel));
+		partrelname = MemoryContextStrdup(PortalContext,
+										  RelationGetRelationName(partRel));
+
+		/* Invalidate relcache entries for the parent -- must be before close */
+		CacheInvalidateRelcache(rel);
+
+		table_close(partRel, NoLock);
+		table_close(rel, NoLock);
+		/* drop the reference; rel is re-opened and re-assigned below */
+		tab->rel = NULL;
+
+		/* Make updated catalog entry visible */
+		PopActiveSnapshot();
+		CommitTransactionCommand();
+
+		/* Here begins the second transaction of the concurrent algorithm */
+		StartTransactionCommand();
+
+		/*
+		 * Now wait. This ensures that all queries that were planned
+		 * including the partition are finished before we remove the rest of
+		 * catalog entries. We don't need or indeed want to acquire this
+		 * lock, though -- that would block later queries.
+		 *
+		 * We don't need to concern ourselves with waiting for a lock on the
+		 * partition itself, since we will acquire AccessExclusiveLock below.
+		 */
+		SET_LOCKTAG_RELATION(tag, MyDatabaseId, parentrelid);
+		WaitForLockersMultiple(list_make1(&tag), AccessExclusiveLock, false);
+
+		/*
+		 * Now acquire locks in both relations again. Note they may have been
+		 * removed in the meantime, so care is required.
+		 */
+		rel = try_relation_open(parentrelid, ShareUpdateExclusiveLock);
+		partRel = try_relation_open(partrelid, AccessExclusiveLock);
+
+		/* If the relations aren't there, something bad happened; bail out */
+		if (rel == NULL)
+		{
+			if (partRel != NULL)	/* shouldn't happen */
+				elog(WARNING, "dangling partition \"%s\" remains, can't fix",
+					 partrelname);
+			ereport(ERROR,
+					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+					 errmsg("partitioned table \"%s\" was removed concurrently",
+							parentrelname)));
+		}
+		if (partRel == NULL)
+			ereport(ERROR,
+					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+					 errmsg("partition \"%s\" was removed concurrently", partrelname)));
+
+		/* restore the AlteredTableInfo's reference to the re-opened parent */
+		tab->rel = rel;
+	}
+
+	/* Do the final part of detaching */
+	DetachPartitionFinalize(rel, partRel, concurrent, defaultPartOid);
+
+	ObjectAddressSet(address, RelationRelationId, RelationGetRelid(partRel));
+
+	/* keep our lock until commit */
+	table_close(partRel, NoLock);
+
+	return address;
+}
+
+/*
+ * Second part of ALTER TABLE .. DETACH.
+ *
+ * This is separate so that it can be run independently when the second
+ * transaction of the concurrent algorithm fails (crash or abort).
+ *
+ * 'rel' is the parent partitioned table and 'partRel' the partition being
+ * detached, both locked by the caller. 'concurrent' indicates that the
+ * pg_inherits row still remains and must be removed here; in the
+ * non-concurrent case it was already removed. 'defaultPartOid' is the OID
+ * of the parent's default partition, or InvalidOid if there is none.
+ */
+static void
+DetachPartitionFinalize(Relation rel, Relation partRel, bool concurrent,
+						Oid defaultPartOid)
+{
+	Relation	classRel;
+	List	   *fks;
+	ListCell   *cell;
+	List	   *indexes;
+	Datum		new_val[Natts_pg_class];
+	bool		new_null[Natts_pg_class],
+				new_repl[Natts_pg_class];
+	HeapTuple	tuple,
+				newtuple;
+	Relation	trigrel = NULL;
+
+	if (concurrent)
+	{
+		/*
+		 * We can remove the pg_inherits row now. (In the non-concurrent case,
+		 * this was already done).
+		 */
+		RemoveInheritance(partRel, rel, true);
+	}
+
+	/* Drop any triggers that were cloned on creation/attach. */
+	DropClonedTriggersFromPartition(RelationGetRelid(partRel));
+
+	/*
+	 * Detach any foreign keys that are inherited. This includes creating
+	 * additional action triggers.
+	 */
+	fks = copyObject(RelationGetFKeyList(partRel));
+	if (fks != NIL)
+		trigrel = table_open(TriggerRelationId, RowExclusiveLock);
+	foreach(cell, fks)
+	{
+		ForeignKeyCacheInfo *fk = lfirst(cell);
+		HeapTuple	contup;
+		Form_pg_constraint conform;
+		Constraint *fkconstraint;
+		Oid			insertTriggerOid,
+					updateTriggerOid;
+
+		contup = SearchSysCache1(CONSTROID, ObjectIdGetDatum(fk->conoid));
+		if (!HeapTupleIsValid(contup))
+			elog(ERROR, "cache lookup failed for constraint %u", fk->conoid);
+		conform = (Form_pg_constraint) GETSTRUCT(contup);
+
+		/* consider only the inherited foreign keys */
+		if (conform->contype != CONSTRAINT_FOREIGN ||
+			!OidIsValid(conform->conparentid))
+		{
+			ReleaseSysCache(contup);
+			continue;
+		}
+
+		/* unset conparentid and adjust conislocal, coninhcount, etc. */
+		ConstraintSetParentConstraint(fk->conoid, InvalidOid, InvalidOid);
+
+		/*
+		 * Also, look up the partition's "check" triggers corresponding to the
+		 * constraint being detached and detach them from the parent triggers.
+		 */
+		GetForeignKeyCheckTriggers(trigrel,
+								   fk->conoid, fk->confrelid, fk->conrelid,
+								   &insertTriggerOid, &updateTriggerOid);
+		Assert(OidIsValid(insertTriggerOid));
+		TriggerSetParentTrigger(trigrel, insertTriggerOid, InvalidOid,
+								RelationGetRelid(partRel));
+		Assert(OidIsValid(updateTriggerOid));
+		TriggerSetParentTrigger(trigrel, updateTriggerOid, InvalidOid,
+								RelationGetRelid(partRel));
+
+		/*
+		 * Make the action triggers on the referenced relation. When this was
+		 * a partition the action triggers pointed to the parent rel (they
+		 * still do), but now we need separate ones of our own.
+		 */
+		fkconstraint = makeNode(Constraint);
+		fkconstraint->contype = CONSTRAINT_FOREIGN;
+		fkconstraint->conname = pstrdup(NameStr(conform->conname));
+		fkconstraint->deferrable = conform->condeferrable;
+		fkconstraint->initdeferred = conform->condeferred;
+		fkconstraint->location = -1;
+		fkconstraint->pktable = NULL;
+		fkconstraint->fk_attrs = NIL;
+		fkconstraint->pk_attrs = NIL;
+		fkconstraint->fk_matchtype = conform->confmatchtype;
+		fkconstraint->fk_upd_action = conform->confupdtype;
+		fkconstraint->fk_del_action = conform->confdeltype;
+		fkconstraint->fk_del_set_cols = NIL;
+		fkconstraint->old_conpfeqop = NIL;
+		fkconstraint->old_pktable_oid = InvalidOid;
+		fkconstraint->skip_validation = false;
+		fkconstraint->initially_valid = true;
+
+		createForeignKeyActionTriggers(partRel, conform->confrelid,
+									   fkconstraint, fk->conoid,
+									   conform->conindid,
+									   InvalidOid, InvalidOid,
+									   NULL, NULL);
+
+		ReleaseSysCache(contup);
+	}
+	list_free_deep(fks);
+	if (trigrel)
+		table_close(trigrel, RowExclusiveLock);
+
+	/*
+	 * Any sub-constraints that are in the referenced-side of a larger
+	 * constraint have to be removed. This partition is no longer part of the
+	 * key space of the constraint.
+	 */
+	foreach(cell, GetParentedForeignKeyRefs(partRel))
+	{
+		Oid			constrOid = lfirst_oid(cell);
+		ObjectAddress constraint;
+
+		ConstraintSetParentConstraint(constrOid, InvalidOid, InvalidOid);
+		deleteDependencyRecordsForClass(ConstraintRelationId,
+										constrOid,
+										ConstraintRelationId,
+										DEPENDENCY_INTERNAL);
+		/* make the dependency removal visible to the deletion below */
+		CommandCounterIncrement();
+
+		ObjectAddressSet(constraint, ConstraintRelationId, constrOid);
+		performDeletion(&constraint, DROP_RESTRICT, 0);
+	}
+
+	/* Now we can detach indexes */
+	indexes = RelationGetIndexList(partRel);
+	foreach(cell, indexes)
+	{
+		Oid			idxid = lfirst_oid(cell);
+		Relation	idx;
+		Oid			constrOid;
+
+		/* indexes not attached to a parent index are left alone */
+		if (!has_superclass(idxid))
+			continue;
+
+		Assert((IndexGetRelation(get_partition_parent(idxid, false), false) ==
+				RelationGetRelid(rel)));
+
+		idx = index_open(idxid, AccessExclusiveLock);
+		IndexSetParentIndex(idx, InvalidOid);
+
+		/* If there's a constraint associated with the index, detach it too */
+		constrOid = get_relation_idx_constraint_oid(RelationGetRelid(partRel),
+													idxid);
+		if (OidIsValid(constrOid))
+			ConstraintSetParentConstraint(constrOid, InvalidOid, InvalidOid);
+
+		index_close(idx, NoLock);
+	}
+
+	/* Update pg_class tuple */
+	classRel = table_open(RelationRelationId, RowExclusiveLock);
+	tuple = SearchSysCacheCopy1(RELOID,
+								ObjectIdGetDatum(RelationGetRelid(partRel)));
+	if (!HeapTupleIsValid(tuple))
+		elog(ERROR, "cache lookup failed for relation %u",
+			 RelationGetRelid(partRel));
+	Assert(((Form_pg_class) GETSTRUCT(tuple))->relispartition);
+
+	/* Clear relpartbound and reset relispartition */
+	memset(new_val, 0, sizeof(new_val));
+	memset(new_null, false, sizeof(new_null));
+	memset(new_repl, false, sizeof(new_repl));
+	new_val[Anum_pg_class_relpartbound - 1] = (Datum) 0;
+	new_null[Anum_pg_class_relpartbound - 1] = true;
+	new_repl[Anum_pg_class_relpartbound - 1] = true;
+	newtuple = heap_modify_tuple(tuple, RelationGetDescr(classRel),
+								 new_val, new_null, new_repl);
+
+	((Form_pg_class) GETSTRUCT(newtuple))->relispartition = false;
+	CatalogTupleUpdate(classRel, &newtuple->t_self, newtuple);
+	heap_freetuple(newtuple);
+	table_close(classRel, RowExclusiveLock);
+
+	if (OidIsValid(defaultPartOid))
+	{
+		/*
+		 * If the relation being detached is the default partition itself,
+		 * remove it from the parent's pg_partitioned_table entry.
+		 *
+		 * If not, we must invalidate default partition's relcache entry, as
+		 * in StorePartitionBound: its partition constraint depends on every
+		 * other partition's partition constraint.
+		 */
+		if (RelationGetRelid(partRel) == defaultPartOid)
+			update_default_partition_oid(RelationGetRelid(rel), InvalidOid);
+		else
+			CacheInvalidateRelcacheByRelid(defaultPartOid);
+	}
+
+	/*
+	 * Invalidate the parent's relcache so that the partition is no longer
+	 * included in its partition descriptor.
+	 */
+	CacheInvalidateRelcache(rel);
+
+	/*
+	 * If the partition we just detached is partitioned itself, invalidate
+	 * relcache for all descendant partitions too to ensure that their
+	 * rd_partcheck expression trees are rebuilt; must lock partitions before
+	 * doing so, using the same lockmode as what partRel has been locked with
+	 * by the caller.
+	 */
+	if (partRel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+	{
+		List	   *children;
+
+		children = find_all_inheritors(RelationGetRelid(partRel),
+									   AccessExclusiveLock, NULL);
+		foreach(cell, children)
+		{
+			CacheInvalidateRelcacheByRelid(lfirst_oid(cell));
+		}
+	}
+}
+
+/*
+ * ALTER TABLE ... DETACH PARTITION ... FINALIZE
+ *
+ * To use when a DETACH PARTITION command previously did not run to
+ * completion; this completes the detaching process.
+ */
+static ObjectAddress
+ATExecDetachPartitionFinalize(Relation rel, RangeVar *name)
+{
+	Snapshot	activeSnap = GetActiveSnapshot();
+	Relation	partitionRel;
+	ObjectAddress addr;
+
+	/* Take the strongest lock: we're about to finish changing catalogs. */
+	partitionRel = table_openrv(name, AccessExclusiveLock);
+
+	/*
+	 * Wait until existing snapshots are gone. This is important if the
+	 * second transaction of DETACH PARTITION CONCURRENTLY is canceled: the
+	 * user could immediately run DETACH FINALIZE without actually waiting for
+	 * existing transactions. We must not complete the detach action until
+	 * all such queries are complete (otherwise we would present them with an
+	 * inconsistent view of catalogs).
+	 */
+	WaitForOlderSnapshots(activeSnap->xmin, false);
+
+	/* Run the second half of the detach work (concurrent mode). */
+	DetachPartitionFinalize(rel, partitionRel, true, InvalidOid);
+
+	ObjectAddressSet(addr, RelationRelationId, RelationGetRelid(partitionRel));
+
+	/* hold the lock until transaction commit */
+	table_close(partitionRel, NoLock);
+
+	return addr;
+}
+
+/*
+ * DetachAddConstraintIfNeeded
+ * Subroutine for ATExecDetachPartition. Create a constraint that
+ * takes the place of the partition constraint, but avoid creating
+ * a dupe if a constraint already exists which implies the needed
+ * constraint.
+ */
+static void
+DetachAddConstraintIfNeeded(List **wqueue, Relation partRel)
+{
+	List	   *partConstraint;
+	AlteredTableInfo *tab;
+	Constraint *constr;
+
+	/* Obtain the partition constraint and pre-simplify it. */
+	partConstraint = RelationGetPartitionQual(partRel);
+	partConstraint =
+		(List *) eval_const_expressions(NULL, (Node *) partConstraint);
+
+	/*
+	 * If an existing constraint on the partition already implies the needed
+	 * constraint, there's nothing to do.
+	 */
+	if (PartConstraintImpliedByRelConstraint(partRel, partConstraint))
+		return;
+
+	tab = ATGetQueueEntry(wqueue, partRel);
+
+	/* Build a CHECK constraint equivalent to the partition constraint. */
+	constr = makeNode(Constraint);
+	constr->contype = CONSTR_CHECK;
+	constr->conname = NULL;
+	constr->location = -1;
+	constr->is_no_inherit = false;
+	constr->raw_expr = NULL;
+	constr->cooked_expr = nodeToString(make_ands_explicit(partConstraint));
+	constr->initially_valid = true;
+	constr->skip_validation = true;
+	/* It's a re-add, since it nominally already exists */
+	ATAddCheckConstraint(wqueue, tab, partRel, constr,
+						 true, false, true, ShareUpdateExclusiveLock);
+}
+
+/*
+ * DropClonedTriggersFromPartition
+ * subroutine for ATExecDetachPartition to remove any triggers that were
+ * cloned to the partition when it was created-as-partition or attached.
+ * This undoes what CloneRowTriggersToPartition did.
+ */
+static void
+DropClonedTriggersFromPartition(Oid partitionId)
+{
+	Relation	pg_trigger;
+	ScanKeyData scankey;
+	SysScanDesc scan;
+	HeapTuple	tuple;
+	ObjectAddresses *todrop;
+
+	todrop = new_object_addresses();
+
+	/* Find all triggers defined on the given partition. */
+	ScanKeyInit(&scankey, Anum_pg_trigger_tgrelid, BTEqualStrategyNumber,
+				F_OIDEQ, ObjectIdGetDatum(partitionId));
+	pg_trigger = table_open(TriggerRelationId, RowExclusiveLock);
+	scan = systable_beginscan(pg_trigger, TriggerRelidNameIndexId,
+							  true, NULL, 1, &scankey);
+	while (HeapTupleIsValid(tuple = systable_getnext(scan)))
+	{
+		Form_pg_trigger trigform = (Form_pg_trigger) GETSTRUCT(tuple);
+		ObjectAddress trigaddr;
+
+		/* Only triggers cloned from a parent are of interest here. */
+		if (!OidIsValid(trigform->tgparentid))
+			continue;
+
+		/*
+		 * Skip internal triggers that implement foreign keys; those are
+		 * detached when the foreign keys themselves are.
+		 */
+		if (OidIsValid(trigform->tgconstrrelid))
+			continue;
+
+		/*
+		 * This is ugly, but necessary: drop the dependency markings on the
+		 * trigger, else the deletion below would be refused.
+		 */
+		deleteDependencyRecordsForClass(TriggerRelationId, trigform->oid,
+										TriggerRelationId,
+										DEPENDENCY_PARTITION_PRI);
+		deleteDependencyRecordsForClass(TriggerRelationId, trigform->oid,
+										RelationRelationId,
+										DEPENDENCY_PARTITION_SEC);
+
+		/* Queue this trigger for removal. */
+		ObjectAddressSet(trigaddr, TriggerRelationId, trigform->oid);
+		add_exact_object_address(&trigaddr, todrop);
+	}
+
+	/* make the dependency removal visible to the deletion below */
+	CommandCounterIncrement();
+	performMultipleDeletions(todrop, DROP_RESTRICT, PERFORM_DELETION_INTERNAL);
+
+	/* clean up */
+	free_object_addresses(todrop);
+	systable_endscan(scan);
+	table_close(pg_trigger, RowExclusiveLock);
+}
+
+/*
+ * Before acquiring lock on an index, acquire the same lock on the owning
+ * table.  State shared between ATExecAttachPartitionIdx and its
+ * RangeVarGetRelidExtended callback, RangeVarCallbackForAttachIndex.
+ */
+struct AttachIndexCallbackState
+{
+	Oid			partitionOid;	/* OID of table owning the index being
+								 * attached, once looked up; InvalidOid if
+								 * not yet known */
+	Oid			parentTblOid;	/* OID of table owning the parent index */
+	bool		lockedParentTbl;	/* have we locked parentTblOid yet? */
+};
+
+/*
+ * RangeVarGetRelidExtended callback used when looking up the index to
+ * attach: lock the tables involved before locking the index itself, per
+ * the lock-ordering rule stated above struct AttachIndexCallbackState.
+ */
+static void
+RangeVarCallbackForAttachIndex(const RangeVar *rv, Oid relOid, Oid oldRelOid,
+							   void *arg)
+{
+	struct AttachIndexCallbackState *state;
+	Form_pg_class classform;
+	HeapTuple	tuple;
+
+	state = (struct AttachIndexCallbackState *) arg;
+
+	/* On the first call, lock the parent index's table. */
+	if (!state->lockedParentTbl)
+	{
+		LockRelationOid(state->parentTblOid, AccessShareLock);
+		state->lockedParentTbl = true;
+	}
+
+	/*
+	 * If we previously locked some other heap, and the name we're looking up
+	 * no longer refers to an index on that relation, release the now-useless
+	 * lock. XXX maybe we should do this *after* we verify whether the index
+	 * does not actually belong to the same relation ...
+	 */
+	if (relOid != oldRelOid && OidIsValid(state->partitionOid))
+	{
+		UnlockRelationOid(state->partitionOid, AccessShareLock);
+		state->partitionOid = InvalidOid;
+	}
+
+	/* Didn't find a relation, so no need for locking or permission checks. */
+	if (!OidIsValid(relOid))
+		return;
+
+	tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relOid));
+	if (!HeapTupleIsValid(tuple))
+		return;					/* concurrently dropped, so nothing to do */
+	classform = (Form_pg_class) GETSTRUCT(tuple);
+	if (classform->relkind != RELKIND_PARTITIONED_INDEX &&
+		classform->relkind != RELKIND_INDEX)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+				 errmsg("\"%s\" is not an index", rv->relname)));
+	ReleaseSysCache(tuple);
+
+	/*
+	 * Since we need only examine the heap's tupledesc, an access share lock
+	 * on it (preventing any DDL) is sufficient.
+	 */
+	state->partitionOid = IndexGetRelation(relOid, false);
+	LockRelationOid(state->partitionOid, AccessShareLock);
+}
+
+/*
+ * ALTER INDEX i1 ATTACH PARTITION i2
+ */
+static ObjectAddress
+ATExecAttachPartitionIdx(List **wqueue, Relation parentIdx, RangeVar *name)
+{
+	Relation	partIdx;
+	Relation	partTbl;
+	Relation	parentTbl;
+	ObjectAddress address;
+	Oid			partIdxId;
+	Oid			currParent;
+	struct AttachIndexCallbackState state;
+
+	/*
+	 * We need to obtain lock on the index 'name' to modify it, but we also
+	 * need to read its owning table's tuple descriptor -- so we need to lock
+	 * both. To avoid deadlocks, obtain lock on the table before doing so on
+	 * the index. Furthermore, we need to examine the parent table of the
+	 * partition, so lock that one too.
+	 */
+	state.partitionOid = InvalidOid;
+	state.parentTblOid = parentIdx->rd_index->indrelid;
+	state.lockedParentTbl = false;
+	partIdxId =
+		RangeVarGetRelidExtended(name, AccessExclusiveLock, 0,
+								 RangeVarCallbackForAttachIndex,
+								 (void *) &state);
+	/* Not there? */
+	if (!OidIsValid(partIdxId))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("index \"%s\" does not exist", name->relname)));
+
+	/* no deadlock risk: RangeVarGetRelidExtended already acquired the lock */
+	partIdx = relation_open(partIdxId, AccessExclusiveLock);
+
+	/* we already hold locks on both tables, so this is safe: */
+	parentTbl = relation_open(parentIdx->rd_index->indrelid, AccessShareLock);
+	partTbl = relation_open(partIdx->rd_index->indrelid, NoLock);
+
+	ObjectAddressSet(address, RelationRelationId, RelationGetRelid(partIdx));
+
+	/* Silently do nothing if already in the right state */
+	currParent = partIdx->rd_rel->relispartition ?
+		get_partition_parent(partIdxId, false) : InvalidOid;
+	if (currParent != RelationGetRelid(parentIdx))
+	{
+		IndexInfo  *childInfo;
+		IndexInfo  *parentInfo;
+		AttrMap    *attmap;
+		bool		found;
+		int			i;
+		PartitionDesc partDesc;
+		Oid			constraintOid,
+					cldConstrId = InvalidOid;
+
+		/*
+		 * If this partition already has an index attached, refuse the
+		 * operation.
+		 */
+		refuseDupeIndexAttach(parentIdx, partIdx, partTbl);
+
+		if (OidIsValid(currParent))
+			ereport(ERROR,
+					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+					 errmsg("cannot attach index \"%s\" as a partition of index \"%s\"",
+							RelationGetRelationName(partIdx),
+							RelationGetRelationName(parentIdx)),
+					 errdetail("Index \"%s\" is already attached to another index.",
+							   RelationGetRelationName(partIdx))));
+
+		/* Make sure it indexes a partition of the other index's table */
+		partDesc = RelationGetPartitionDesc(parentTbl, true);
+		found = false;
+		for (i = 0; i < partDesc->nparts; i++)
+		{
+			if (partDesc->oids[i] == state.partitionOid)
+			{
+				found = true;
+				break;
+			}
+		}
+		if (!found)
+			ereport(ERROR,
+					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+					 errmsg("cannot attach index \"%s\" as a partition of index \"%s\"",
+							RelationGetRelationName(partIdx),
+							RelationGetRelationName(parentIdx)),
+					 errdetail("Index \"%s\" is not an index on any partition of table \"%s\".",
+							   RelationGetRelationName(partIdx),
+							   RelationGetRelationName(parentTbl))));
+
+		/* Ensure the indexes are compatible */
+		childInfo = BuildIndexInfo(partIdx);
+		parentInfo = BuildIndexInfo(parentIdx);
+		attmap = build_attrmap_by_name(RelationGetDescr(partTbl),
+									   RelationGetDescr(parentTbl));
+		if (!CompareIndexInfo(childInfo, parentInfo,
+							  partIdx->rd_indcollation,
+							  parentIdx->rd_indcollation,
+							  partIdx->rd_opfamily,
+							  parentIdx->rd_opfamily,
+							  attmap))
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+					 errmsg("cannot attach index \"%s\" as a partition of index \"%s\"",
+							RelationGetRelationName(partIdx),
+							RelationGetRelationName(parentIdx)),
+					 errdetail("The index definitions do not match.")));
+
+		/*
+		 * If there is a constraint in the parent, make sure there is one in
+		 * the child too.
+		 */
+		constraintOid = get_relation_idx_constraint_oid(RelationGetRelid(parentTbl),
+														RelationGetRelid(parentIdx));
+
+		if (OidIsValid(constraintOid))
+		{
+			cldConstrId = get_relation_idx_constraint_oid(RelationGetRelid(partTbl),
+														  partIdxId);
+			if (!OidIsValid(cldConstrId))
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+						 errmsg("cannot attach index \"%s\" as a partition of index \"%s\"",
+								RelationGetRelationName(partIdx),
+								RelationGetRelationName(parentIdx)),
+						 errdetail("The index \"%s\" belongs to a constraint in table \"%s\" but no constraint exists for index \"%s\".",
+								   RelationGetRelationName(parentIdx),
+								   RelationGetRelationName(parentTbl),
+								   RelationGetRelationName(partIdx))));
+		}
+
+		/* All good -- do it */
+		IndexSetParentIndex(partIdx, RelationGetRelid(parentIdx));
+		if (OidIsValid(constraintOid))
+			ConstraintSetParentConstraint(cldConstrId, constraintOid,
+										  RelationGetRelid(partTbl));
+
+		free_attrmap(attmap);
+
+		/* This attach may have completed the set; maybe mark parent valid */
+		validatePartitionedIndex(parentIdx, parentTbl);
+	}
+
+	relation_close(parentTbl, AccessShareLock);
+	/* keep these locks till commit */
+	relation_close(partTbl, NoLock);
+	relation_close(partIdx, NoLock);
+
+	return address;
+}
+
+/*
+ * Verify whether the given partition already contains an index attached
+ * to the given partitioned index. If so, raise an error.
+ */
+static void
+refuseDupeIndexAttach(Relation parentIdx, Relation partIdx, Relation partitionTbl)
+{
+	Oid			attachedIdx;
+
+	/* Is some index of partitionTbl already attached to parentIdx? */
+	attachedIdx = index_get_partition(partitionTbl,
+									  RelationGetRelid(parentIdx));
+	if (!OidIsValid(attachedIdx))
+		return;
+
+	ereport(ERROR,
+			(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+			 errmsg("cannot attach index \"%s\" as a partition of index \"%s\"",
+					RelationGetRelationName(partIdx),
+					RelationGetRelationName(parentIdx)),
+			 errdetail("Another index is already attached for partition \"%s\".",
+					   RelationGetRelationName(partitionTbl))));
+}
+
+/*
+ * Verify whether the set of attached partition indexes to a parent index on
+ * a partitioned table is complete. If it is, mark the parent index valid.
+ *
+ * This should be called each time a partition index is attached.
+ */
+static void
+validatePartitionedIndex(Relation partedIdx, Relation partedTbl)
+{
+	Relation	inheritsRel;
+	SysScanDesc scan;
+	ScanKeyData key;
+	int			tuples = 0;		/* count of valid attached indexes */
+	HeapTuple	inhTup;
+	bool		updated = false;	/* did we set indisvalid here? */
+
+	Assert(partedIdx->rd_rel->relkind == RELKIND_PARTITIONED_INDEX);
+
+	/*
+	 * Scan pg_inherits for this parent index. Count each valid index we find
+	 * (verifying the pg_index entry for each), and if we reach the total
+	 * amount we expect, we can mark this parent index as valid.
+	 */
+	inheritsRel = table_open(InheritsRelationId, AccessShareLock);
+	ScanKeyInit(&key, Anum_pg_inherits_inhparent,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(RelationGetRelid(partedIdx)));
+	scan = systable_beginscan(inheritsRel, InheritsParentIndexId, true,
+							  NULL, 1, &key);
+	while ((inhTup = systable_getnext(scan)) != NULL)
+	{
+		Form_pg_inherits inhForm = (Form_pg_inherits) GETSTRUCT(inhTup);
+		HeapTuple	indTup;
+		Form_pg_index indexForm;
+
+		indTup = SearchSysCache1(INDEXRELID,
+								 ObjectIdGetDatum(inhForm->inhrelid));
+		if (!HeapTupleIsValid(indTup))
+			elog(ERROR, "cache lookup failed for index %u", inhForm->inhrelid);
+		indexForm = (Form_pg_index) GETSTRUCT(indTup);
+		/* only children that are themselves valid count */
+		if (indexForm->indisvalid)
+			tuples += 1;
+		ReleaseSysCache(indTup);
+	}
+
+	/* Done with pg_inherits */
+	systable_endscan(scan);
+	table_close(inheritsRel, AccessShareLock);
+
+	/*
+	 * If we found as many inherited indexes as the partitioned table has
+	 * partitions, we're good; update pg_index to set indisvalid.
+	 */
+	if (tuples == RelationGetPartitionDesc(partedTbl, true)->nparts)
+	{
+		Relation	idxRel;
+		HeapTuple	indTup;
+		Form_pg_index indexForm;
+
+		idxRel = table_open(IndexRelationId, RowExclusiveLock);
+		indTup = SearchSysCacheCopy1(INDEXRELID,
+									 ObjectIdGetDatum(RelationGetRelid(partedIdx)));
+		if (!HeapTupleIsValid(indTup))
+			elog(ERROR, "cache lookup failed for index %u",
+				 RelationGetRelid(partedIdx));
+		indexForm = (Form_pg_index) GETSTRUCT(indTup);
+
+		indexForm->indisvalid = true;
+		updated = true;
+
+		CatalogTupleUpdate(idxRel, &indTup->t_self, indTup);
+
+		table_close(idxRel, RowExclusiveLock);
+		heap_freetuple(indTup);
+	}
+
+	/*
+	 * If this index is in turn a partition of a larger index, validating it
+	 * might cause the parent to become valid also. Try that.
+	 */
+	if (updated && partedIdx->rd_rel->relispartition)
+	{
+		Oid			parentIdxId,
+					parentTblId;
+		Relation	parentIdx,
+					parentTbl;
+
+		/* make sure we see the validation we just did */
+		CommandCounterIncrement();
+
+		parentIdxId = get_partition_parent(RelationGetRelid(partedIdx), false);
+		parentTblId = get_partition_parent(RelationGetRelid(partedTbl), false);
+		parentIdx = relation_open(parentIdxId, AccessExclusiveLock);
+		parentTbl = relation_open(parentTblId, AccessExclusiveLock);
+		Assert(!parentIdx->rd_index->indisvalid);
+
+		/* recurse upwards */
+		validatePartitionedIndex(parentIdx, parentTbl);
+
+		relation_close(parentIdx, AccessExclusiveLock);
+		relation_close(parentTbl, AccessExclusiveLock);
+	}
+}
+
+/*
+ * Return an OID list of constraints that reference the given relation
+ * that are marked as having a parent constraints.
+ */
+static List *
+GetParentedForeignKeyRefs(Relation partition)
+{
+	Relation	conrel;
+	SysScanDesc scan;
+	ScanKeyData skey[2];
+	HeapTuple	tuple;
+	List	   *result = NIL;
+
+	/*
+	 * A table with no indexes, or with no columns usable as a foreign-key
+	 * referent, cannot be referenced; skip the scan in that case.
+	 */
+	if (RelationGetIndexList(partition) == NIL ||
+		bms_is_empty(RelationGetIndexAttrBitmap(partition,
+												INDEX_ATTR_BITMAP_KEY)))
+		return NIL;
+
+	/* Find foreign-key constraints whose referenced table is this one */
+	conrel = table_open(ConstraintRelationId, AccessShareLock);
+	ScanKeyInit(&skey[0],
+				Anum_pg_constraint_confrelid, BTEqualStrategyNumber,
+				F_OIDEQ, ObjectIdGetDatum(RelationGetRelid(partition)));
+	ScanKeyInit(&skey[1],
+				Anum_pg_constraint_contype, BTEqualStrategyNumber,
+				F_CHAREQ, CharGetDatum(CONSTRAINT_FOREIGN));
+
+	/* XXX This is a seqscan, as we don't have a usable index */
+	scan = systable_beginscan(conrel, InvalidOid, true, NULL, 2, skey);
+	while ((tuple = systable_getnext(scan)) != NULL)
+	{
+		Form_pg_constraint conform = (Form_pg_constraint) GETSTRUCT(tuple);
+
+		/* Constraints that are not part of a larger one don't matter here */
+		if (!OidIsValid(conform->conparentid))
+			continue;
+
+		result = lappend_oid(result, conform->oid);
+	}
+
+	systable_endscan(scan);
+	table_close(conrel, AccessShareLock);
+
+	return result;
+}
+
+/*
+ * During DETACH PARTITION, verify that any foreign keys pointing to the
+ * partitioned table would not become invalid. An error is raised if any
+ * referenced values exist.
+ */
+static void
+ATDetachCheckNoForeignKeyRefs(Relation partition)
+{
+	List	   *constraints;
+	ListCell   *cell;
+
+	constraints = GetParentedForeignKeyRefs(partition);
+
+	foreach(cell, constraints)
+	{
+		Oid			constrOid = lfirst_oid(cell);
+		HeapTuple	tuple;
+		Form_pg_constraint constrForm;
+		Relation	rel;
+		Trigger		trig;
+
+		tuple = SearchSysCache1(CONSTROID, ObjectIdGetDatum(constrOid));
+		if (!HeapTupleIsValid(tuple))
+			elog(ERROR, "cache lookup failed for constraint %u", constrOid);
+		constrForm = (Form_pg_constraint) GETSTRUCT(tuple);
+
+		Assert(OidIsValid(constrForm->conparentid));
+		Assert(constrForm->confrelid == RelationGetRelid(partition));
+
+		/* prevent data changes into the referencing table until commit */
+		rel = table_open(constrForm->conrelid, ShareLock);
+
+		/*
+		 * Build a dummy Trigger carrying the constraint's details, just to
+		 * pass them down to RI_PartitionRemove_Check.
+		 */
+		MemSet(&trig, 0, sizeof(trig));
+		trig.tgoid = InvalidOid;
+		trig.tgname = NameStr(constrForm->conname);
+		trig.tgenabled = TRIGGER_FIRES_ON_ORIGIN;
+		trig.tgisinternal = true;
+		trig.tgconstrrelid = RelationGetRelid(partition);
+		trig.tgconstrindid = constrForm->conindid;
+		trig.tgconstraint = constrForm->oid;
+		trig.tgdeferrable = false;
+		trig.tginitdeferred = false;
+		/* we needn't fill in remaining fields */
+
+		/* raises an error if referencing values exist */
+		RI_PartitionRemove_Check(&trig, rel, partition);
+
+		ReleaseSysCache(tuple);
+
+		table_close(rel, NoLock);
+	}
+}
+
+/*
+ * resolve column compression specification to compression method.
+ */
+static char
+GetAttributeCompression(Oid atttypid, char *compression)
+{
+	char		method;
+
+	/* No spec, or an explicit "default", selects the default method. */
+	if (compression == NULL || strcmp(compression, "default") == 0)
+		return InvalidCompressionMethod;
+
+	/*
+	 * To specify a nondefault method, the column data type must be toastable.
+	 * Note this says nothing about whether the column's attstorage setting
+	 * permits compression; we intentionally allow attstorage and
+	 * attcompression to be independent. But with a non-toastable type,
+	 * attstorage could not be set to a value that would permit compression.
+	 *
+	 * We don't actually need to enforce this, since nothing bad would happen
+	 * if attcompression were non-default; it would never be consulted. But
+	 * it seems more user-friendly to complain about a certainly-useless
+	 * attempt to set the property.
+	 */
+	if (!TypeIsToastable(atttypid))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("column data type %s does not support compression",
+						format_type_be(atttypid))));
+
+	/* Translate the name, rejecting anything unrecognized. */
+	method = CompressionNameToMethod(compression);
+	if (!CompressionMethodIsValid(method))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("invalid compression method \"%s\"", compression)));
+
+	return method;
+}
diff --git a/src/backend/commands/tablespace.c b/src/backend/commands/tablespace.c
new file mode 100644
index 0000000..9bdfef9
--- /dev/null
+++ b/src/backend/commands/tablespace.c
@@ -0,0 +1,1595 @@
+/*-------------------------------------------------------------------------
+ *
+ * tablespace.c
+ * Commands to manipulate table spaces
+ *
+ * Tablespaces in PostgreSQL are designed to allow users to determine
+ * where the data file(s) for a given database object reside on the file
+ * system.
+ *
+ * A tablespace represents a directory on the file system. At tablespace
+ * creation time, the directory must be empty. To simplify things and
+ * remove the possibility of having file name conflicts, we isolate
+ * files within a tablespace into database-specific subdirectories.
+ *
+ * To support file access via the information given in RelFileNode, we
+ * maintain a symbolic-link map in $PGDATA/pg_tblspc. The symlinks are
+ * named by tablespace OIDs and point to the actual tablespace directories.
+ * There is also a per-cluster version directory in each tablespace.
+ * Thus the full path to an arbitrary file is
+ * $PGDATA/pg_tblspc/spcoid/PG_MAJORVER_CATVER/dboid/relfilenode
+ * e.g.
+ * $PGDATA/pg_tblspc/20981/PG_9.0_201002161/719849/83292814
+ *
+ * There are two tablespaces created at initdb time: pg_global (for shared
+ * tables) and pg_default (for everything else). For backwards compatibility
+ * and to remain functional on platforms without symlinks, these tablespaces
+ * are accessed specially: they are respectively
+ * $PGDATA/global/relfilenode
+ * $PGDATA/base/dboid/relfilenode
+ *
+ * To allow CREATE DATABASE to give a new database a default tablespace
+ * that's different from the template database's default, we make the
+ * provision that a zero in pg_class.reltablespace means the database's
+ * default tablespace. Without this, CREATE DATABASE would have to go in
+ * and munge the system catalogs of the new database.
+ *
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/commands/tablespace.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <unistd.h>
+#include <dirent.h>
+#include <sys/stat.h>
+
+#include "access/heapam.h"
+#include "access/htup_details.h"
+#include "access/reloptions.h"
+#include "access/sysattr.h"
+#include "access/tableam.h"
+#include "access/xact.h"
+#include "access/xloginsert.h"
+#include "access/xlogutils.h"
+#include "catalog/binary_upgrade.h"
+#include "catalog/catalog.h"
+#include "catalog/dependency.h"
+#include "catalog/indexing.h"
+#include "catalog/namespace.h"
+#include "catalog/objectaccess.h"
+#include "catalog/pg_namespace.h"
+#include "catalog/pg_tablespace.h"
+#include "commands/comment.h"
+#include "commands/seclabel.h"
+#include "commands/tablecmds.h"
+#include "commands/tablespace.h"
+#include "common/file_perm.h"
+#include "miscadmin.h"
+#include "postmaster/bgwriter.h"
+#include "storage/fd.h"
+#include "storage/lmgr.h"
+#include "storage/standby.h"
+#include "utils/acl.h"
+#include "utils/builtins.h"
+#include "utils/fmgroids.h"
+#include "utils/guc.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+#include "utils/varlena.h"
+
+/* GUC variables */
+char *default_tablespace = NULL;
+char *temp_tablespaces = NULL;
+bool allow_in_place_tablespaces = false;
+
+Oid binary_upgrade_next_pg_tablespace_oid = InvalidOid;
+
+static void create_tablespace_directories(const char *location,
+ const Oid tablespaceoid);
+static bool destroy_tablespace_directories(Oid tablespaceoid, bool redo);
+
+
+/*
+ * Each database using a table space is isolated into its own name space
+ * by a subdirectory named for the database OID. On first creation of an
+ * object in the tablespace, create the subdirectory. If the subdirectory
+ * already exists, fall through quietly.
+ *
+ * isRedo indicates that we are creating an object during WAL replay.
+ * In this case we will cope with the possibility of the tablespace
+ * directory not being there either --- this could happen if we are
+ * replaying an operation on a table in a subsequently-dropped tablespace.
+ * We handle this by making a directory in the place where the tablespace
+ * symlink would normally be. This isn't an exact replay of course, but
+ * it's the best we can do given the available information.
+ *
+ * If tablespaces are not supported, we still need it in case we have to
+ * re-create a database subdirectory (of $PGDATA/base) during WAL replay.
+ *
+ * spcNode is the tablespace OID, dbNode the database OID.  The global
+ * tablespace is a no-op here since it keeps no per-database directories.
+ * Filesystem failures are reported with ereport(ERROR).
+ */
+void
+TablespaceCreateDbspace(Oid spcNode, Oid dbNode, bool isRedo)
+{
+    struct stat st;
+    char       *dir;
+
+    /*
+     * The global tablespace doesn't have per-database subdirectories, so
+     * nothing to do for it.
+     */
+    if (spcNode == GLOBALTABLESPACE_OID)
+        return;
+
+    Assert(OidIsValid(spcNode));
+    Assert(OidIsValid(dbNode));
+
+    /* Palloc'd path string; released at the bottom. */
+    dir = GetDatabasePath(dbNode, spcNode);
+
+    if (stat(dir, &st) < 0)
+    {
+        /* Directory does not exist? */
+        if (errno == ENOENT)
+        {
+            /*
+             * Acquire TablespaceCreateLock to ensure that no DROP TABLESPACE
+             * or TablespaceCreateDbspace is running concurrently.
+             */
+            LWLockAcquire(TablespaceCreateLock, LW_EXCLUSIVE);
+
+            /*
+             * Recheck to see if someone created the directory while we were
+             * waiting for lock.
+             */
+            if (stat(dir, &st) == 0 && S_ISDIR(st.st_mode))
+            {
+                /* Directory was created; nothing more to do. */
+            }
+            else
+            {
+                /* Directory creation failed? */
+                if (MakePGDirectory(dir) < 0)
+                {
+                    /* Failure other than not exists or not in WAL replay? */
+                    if (errno != ENOENT || !isRedo)
+                        ereport(ERROR,
+                                (errcode_for_file_access(),
+                                 errmsg("could not create directory \"%s\": %m",
+                                        dir)));
+
+                    /*
+                     * During WAL replay, it's conceivable that several levels
+                     * of directories are missing if tablespaces are dropped
+                     * further ahead of the WAL stream than we're currently
+                     * replaying. An easy way forward is to create them as
+                     * plain directories and hope they are removed by further
+                     * WAL replay if necessary. If this also fails, there is
+                     * trouble we cannot get out of, so just report that and
+                     * bail out.
+                     */
+                    if (pg_mkdir_p(dir, pg_dir_create_mode) < 0)
+                        ereport(ERROR,
+                                (errcode_for_file_access(),
+                                 errmsg("could not create directory \"%s\": %m",
+                                        dir)));
+                }
+            }
+
+            LWLockRelease(TablespaceCreateLock);
+        }
+        else
+        {
+            /* stat() failed for some reason other than nonexistence */
+            ereport(ERROR,
+                    (errcode_for_file_access(),
+                     errmsg("could not stat directory \"%s\": %m", dir)));
+        }
+    }
+    else
+    {
+        /* Is it not a directory? */
+        if (!S_ISDIR(st.st_mode))
+            ereport(ERROR,
+                    (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                     errmsg("\"%s\" exists but is not a directory",
+                            dir)));
+    }
+
+    pfree(dir);
+}
+
+/*
+ * Create a table space
+ *
+ * Only superusers can create a tablespace. This seems a reasonable restriction
+ * since we're determining the system layout and, anyway, we probably have
+ * root if we're doing this kind of activity
+ *
+ * Returns the OID of the new tablespace.  On platforms built without
+ * HAVE_SYMLINK this always raises a feature-not-supported error.
+ */
+Oid
+CreateTableSpace(CreateTableSpaceStmt *stmt)
+{
+#ifdef HAVE_SYMLINK
+    Relation    rel;
+    Datum       values[Natts_pg_tablespace];
+    bool        nulls[Natts_pg_tablespace];
+    HeapTuple   tuple;
+    Oid         tablespaceoid;
+    char       *location;
+    Oid         ownerId;
+    Datum       newOptions;
+    bool        in_place;
+
+    /* Must be superuser */
+    if (!superuser())
+        ereport(ERROR,
+                (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+                 errmsg("permission denied to create tablespace \"%s\"",
+                        stmt->tablespacename),
+                 errhint("Must be superuser to create a tablespace.")));
+
+    /* However, the eventual owner of the tablespace need not be */
+    if (stmt->owner)
+        ownerId = get_rolespec_oid(stmt->owner, false);
+    else
+        ownerId = GetUserId();
+
+    /* Unix-ify the offered path, and strip any trailing slashes */
+    location = pstrdup(stmt->location);
+    canonicalize_path(location);
+
+    /* disallow quotes, else CREATE DATABASE would be at risk */
+    if (strchr(location, '\''))
+        ereport(ERROR,
+                (errcode(ERRCODE_INVALID_NAME),
+                 errmsg("tablespace location cannot contain single quotes")));
+
+    /* An empty location with the developer GUC set means "in place" */
+    in_place = allow_in_place_tablespaces && strlen(location) == 0;
+
+    /*
+     * Allowing relative paths seems risky
+     *
+     * This also helps us ensure that location is not empty or whitespace,
+     * unless specifying a developer-only in-place tablespace.
+     */
+    if (!in_place && !is_absolute_path(location))
+        ereport(ERROR,
+                (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                 errmsg("tablespace location must be an absolute path")));
+
+    /*
+     * Check that location isn't too long. Remember that we're going to append
+     * 'PG_XXX/<dboid>/<relid>_<fork>.<nnn>'. FYI, we never actually
+     * reference the whole path here, but MakePGDirectory() uses the first two
+     * parts.
+     */
+    if (strlen(location) + 1 + strlen(TABLESPACE_VERSION_DIRECTORY) + 1 +
+        OIDCHARS + 1 + OIDCHARS + 1 + FORKNAMECHARS + 1 + OIDCHARS > MAXPGPATH)
+        ereport(ERROR,
+                (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                 errmsg("tablespace location \"%s\" is too long",
+                        location)));
+
+    /* Warn if the tablespace is in the data directory. */
+    if (path_is_prefix_of_path(DataDir, location))
+        ereport(WARNING,
+                (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                 errmsg("tablespace location should not be inside the data directory")));
+
+    /*
+     * Disallow creation of tablespaces named "pg_xxx"; we reserve this
+     * namespace for system purposes.
+     */
+    if (!allowSystemTableMods && IsReservedName(stmt->tablespacename))
+        ereport(ERROR,
+                (errcode(ERRCODE_RESERVED_NAME),
+                 errmsg("unacceptable tablespace name \"%s\"",
+                        stmt->tablespacename),
+                 errdetail("The prefix \"pg_\" is reserved for system tablespaces.")));
+
+    /*
+     * If built with appropriate switch, whine when regression-testing
+     * conventions for tablespace names are violated.
+     */
+#ifdef ENFORCE_REGRESSION_TEST_NAME_RESTRICTIONS
+    if (strncmp(stmt->tablespacename, "regress_", 8) != 0)
+        elog(WARNING, "tablespaces created by regression test cases should have names starting with \"regress_\"");
+#endif
+
+    /*
+     * Check that there is no other tablespace by this name. (The unique
+     * index would catch this anyway, but might as well give a friendlier
+     * message.)
+     */
+    if (OidIsValid(get_tablespace_oid(stmt->tablespacename, true)))
+        ereport(ERROR,
+                (errcode(ERRCODE_DUPLICATE_OBJECT),
+                 errmsg("tablespace \"%s\" already exists",
+                        stmt->tablespacename)));
+
+    /*
+     * Insert tuple into pg_tablespace. The purpose of doing this first is to
+     * lock the proposed tablename against other would-be creators. The
+     * insertion will roll back if we find problems below.
+     */
+    rel = table_open(TableSpaceRelationId, RowExclusiveLock);
+
+    MemSet(nulls, false, sizeof(nulls));
+
+    if (IsBinaryUpgrade)
+    {
+        /* Use binary-upgrade override for tablespace oid */
+        if (!OidIsValid(binary_upgrade_next_pg_tablespace_oid))
+            ereport(ERROR,
+                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                     errmsg("pg_tablespace OID value not set when in binary upgrade mode")));
+
+        /* consume the override; it is only good for one use */
+        tablespaceoid = binary_upgrade_next_pg_tablespace_oid;
+        binary_upgrade_next_pg_tablespace_oid = InvalidOid;
+    }
+    else
+        tablespaceoid = GetNewOidWithIndex(rel, TablespaceOidIndexId,
+                                           Anum_pg_tablespace_oid);
+    values[Anum_pg_tablespace_oid - 1] = ObjectIdGetDatum(tablespaceoid);
+    values[Anum_pg_tablespace_spcname - 1] =
+        DirectFunctionCall1(namein, CStringGetDatum(stmt->tablespacename));
+    values[Anum_pg_tablespace_spcowner - 1] =
+        ObjectIdGetDatum(ownerId);
+    /* a new tablespace starts with no explicit ACL */
+    nulls[Anum_pg_tablespace_spcacl - 1] = true;
+
+    /* Generate new proposed spcoptions (text array) */
+    newOptions = transformRelOptions((Datum) 0,
+                                     stmt->options,
+                                     NULL, NULL, false, false);
+    /* validate the option list; the parsed result itself isn't needed */
+    (void) tablespace_reloptions(newOptions, true);
+    if (newOptions != (Datum) 0)
+        values[Anum_pg_tablespace_spcoptions - 1] = newOptions;
+    else
+        nulls[Anum_pg_tablespace_spcoptions - 1] = true;
+
+    tuple = heap_form_tuple(rel->rd_att, values, nulls);
+
+    CatalogTupleInsert(rel, tuple);
+
+    heap_freetuple(tuple);
+
+    /* Record dependency on owner */
+    recordDependencyOnOwner(TableSpaceRelationId, tablespaceoid, ownerId);
+
+    /* Post creation hook for new tablespace */
+    InvokeObjectPostCreateHook(TableSpaceRelationId, tablespaceoid, 0);
+
+    /* Make the on-disk directory structure and symlink */
+    create_tablespace_directories(location, tablespaceoid);
+
+    /* Record the filesystem change in XLOG */
+    {
+        xl_tblspc_create_rec xlrec;
+
+        xlrec.ts_id = tablespaceoid;
+
+        XLogBeginInsert();
+        XLogRegisterData((char *) &xlrec,
+                         offsetof(xl_tblspc_create_rec, ts_path));
+        XLogRegisterData((char *) location, strlen(location) + 1);
+
+        (void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_CREATE);
+    }
+
+    /*
+     * Force synchronous commit, to minimize the window between creating the
+     * symlink on-disk and marking the transaction committed. It's not great
+     * that there is any window at all, but definitely we don't want to make
+     * it larger than necessary.
+     */
+    ForceSyncCommit();
+
+    pfree(location);
+
+    /* We keep the lock on pg_tablespace until commit */
+    table_close(rel, NoLock);
+
+    return tablespaceoid;
+#else                           /* !HAVE_SYMLINK */
+    ereport(ERROR,
+            (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+             errmsg("tablespaces are not supported on this platform")));
+    return InvalidOid;          /* keep compiler quiet */
+#endif                          /* HAVE_SYMLINK */
+}
+
+/*
+ * Drop a table space
+ *
+ * Be careful to check that the tablespace is empty.
+ *
+ * If stmt->missing_ok is set, a nonexistent tablespace draws only a NOTICE
+ * and the command returns without error.
+ */
+void
+DropTableSpace(DropTableSpaceStmt *stmt)
+{
+#ifdef HAVE_SYMLINK
+    char       *tablespacename = stmt->tablespacename;
+    TableScanDesc scandesc;
+    Relation    rel;
+    HeapTuple   tuple;
+    Form_pg_tablespace spcform;
+    ScanKeyData entry[1];
+    Oid         tablespaceoid;
+    char       *detail;
+    char       *detail_log;
+
+    /*
+     * Find the target tuple
+     */
+    rel = table_open(TableSpaceRelationId, RowExclusiveLock);
+
+    ScanKeyInit(&entry[0],
+                Anum_pg_tablespace_spcname,
+                BTEqualStrategyNumber, F_NAMEEQ,
+                CStringGetDatum(tablespacename));
+    scandesc = table_beginscan_catalog(rel, 1, entry);
+    tuple = heap_getnext(scandesc, ForwardScanDirection);
+
+    if (!HeapTupleIsValid(tuple))
+    {
+        if (!stmt->missing_ok)
+        {
+            ereport(ERROR,
+                    (errcode(ERRCODE_UNDEFINED_OBJECT),
+                     errmsg("tablespace \"%s\" does not exist",
+                            tablespacename)));
+        }
+        else
+        {
+            /* missing_ok: close everything up and return quietly */
+            ereport(NOTICE,
+                    (errmsg("tablespace \"%s\" does not exist, skipping",
+                            tablespacename)));
+            table_endscan(scandesc);
+            table_close(rel, NoLock);
+        }
+        return;
+    }
+
+    spcform = (Form_pg_tablespace) GETSTRUCT(tuple);
+    tablespaceoid = spcform->oid;
+
+    /* Must be tablespace owner */
+    if (!pg_tablespace_ownercheck(tablespaceoid, GetUserId()))
+        aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_TABLESPACE,
+                       tablespacename);
+
+    /* Disallow drop of the standard tablespaces, even by superuser */
+    if (IsPinnedObject(TableSpaceRelationId, tablespaceoid))
+        aclcheck_error(ACLCHECK_NO_PRIV, OBJECT_TABLESPACE,
+                       tablespacename);
+
+    /* Check for pg_shdepend entries depending on this tablespace */
+    if (checkSharedDependencies(TableSpaceRelationId, tablespaceoid,
+                                &detail, &detail_log))
+        ereport(ERROR,
+                (errcode(ERRCODE_DEPENDENT_OBJECTS_STILL_EXIST),
+                 errmsg("tablespace \"%s\" cannot be dropped because some objects depend on it",
+                        tablespacename),
+                 errdetail_internal("%s", detail),
+                 errdetail_log("%s", detail_log)));
+
+    /* DROP hook for the tablespace being removed */
+    InvokeObjectDropHook(TableSpaceRelationId, tablespaceoid, 0);
+
+    /*
+     * Remove the pg_tablespace tuple (this will roll back if we fail below)
+     */
+    CatalogTupleDelete(rel, &tuple->t_self);
+
+    table_endscan(scandesc);
+
+    /*
+     * Remove any comments or security labels on this tablespace.
+     */
+    DeleteSharedComments(tablespaceoid, TableSpaceRelationId);
+    DeleteSharedSecurityLabel(tablespaceoid, TableSpaceRelationId);
+
+    /*
+     * Remove dependency on owner.
+     */
+    deleteSharedDependencyRecordsFor(TableSpaceRelationId, tablespaceoid, 0);
+
+    /*
+     * Acquire TablespaceCreateLock to ensure that no TablespaceCreateDbspace
+     * is running concurrently.
+     */
+    LWLockAcquire(TablespaceCreateLock, LW_EXCLUSIVE);
+
+    /*
+     * Try to remove the physical infrastructure.
+     */
+    if (!destroy_tablespace_directories(tablespaceoid, false))
+    {
+        /*
+         * Not all files deleted? However, there can be lingering empty files
+         * in the directories, left behind by for example DROP TABLE, that
+         * have been scheduled for deletion at next checkpoint (see comments
+         * in mdunlink() for details). We could just delete them immediately,
+         * but we can't tell them apart from important data files that we
+         * mustn't delete. So instead, we force a checkpoint which will clean
+         * out any lingering files, and try again.
+         */
+        RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT);
+
+        /*
+         * On Windows, an unlinked file persists in the directory listing
+         * until no process retains an open handle for the file. The DDL
+         * commands that schedule files for unlink send invalidation messages
+         * directing other PostgreSQL processes to close the files, but
+         * nothing guarantees they'll be processed in time. So, we'll also
+         * use a global barrier to ask all backends to close all files, and
+         * wait until they're finished.
+         */
+        LWLockRelease(TablespaceCreateLock);
+        WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));
+        LWLockAcquire(TablespaceCreateLock, LW_EXCLUSIVE);
+
+        /* And now try again. */
+        if (!destroy_tablespace_directories(tablespaceoid, false))
+        {
+            /* Still not empty, the files must be important then */
+            ereport(ERROR,
+                    (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                     errmsg("tablespace \"%s\" is not empty",
+                            tablespacename)));
+        }
+    }
+
+    /* Record the filesystem change in XLOG */
+    {
+        xl_tblspc_drop_rec xlrec;
+
+        xlrec.ts_id = tablespaceoid;
+
+        XLogBeginInsert();
+        XLogRegisterData((char *) &xlrec, sizeof(xl_tblspc_drop_rec));
+
+        (void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_DROP);
+    }
+
+    /*
+     * Note: because we checked that the tablespace was empty, there should be
+     * no need to worry about flushing shared buffers or free space map
+     * entries for relations in the tablespace.
+     */
+
+    /*
+     * Force synchronous commit, to minimize the window between removing the
+     * files on-disk and marking the transaction committed. It's not great
+     * that there is any window at all, but definitely we don't want to make
+     * it larger than necessary.
+     */
+    ForceSyncCommit();
+
+    /*
+     * Allow TablespaceCreateDbspace again.
+     */
+    LWLockRelease(TablespaceCreateLock);
+
+    /* We keep the lock on pg_tablespace until commit */
+    table_close(rel, NoLock);
+#else                           /* !HAVE_SYMLINK */
+    ereport(ERROR,
+            (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+             errmsg("tablespaces are not supported on this platform")));
+#endif                          /* HAVE_SYMLINK */
+}
+
+
+/*
+ * create_tablespace_directories
+ *
+ * Attempt to create filesystem infrastructure linking $PGDATA/pg_tblspc/
+ * to the specified directory
+ *
+ * An empty "location" means a developer-only "in place" tablespace: the
+ * directory is created directly where the symlink would normally go.
+ * All failures are reported with ereport(ERROR).
+ */
+static void
+create_tablespace_directories(const char *location, const Oid tablespaceoid)
+{
+    char       *linkloc;
+    char       *location_with_version_dir;
+    struct stat st;
+    bool        in_place;
+
+    linkloc = psprintf("pg_tblspc/%u", tablespaceoid);
+
+    /*
+     * If we're asked to make an 'in place' tablespace, create the directory
+     * directly where the symlink would normally go. This is a developer-only
+     * option for now, to facilitate regression testing.
+     */
+    in_place = strlen(location) == 0;
+
+    if (in_place)
+    {
+        /* EEXIST is fine: the directory may survive from a previous attempt */
+        if (MakePGDirectory(linkloc) < 0 && errno != EEXIST)
+            ereport(ERROR,
+                    (errcode_for_file_access(),
+                     errmsg("could not create directory \"%s\": %m",
+                            linkloc)));
+    }
+
+    location_with_version_dir = psprintf("%s/%s", in_place ? linkloc : location,
+                                         TABLESPACE_VERSION_DIRECTORY);
+
+    /*
+     * Attempt to coerce target directory to safe permissions. If this fails,
+     * it doesn't exist or has the wrong owner. Not needed for in-place mode,
+     * because in that case we created the directory with the desired
+     * permissions.
+     */
+    if (!in_place && chmod(location, pg_dir_create_mode) != 0)
+    {
+        if (errno == ENOENT)
+            ereport(ERROR,
+                    (errcode(ERRCODE_UNDEFINED_FILE),
+                     errmsg("directory \"%s\" does not exist", location),
+                     InRecovery ? errhint("Create this directory for the tablespace before "
+                                          "restarting the server.") : 0));
+        else
+            ereport(ERROR,
+                    (errcode_for_file_access(),
+                     errmsg("could not set permissions on directory \"%s\": %m",
+                            location)));
+    }
+
+    /*
+     * The creation of the version directory prevents more than one tablespace
+     * in a single location. This imitates TablespaceCreateDbspace(), but it
+     * ignores concurrency and missing parent directories. The chmod() would
+     * have failed in the absence of a parent. pg_tablespace_spcname_index
+     * prevents concurrency.
+     */
+    if (stat(location_with_version_dir, &st) < 0)
+    {
+        if (errno != ENOENT)
+            ereport(ERROR,
+                    (errcode_for_file_access(),
+                     errmsg("could not stat directory \"%s\": %m",
+                            location_with_version_dir)));
+        else if (MakePGDirectory(location_with_version_dir) < 0)
+            ereport(ERROR,
+                    (errcode_for_file_access(),
+                     errmsg("could not create directory \"%s\": %m",
+                            location_with_version_dir)));
+    }
+    else if (!S_ISDIR(st.st_mode))
+        ereport(ERROR,
+                (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                 errmsg("\"%s\" exists but is not a directory",
+                        location_with_version_dir)));
+    else if (!InRecovery)
+        /* a pre-existing version directory implies another tablespace here */
+        ereport(ERROR,
+                (errcode(ERRCODE_OBJECT_IN_USE),
+                 errmsg("directory \"%s\" already in use as a tablespace",
+                        location_with_version_dir)));
+
+    /*
+     * In recovery, remove old symlink, in case it points to the wrong place.
+     */
+    if (!in_place && InRecovery)
+        remove_tablespace_symlink(linkloc);
+
+    /*
+     * Create the symlink under PGDATA
+     */
+    if (!in_place && symlink(location, linkloc) < 0)
+        ereport(ERROR,
+                (errcode_for_file_access(),
+                 errmsg("could not create symbolic link \"%s\": %m",
+                        linkloc)));
+
+    pfree(linkloc);
+    pfree(location_with_version_dir);
+}
+
+
+/*
+ * destroy_tablespace_directories
+ *
+ * Attempt to remove filesystem infrastructure for the tablespace.
+ *
+ * 'redo' indicates we are redoing a drop from XLOG; in that case we should
+ * not throw an ERROR for problems, just LOG them. The worst consequence of
+ * not removing files here would be failure to release some disk space, which
+ * does not justify throwing an error that would require manual intervention
+ * to get the database running again.
+ *
+ * Returns true if successful, false if some subdirectory is not empty
+ *
+ * NB: the comments below assume the caller holds TablespaceCreateLock.
+ */
+static bool
+destroy_tablespace_directories(Oid tablespaceoid, bool redo)
+{
+    char       *linkloc;
+    char       *linkloc_with_version_dir;
+    DIR        *dirdesc;
+    struct dirent *de;
+    char       *subfile;
+    struct stat st;
+
+    linkloc_with_version_dir = psprintf("pg_tblspc/%u/%s", tablespaceoid,
+                                        TABLESPACE_VERSION_DIRECTORY);
+
+    /*
+     * Check if the tablespace still contains any files. We try to rmdir each
+     * per-database directory we find in it. rmdir failure implies there are
+     * still files in that subdirectory, so give up. (We do not have to worry
+     * about undoing any already completed rmdirs, since the next attempt to
+     * use the tablespace from that database will simply recreate the
+     * subdirectory via TablespaceCreateDbspace.)
+     *
+     * Since we hold TablespaceCreateLock, no one else should be creating any
+     * fresh subdirectories in parallel. It is possible that new files are
+     * being created within subdirectories, though, so the rmdir call could
+     * fail. Worst consequence is a less friendly error message.
+     *
+     * If redo is true then ENOENT is a likely outcome here, and we allow it
+     * to pass without comment. In normal operation we still allow it, but
+     * with a warning. This is because even though ProcessUtility disallows
+     * DROP TABLESPACE in a transaction block, it's possible that a previous
+     * DROP failed and rolled back after removing the tablespace directories
+     * and/or symlink. We want to allow a new DROP attempt to succeed at
+     * removing the catalog entries (and symlink if still present), so we
+     * should not give a hard error here.
+     */
+    dirdesc = AllocateDir(linkloc_with_version_dir);
+    if (dirdesc == NULL)
+    {
+        if (errno == ENOENT)
+        {
+            if (!redo)
+                ereport(WARNING,
+                        (errcode_for_file_access(),
+                         errmsg("could not open directory \"%s\": %m",
+                                linkloc_with_version_dir)));
+            /* The symlink might still exist, so go try to remove it */
+            goto remove_symlink;
+        }
+        else if (redo)
+        {
+            /* in redo, just log other types of error */
+            ereport(LOG,
+                    (errcode_for_file_access(),
+                     errmsg("could not open directory \"%s\": %m",
+                            linkloc_with_version_dir)));
+            pfree(linkloc_with_version_dir);
+            return false;
+        }
+        /* else let ReadDir report the error */
+    }
+
+    while ((de = ReadDir(dirdesc, linkloc_with_version_dir)) != NULL)
+    {
+        /* skip the self and parent pseudo-entries */
+        if (strcmp(de->d_name, ".") == 0 ||
+            strcmp(de->d_name, "..") == 0)
+            continue;
+
+        subfile = psprintf("%s/%s", linkloc_with_version_dir, de->d_name);
+
+        /* This check is just to deliver a friendlier error message */
+        if (!redo && !directory_is_empty(subfile))
+        {
+            FreeDir(dirdesc);
+            pfree(subfile);
+            pfree(linkloc_with_version_dir);
+            return false;
+        }
+
+        /* remove empty directory */
+        if (rmdir(subfile) < 0)
+            ereport(redo ? LOG : ERROR,
+                    (errcode_for_file_access(),
+                     errmsg("could not remove directory \"%s\": %m",
+                            subfile)));
+
+        pfree(subfile);
+    }
+
+    FreeDir(dirdesc);
+
+    /* remove version directory */
+    if (rmdir(linkloc_with_version_dir) < 0)
+    {
+        /* in the redo case this is LOG-and-return-false, not a hard error */
+        ereport(redo ? LOG : ERROR,
+                (errcode_for_file_access(),
+                 errmsg("could not remove directory \"%s\": %m",
+                        linkloc_with_version_dir)));
+        pfree(linkloc_with_version_dir);
+        return false;
+    }
+
+    /*
+     * Try to remove the symlink. We must however deal with the possibility
+     * that it's a directory instead of a symlink --- this could happen during
+     * WAL replay (see TablespaceCreateDbspace), and it is also the case on
+     * Windows where junction points lstat() as directories.
+     *
+     * Note: in the redo case, we'll return true if this final step fails;
+     * there's no point in retrying it. Also, ENOENT should provoke no more
+     * than a warning.
+     */
+remove_symlink:
+    linkloc = pstrdup(linkloc_with_version_dir);
+    get_parent_directory(linkloc);
+    if (lstat(linkloc, &st) < 0)
+    {
+        int         saved_errno = errno;
+
+        ereport(redo ? LOG : (saved_errno == ENOENT ? WARNING : ERROR),
+                (errcode_for_file_access(),
+                 errmsg("could not stat file \"%s\": %m",
+                        linkloc)));
+    }
+    else if (S_ISDIR(st.st_mode))
+    {
+        if (rmdir(linkloc) < 0)
+        {
+            int         saved_errno = errno;
+
+            ereport(redo ? LOG : (saved_errno == ENOENT ? WARNING : ERROR),
+                    (errcode_for_file_access(),
+                     errmsg("could not remove directory \"%s\": %m",
+                            linkloc)));
+        }
+    }
+#ifdef S_ISLNK
+    else if (S_ISLNK(st.st_mode))
+    {
+        if (unlink(linkloc) < 0)
+        {
+            int         saved_errno = errno;
+
+            ereport(redo ? LOG : (saved_errno == ENOENT ? WARNING : ERROR),
+                    (errcode_for_file_access(),
+                     errmsg("could not remove symbolic link \"%s\": %m",
+                            linkloc)));
+        }
+    }
+#endif
+    else
+    {
+        /* Refuse to remove anything that's not a directory or symlink */
+        ereport(redo ? LOG : ERROR,
+                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                 errmsg("\"%s\" is not a directory or symbolic link",
+                        linkloc)));
+    }
+
+    pfree(linkloc_with_version_dir);
+    pfree(linkloc);
+
+    return true;
+}
+
+
+/*
+ * directory_is_empty
+ *		Report whether the directory at "path" contains no entries other
+ *		than the "." and ".." pseudo-entries.
+ *
+ * (Arguably this belongs in a more general-purpose module, but not sure
+ * where...)
+ */
+bool
+directory_is_empty(const char *path)
+{
+    DIR        *dir = AllocateDir(path);
+    struct dirent *entry;
+    bool        empty = true;
+
+    while ((entry = ReadDir(dir, path)) != NULL)
+    {
+        /* Any real entry means the directory is nonempty. */
+        if (strcmp(entry->d_name, ".") != 0 &&
+            strcmp(entry->d_name, "..") != 0)
+        {
+            empty = false;
+            break;
+        }
+    }
+
+    FreeDir(dir);
+    return empty;
+}
+
+/*
+ * remove_tablespace_symlink
+ *
+ * This function removes symlinks in pg_tblspc. On Windows, junction points
+ * act like directories so we must be able to apply rmdir. This function
+ * works like the symlink removal code in destroy_tablespace_directories,
+ * except that failure to remove is always an ERROR. But if the file doesn't
+ * exist at all, that's OK.
+ */
+void
+remove_tablespace_symlink(const char *linkloc)
+{
+    struct stat st;
+
+    if (lstat(linkloc, &st) < 0)
+    {
+        /* a nonexistent link is fine, per the contract above */
+        if (errno == ENOENT)
+            return;
+        ereport(ERROR,
+                (errcode_for_file_access(),
+                 errmsg("could not stat file \"%s\": %m", linkloc)));
+    }
+
+    if (S_ISDIR(st.st_mode))
+    {
+        /*
+         * This will fail if the directory isn't empty, but not if it's a
+         * junction point.
+         */
+        if (rmdir(linkloc) < 0 && errno != ENOENT)
+            ereport(ERROR,
+                    (errcode_for_file_access(),
+                     errmsg("could not remove directory \"%s\": %m",
+                            linkloc)));
+    }
+#ifdef S_ISLNK
+    else if (S_ISLNK(st.st_mode))
+    {
+        if (unlink(linkloc) < 0 && errno != ENOENT)
+            ereport(ERROR,
+                    (errcode_for_file_access(),
+                     errmsg("could not remove symbolic link \"%s\": %m",
+                            linkloc)));
+    }
+#endif
+    else
+    {
+        /* Refuse to remove anything that's not a directory or symlink */
+        ereport(ERROR,
+                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                 errmsg("\"%s\" is not a directory or symbolic link",
+                        linkloc)));
+    }
+}
+
+/*
+ * Rename a tablespace
+ *
+ * Returns the ObjectAddress of the renamed tablespace.  Errors out if the
+ * old name doesn't exist, the caller isn't the owner, the new name is
+ * reserved, or the new name is already taken.
+ */
+ObjectAddress
+RenameTableSpace(const char *oldname, const char *newname)
+{
+    Oid         tspId;
+    Relation    rel;
+    ScanKeyData entry[1];
+    TableScanDesc scan;
+    HeapTuple   tup;
+    HeapTuple   newtuple;
+    Form_pg_tablespace newform;
+    ObjectAddress address;
+
+    /* Search pg_tablespace */
+    rel = table_open(TableSpaceRelationId, RowExclusiveLock);
+
+    ScanKeyInit(&entry[0],
+                Anum_pg_tablespace_spcname,
+                BTEqualStrategyNumber, F_NAMEEQ,
+                CStringGetDatum(oldname));
+    scan = table_beginscan_catalog(rel, 1, entry);
+    tup = heap_getnext(scan, ForwardScanDirection);
+    if (!HeapTupleIsValid(tup))
+        ereport(ERROR,
+                (errcode(ERRCODE_UNDEFINED_OBJECT),
+                 errmsg("tablespace \"%s\" does not exist",
+                        oldname)));
+
+    /* work on a copy, since the scanned tuple goes away at table_endscan */
+    newtuple = heap_copytuple(tup);
+    newform = (Form_pg_tablespace) GETSTRUCT(newtuple);
+    tspId = newform->oid;
+
+    table_endscan(scan);
+
+    /* Must be owner */
+    if (!pg_tablespace_ownercheck(tspId, GetUserId()))
+        aclcheck_error(ACLCHECK_NO_PRIV, OBJECT_TABLESPACE, oldname);
+
+    /* Validate new name */
+    if (!allowSystemTableMods && IsReservedName(newname))
+        ereport(ERROR,
+                (errcode(ERRCODE_RESERVED_NAME),
+                 errmsg("unacceptable tablespace name \"%s\"", newname),
+                 errdetail("The prefix \"pg_\" is reserved for system tablespaces.")));
+
+    /*
+     * If built with appropriate switch, whine when regression-testing
+     * conventions for tablespace names are violated.
+     */
+#ifdef ENFORCE_REGRESSION_TEST_NAME_RESTRICTIONS
+    if (strncmp(newname, "regress_", 8) != 0)
+        elog(WARNING, "tablespaces created by regression test cases should have names starting with \"regress_\"");
+#endif
+
+    /* Make sure the new name doesn't exist */
+    ScanKeyInit(&entry[0],
+                Anum_pg_tablespace_spcname,
+                BTEqualStrategyNumber, F_NAMEEQ,
+                CStringGetDatum(newname));
+    scan = table_beginscan_catalog(rel, 1, entry);
+    tup = heap_getnext(scan, ForwardScanDirection);
+    if (HeapTupleIsValid(tup))
+        ereport(ERROR,
+                (errcode(ERRCODE_DUPLICATE_OBJECT),
+                 errmsg("tablespace \"%s\" already exists",
+                        newname)));
+
+    table_endscan(scan);
+
+    /* OK, update the entry */
+    namestrcpy(&(newform->spcname), newname);
+
+    CatalogTupleUpdate(rel, &newtuple->t_self, newtuple);
+
+    InvokeObjectPostAlterHook(TableSpaceRelationId, tspId, 0);
+
+    ObjectAddressSet(address, TableSpaceRelationId, tspId);
+
+    /* keep the lock on pg_tablespace until commit */
+    table_close(rel, NoLock);
+
+    return address;
+}
+
+/*
+ * Alter table space options
+ *
+ * Handles both SET and RESET (per stmt->isReset) of spcoptions.
+ * Returns the OID of the affected tablespace.
+ */
+Oid
+AlterTableSpaceOptions(AlterTableSpaceOptionsStmt *stmt)
+{
+    Relation    rel;
+    ScanKeyData entry[1];
+    TableScanDesc scandesc;
+    HeapTuple   tup;
+    Oid         tablespaceoid;
+    Datum       datum;
+    Datum       newOptions;
+    Datum       repl_val[Natts_pg_tablespace];
+    bool        isnull;
+    bool        repl_null[Natts_pg_tablespace];
+    bool        repl_repl[Natts_pg_tablespace];
+    HeapTuple   newtuple;
+
+    /* Search pg_tablespace */
+    rel = table_open(TableSpaceRelationId, RowExclusiveLock);
+
+    ScanKeyInit(&entry[0],
+                Anum_pg_tablespace_spcname,
+                BTEqualStrategyNumber, F_NAMEEQ,
+                CStringGetDatum(stmt->tablespacename));
+    scandesc = table_beginscan_catalog(rel, 1, entry);
+    tup = heap_getnext(scandesc, ForwardScanDirection);
+    if (!HeapTupleIsValid(tup))
+        ereport(ERROR,
+                (errcode(ERRCODE_UNDEFINED_OBJECT),
+                 errmsg("tablespace \"%s\" does not exist",
+                        stmt->tablespacename)));
+
+    tablespaceoid = ((Form_pg_tablespace) GETSTRUCT(tup))->oid;
+
+    /* Must be owner of the existing object */
+    if (!pg_tablespace_ownercheck(tablespaceoid, GetUserId()))
+        aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_TABLESPACE,
+                       stmt->tablespacename);
+
+    /* Generate new proposed spcoptions (text array) */
+    datum = heap_getattr(tup, Anum_pg_tablespace_spcoptions,
+                         RelationGetDescr(rel), &isnull);
+    newOptions = transformRelOptions(isnull ? (Datum) 0 : datum,
+                                     stmt->options, NULL, NULL, false,
+                                     stmt->isReset);
+    /* validate the merged option list; the parsed result isn't needed */
+    (void) tablespace_reloptions(newOptions, true);
+
+    /* Build new tuple. */
+    memset(repl_null, false, sizeof(repl_null));
+    memset(repl_repl, false, sizeof(repl_repl));
+    if (newOptions != (Datum) 0)
+        repl_val[Anum_pg_tablespace_spcoptions - 1] = newOptions;
+    else
+        repl_null[Anum_pg_tablespace_spcoptions - 1] = true;
+    repl_repl[Anum_pg_tablespace_spcoptions - 1] = true;
+    newtuple = heap_modify_tuple(tup, RelationGetDescr(rel), repl_val,
+                                 repl_null, repl_repl);
+
+    /* Update system catalog. */
+    CatalogTupleUpdate(rel, &newtuple->t_self, newtuple);
+
+    InvokeObjectPostAlterHook(TableSpaceRelationId, tablespaceoid, 0);
+
+    heap_freetuple(newtuple);
+
+    /* Conclude heap scan; keep the lock on pg_tablespace until commit. */
+    table_endscan(scandesc);
+    table_close(rel, NoLock);
+
+    return tablespaceoid;
+}
+
+/*
+ * Routines for handling the GUC variable 'default_tablespace'.
+ */
+
+/* check_hook: validate new default_tablespace */
+bool
+check_default_tablespace(char **newval, void **extra, GucSource source)
+{
+ /*
+ * If we aren't inside a transaction, or connected to a database, we
+ * cannot do the catalog accesses necessary to verify the name. Must
+ * accept the value on faith.
+ */
+ if (IsTransactionState() && MyDatabaseId != InvalidOid)
+ {
+ if (**newval != '\0' &&
+ !OidIsValid(get_tablespace_oid(*newval, true)))
+ {
+ /*
+ * When source == PGC_S_TEST, don't throw a hard error for a
+ * nonexistent tablespace, only a NOTICE. See comments in guc.h.
+ */
+ if (source == PGC_S_TEST)
+ {
+ ereport(NOTICE,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("tablespace \"%s\" does not exist",
+ *newval)));
+ }
+ else
+ {
+ GUC_check_errdetail("Tablespace \"%s\" does not exist.",
+ *newval);
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+/*
+ * GetDefaultTablespace -- get the OID of the current default tablespace
+ *
+ * Temporary objects have different default tablespaces, hence the
+ * relpersistence parameter must be specified. Also, for partitioned tables,
+ * we disallow specifying the database default, so that needs to be specified
+ * too.
+ *
+ * May return InvalidOid to indicate "use the database's default tablespace".
+ *
+ * Note that caller is expected to check appropriate permissions for any
+ * result other than InvalidOid.
+ *
+ * This exists to hide (and possibly optimize the use of) the
+ * default_tablespace GUC variable.
+ */
+Oid
+GetDefaultTablespace(char relpersistence, bool partitioned)
+{
+ Oid result;
+
+ /* The temp-table case is handled elsewhere */
+ if (relpersistence == RELPERSISTENCE_TEMP)
+ {
+ PrepareTempTablespaces();
+ return GetNextTempTableSpace();
+ }
+
+ /* Fast path for default_tablespace == "" */
+ if (default_tablespace == NULL || default_tablespace[0] == '\0')
+ return InvalidOid;
+
+ /*
+ * It is tempting to cache this lookup for more speed, but then we would
+ * fail to detect the case where the tablespace was dropped since the GUC
+ * variable was set. Note also that we don't complain if the value fails
+ * to refer to an existing tablespace; we just silently return InvalidOid,
+ * causing the new object to be created in the database's tablespace.
+ */
+ result = get_tablespace_oid(default_tablespace, true);
+
+ /*
+ * Allow explicit specification of database's default tablespace in
+ * default_tablespace without triggering permissions checks. Don't allow
+ * specifying that when creating a partitioned table, however, since the
+ * result is confusing.
+ */
+ if (result == MyDatabaseTableSpace)
+ {
+ if (partitioned)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot specify default tablespace for partitioned relations")));
+ result = InvalidOid;
+ }
+ return result;
+}
+
+
+/*
+ * Routines for handling the GUC variable 'temp_tablespaces'.
+ */
+
typedef struct
{
	/* Array of OIDs to be passed to SetTempTablespaces() */
	int			numSpcs;		/* number of valid entries in tblSpcs[] */
	Oid			tblSpcs[FLEXIBLE_ARRAY_MEMBER]; /* InvalidOid entries mean
												 * "database's default
												 * tablespace" */
} temp_tablespaces_extra;
+
/*
 * check_hook: validate new temp_tablespaces
 *
 * Parses the comma-separated list, and (when catalog access is possible)
 * resolves each name to an OID, dropping entries that don't exist or that
 * the user lacks CREATE permission on.  The surviving OIDs are packaged
 * into a temp_tablespaces_extra struct returned via *extra, for use by
 * assign_temp_tablespaces.
 */
bool
check_temp_tablespaces(char **newval, void **extra, GucSource source)
{
	char	   *rawname;
	List	   *namelist;

	/* Need a modifiable copy of string */
	rawname = pstrdup(*newval);

	/* Parse string into list of identifiers */
	if (!SplitIdentifierString(rawname, ',', &namelist))
	{
		/* syntax error in name list */
		GUC_check_errdetail("List syntax is invalid.");
		pfree(rawname);
		list_free(namelist);
		return false;
	}

	/*
	 * If we aren't inside a transaction, or connected to a database, we
	 * cannot do the catalog accesses necessary to verify the name.  Must
	 * accept the value on faith.  Fortunately, there's then also no need to
	 * pass the data to fd.c.
	 */
	if (IsTransactionState() && MyDatabaseId != InvalidOid)
	{
		temp_tablespaces_extra *myextra;
		Oid		   *tblSpcs;
		int			numSpcs;
		ListCell   *l;

		/* temporary workspace until we are done verifying the list */
		tblSpcs = (Oid *) palloc(list_length(namelist) * sizeof(Oid));
		numSpcs = 0;
		foreach(l, namelist)
		{
			char	   *curname = (char *) lfirst(l);
			Oid			curoid;
			AclResult	aclresult;

			/* Allow an empty string (signifying database default) */
			if (curname[0] == '\0')
			{
				/* InvalidOid signifies database's default tablespace */
				tblSpcs[numSpcs++] = InvalidOid;
				continue;
			}

			/*
			 * In an interactive SET command, we ereport for bad info.  When
			 * source == PGC_S_TEST, don't throw a hard error for a
			 * nonexistent tablespace, only a NOTICE.  See comments in guc.h.
			 */
			curoid = get_tablespace_oid(curname, source <= PGC_S_TEST);
			if (curoid == InvalidOid)
			{
				/* Nonexistent name: warn and drop it from the result list. */
				if (source == PGC_S_TEST)
					ereport(NOTICE,
							(errcode(ERRCODE_UNDEFINED_OBJECT),
							 errmsg("tablespace \"%s\" does not exist",
									curname)));
				continue;
			}

			/*
			 * Allow explicit specification of database's default tablespace
			 * in temp_tablespaces without triggering permissions checks.
			 */
			if (curoid == MyDatabaseTableSpace)
			{
				/* InvalidOid signifies database's default tablespace */
				tblSpcs[numSpcs++] = InvalidOid;
				continue;
			}

			/* Check permissions, similarly complaining only if interactive */
			aclresult = pg_tablespace_aclcheck(curoid, GetUserId(),
											   ACL_CREATE);
			if (aclresult != ACLCHECK_OK)
			{
				if (source >= PGC_S_INTERACTIVE)
					aclcheck_error(aclresult, OBJECT_TABLESPACE, curname);
				continue;
			}

			tblSpcs[numSpcs++] = curoid;
		}

		/*
		 * Now prepare an "extra" struct for assign_temp_tablespaces.  It is
		 * allocated with malloc, not palloc, presumably because the GUC
		 * machinery keeps it beyond the current memory context's lifetime
		 * (NOTE(review): confirm against guc.h's contract for *extra).
		 */
		myextra = malloc(offsetof(temp_tablespaces_extra, tblSpcs) +
						 numSpcs * sizeof(Oid));
		if (!myextra)
			return false;		/* out of memory */
		myextra->numSpcs = numSpcs;
		memcpy(myextra->tblSpcs, tblSpcs, numSpcs * sizeof(Oid));
		*extra = (void *) myextra;

		pfree(tblSpcs);
	}

	pfree(rawname);
	list_free(namelist);

	return true;
}
+
+/* assign_hook: do extra actions as needed */
+void
+assign_temp_tablespaces(const char *newval, void *extra)
+{
+ temp_tablespaces_extra *myextra = (temp_tablespaces_extra *) extra;
+
+ /*
+ * If check_temp_tablespaces was executed inside a transaction, then pass
+ * the list it made to fd.c. Otherwise, clear fd.c's list; we must be
+ * still outside a transaction, or else restoring during transaction exit,
+ * and in either case we can just let the next PrepareTempTablespaces call
+ * make things sane.
+ */
+ if (myextra)
+ SetTempTablespaces(myextra->tblSpcs, myextra->numSpcs);
+ else
+ SetTempTablespaces(NULL, 0);
+}
+
+/*
+ * PrepareTempTablespaces -- prepare to use temp tablespaces
+ *
+ * If we have not already done so in the current transaction, parse the
+ * temp_tablespaces GUC variable and tell fd.c which tablespace(s) to use
+ * for temp files.
+ */
+void
+PrepareTempTablespaces(void)
+{
+ char *rawname;
+ List *namelist;
+ Oid *tblSpcs;
+ int numSpcs;
+ ListCell *l;
+
+ /* No work if already done in current transaction */
+ if (TempTablespacesAreSet())
+ return;
+
+ /*
+ * Can't do catalog access unless within a transaction. This is just a
+ * safety check in case this function is called by low-level code that
+ * could conceivably execute outside a transaction. Note that in such a
+ * scenario, fd.c will fall back to using the current database's default
+ * tablespace, which should always be OK.
+ */
+ if (!IsTransactionState())
+ return;
+
+ /* Need a modifiable copy of string */
+ rawname = pstrdup(temp_tablespaces);
+
+ /* Parse string into list of identifiers */
+ if (!SplitIdentifierString(rawname, ',', &namelist))
+ {
+ /* syntax error in name list */
+ SetTempTablespaces(NULL, 0);
+ pfree(rawname);
+ list_free(namelist);
+ return;
+ }
+
+ /* Store tablespace OIDs in an array in TopTransactionContext */
+ tblSpcs = (Oid *) MemoryContextAlloc(TopTransactionContext,
+ list_length(namelist) * sizeof(Oid));
+ numSpcs = 0;
+ foreach(l, namelist)
+ {
+ char *curname = (char *) lfirst(l);
+ Oid curoid;
+ AclResult aclresult;
+
+ /* Allow an empty string (signifying database default) */
+ if (curname[0] == '\0')
+ {
+ /* InvalidOid signifies database's default tablespace */
+ tblSpcs[numSpcs++] = InvalidOid;
+ continue;
+ }
+
+ /* Else verify that name is a valid tablespace name */
+ curoid = get_tablespace_oid(curname, true);
+ if (curoid == InvalidOid)
+ {
+ /* Skip any bad list elements */
+ continue;
+ }
+
+ /*
+ * Allow explicit specification of database's default tablespace in
+ * temp_tablespaces without triggering permissions checks.
+ */
+ if (curoid == MyDatabaseTableSpace)
+ {
+ /* InvalidOid signifies database's default tablespace */
+ tblSpcs[numSpcs++] = InvalidOid;
+ continue;
+ }
+
+ /* Check permissions similarly */
+ aclresult = pg_tablespace_aclcheck(curoid, GetUserId(),
+ ACL_CREATE);
+ if (aclresult != ACLCHECK_OK)
+ continue;
+
+ tblSpcs[numSpcs++] = curoid;
+ }
+
+ SetTempTablespaces(tblSpcs, numSpcs);
+
+ pfree(rawname);
+ list_free(namelist);
+}
+
+
+/*
+ * get_tablespace_oid - given a tablespace name, look up the OID
+ *
+ * If missing_ok is false, throw an error if tablespace name not found. If
+ * true, just return InvalidOid.
+ */
+Oid
+get_tablespace_oid(const char *tablespacename, bool missing_ok)
+{
+ Oid result;
+ Relation rel;
+ TableScanDesc scandesc;
+ HeapTuple tuple;
+ ScanKeyData entry[1];
+
+ /*
+ * Search pg_tablespace. We use a heapscan here even though there is an
+ * index on name, on the theory that pg_tablespace will usually have just
+ * a few entries and so an indexed lookup is a waste of effort.
+ */
+ rel = table_open(TableSpaceRelationId, AccessShareLock);
+
+ ScanKeyInit(&entry[0],
+ Anum_pg_tablespace_spcname,
+ BTEqualStrategyNumber, F_NAMEEQ,
+ CStringGetDatum(tablespacename));
+ scandesc = table_beginscan_catalog(rel, 1, entry);
+ tuple = heap_getnext(scandesc, ForwardScanDirection);
+
+ /* We assume that there can be at most one matching tuple */
+ if (HeapTupleIsValid(tuple))
+ result = ((Form_pg_tablespace) GETSTRUCT(tuple))->oid;
+ else
+ result = InvalidOid;
+
+ table_endscan(scandesc);
+ table_close(rel, AccessShareLock);
+
+ if (!OidIsValid(result) && !missing_ok)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("tablespace \"%s\" does not exist",
+ tablespacename)));
+
+ return result;
+}
+
+/*
+ * get_tablespace_name - given a tablespace OID, look up the name
+ *
+ * Returns a palloc'd string, or NULL if no such tablespace.
+ */
+char *
+get_tablespace_name(Oid spc_oid)
+{
+ char *result;
+ Relation rel;
+ TableScanDesc scandesc;
+ HeapTuple tuple;
+ ScanKeyData entry[1];
+
+ /*
+ * Search pg_tablespace. We use a heapscan here even though there is an
+ * index on oid, on the theory that pg_tablespace will usually have just a
+ * few entries and so an indexed lookup is a waste of effort.
+ */
+ rel = table_open(TableSpaceRelationId, AccessShareLock);
+
+ ScanKeyInit(&entry[0],
+ Anum_pg_tablespace_oid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(spc_oid));
+ scandesc = table_beginscan_catalog(rel, 1, entry);
+ tuple = heap_getnext(scandesc, ForwardScanDirection);
+
+ /* We assume that there can be at most one matching tuple */
+ if (HeapTupleIsValid(tuple))
+ result = pstrdup(NameStr(((Form_pg_tablespace) GETSTRUCT(tuple))->spcname));
+ else
+ result = NULL;
+
+ table_endscan(scandesc);
+ table_close(rel, AccessShareLock);
+
+ return result;
+}
+
+
/*
 * TABLESPACE resource manager's routines
 *
 * tblspc_redo -- replay XLOG_TBLSPC_CREATE / XLOG_TBLSPC_DROP WAL records
 * during crash or archive recovery.
 */
void
tblspc_redo(XLogReaderState *record)
{
	uint8		info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;

	/* Backup blocks are not used in tblspc records */
	Assert(!XLogRecHasAnyBlockRefs(record));

	if (info == XLOG_TBLSPC_CREATE)
	{
		xl_tblspc_create_rec *xlrec = (xl_tblspc_create_rec *) XLogRecGetData(record);
		char	   *location = xlrec->ts_path;

		/* Recreate the symlink/directory structure for the tablespace. */
		create_tablespace_directories(location, xlrec->ts_id);
	}
	else if (info == XLOG_TBLSPC_DROP)
	{
		xl_tblspc_drop_rec *xlrec = (xl_tblspc_drop_rec *) XLogRecGetData(record);

		/* Close all smgr fds in all backends. */
		WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));

		/*
		 * If we issued a WAL record for a drop tablespace it implies that
		 * there were no files in it at all when the DROP was done. That means
		 * that no permanent objects can exist in it at this point.
		 *
		 * It is possible for standby users to be using this tablespace as a
		 * location for their temporary files, so if we fail to remove all
		 * files then do conflict processing and try again, if currently
		 * enabled.
		 *
		 * Other possible reasons for failure include bollixed file
		 * permissions on a standby server when they were okay on the primary,
		 * etc etc. There's not much we can do about that, so just remove what
		 * we can and press on.
		 */
		if (!destroy_tablespace_directories(xlrec->ts_id, true))
		{
			/* Cancel standby queries that may hold temp files here. */
			ResolveRecoveryConflictWithTablespace(xlrec->ts_id);

			/*
			 * If we did recovery processing then hopefully the backends who
			 * wrote temp files should have cleaned up and exited by now.  So
			 * retry before complaining.  If we fail again, this is just a LOG
			 * condition, because it's not worth throwing an ERROR for (as
			 * that would crash the database and require manual intervention
			 * before we could get past this WAL record on restart).
			 */
			if (!destroy_tablespace_directories(xlrec->ts_id, true))
				ereport(LOG,
						(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
						 errmsg("directories for tablespace %u could not be removed",
								xlrec->ts_id),
						 errhint("You can remove the directories manually if necessary.")));
		}
	}
	else
		elog(PANIC, "tblspc_redo: unknown op code %u", info);
}
diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c
new file mode 100644
index 0000000..0769ae3
--- /dev/null
+++ b/src/backend/commands/trigger.c
@@ -0,0 +1,6664 @@
+/*-------------------------------------------------------------------------
+ *
+ * trigger.c
+ * PostgreSQL TRIGGERs support code.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/commands/trigger.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/htup_details.h"
+#include "access/relation.h"
+#include "access/sysattr.h"
+#include "access/table.h"
+#include "access/tableam.h"
+#include "access/xact.h"
+#include "catalog/catalog.h"
+#include "catalog/dependency.h"
+#include "catalog/index.h"
+#include "catalog/indexing.h"
+#include "catalog/objectaccess.h"
+#include "catalog/partition.h"
+#include "catalog/pg_constraint.h"
+#include "catalog/pg_inherits.h"
+#include "catalog/pg_proc.h"
+#include "catalog/pg_trigger.h"
+#include "catalog/pg_type.h"
+#include "commands/dbcommands.h"
+#include "commands/defrem.h"
+#include "commands/trigger.h"
+#include "executor/executor.h"
+#include "executor/execPartition.h"
+#include "miscadmin.h"
+#include "nodes/bitmapset.h"
+#include "nodes/makefuncs.h"
+#include "optimizer/optimizer.h"
+#include "parser/parse_clause.h"
+#include "parser/parse_collate.h"
+#include "parser/parse_func.h"
+#include "parser/parse_relation.h"
+#include "parser/parsetree.h"
+#include "partitioning/partdesc.h"
+#include "pgstat.h"
+#include "rewrite/rewriteManip.h"
+#include "storage/bufmgr.h"
+#include "storage/lmgr.h"
+#include "tcop/utility.h"
+#include "utils/acl.h"
+#include "utils/builtins.h"
+#include "utils/bytea.h"
+#include "utils/fmgroids.h"
+#include "utils/inval.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+#include "utils/snapmgr.h"
+#include "utils/syscache.h"
+#include "utils/tuplestore.h"
+
+
/* GUC variables */
/* NOTE(review): appears to back the session_replication_role setting; confirm in guc tables */
int			SessionReplicationRole = SESSION_REPLICATION_ROLE_ORIGIN;

/* How many levels deep into trigger execution are we? */
static int	MyTriggerDepth = 0;
+
+/* Local function prototypes */
+static void renametrig_internal(Relation tgrel, Relation targetrel,
+ HeapTuple trigtup, const char *newname,
+ const char *expected_name);
+static void renametrig_partition(Relation tgrel, Oid partitionId,
+ Oid parentTriggerOid, const char *newname,
+ const char *expected_name);
+static void SetTriggerFlags(TriggerDesc *trigdesc, Trigger *trigger);
+static bool GetTupleForTrigger(EState *estate,
+ EPQState *epqstate,
+ ResultRelInfo *relinfo,
+ ItemPointer tid,
+ LockTupleMode lockmode,
+ TupleTableSlot *oldslot,
+ TupleTableSlot **epqslot,
+ TM_Result *tmresultp,
+ TM_FailureData *tmfdp);
+static bool TriggerEnabled(EState *estate, ResultRelInfo *relinfo,
+ Trigger *trigger, TriggerEvent event,
+ Bitmapset *modifiedCols,
+ TupleTableSlot *oldslot, TupleTableSlot *newslot);
+static HeapTuple ExecCallTriggerFunc(TriggerData *trigdata,
+ int tgindx,
+ FmgrInfo *finfo,
+ Instrumentation *instr,
+ MemoryContext per_tuple_context);
+static void AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
+ ResultRelInfo *src_partinfo,
+ ResultRelInfo *dst_partinfo,
+ int event, bool row_trigger,
+ TupleTableSlot *oldtup, TupleTableSlot *newtup,
+ List *recheckIndexes, Bitmapset *modifiedCols,
+ TransitionCaptureState *transition_capture,
+ bool is_crosspart_update);
+static void AfterTriggerEnlargeQueryState(void);
+static bool before_stmt_triggers_fired(Oid relid, CmdType cmdType);
+
+
+/*
+ * Create a trigger. Returns the address of the created trigger.
+ *
+ * queryString is the source text of the CREATE TRIGGER command.
+ * This must be supplied if a whenClause is specified, else it can be NULL.
+ *
+ * relOid, if nonzero, is the relation on which the trigger should be
+ * created. If zero, the name provided in the statement will be looked up.
+ *
+ * refRelOid, if nonzero, is the relation to which the constraint trigger
+ * refers. If zero, the constraint relation name provided in the statement
+ * will be looked up as needed.
+ *
+ * constraintOid, if nonzero, says that this trigger is being created
+ * internally to implement that constraint. A suitable pg_depend entry will
+ * be made to link the trigger to that constraint. constraintOid is zero when
+ * executing a user-entered CREATE TRIGGER command. (For CREATE CONSTRAINT
+ * TRIGGER, we build a pg_constraint entry internally.)
+ *
+ * indexOid, if nonzero, is the OID of an index associated with the constraint.
+ * We do nothing with this except store it into pg_trigger.tgconstrindid;
+ * but when creating a trigger for a deferrable unique constraint on a
+ * partitioned table, its children are looked up. Note we don't cope with
+ * invalid indexes in that case.
+ *
+ * funcoid, if nonzero, is the OID of the function to invoke. When this is
+ * given, stmt->funcname is ignored.
+ *
+ * parentTriggerOid, if nonzero, is a trigger that begets this one; so that
+ * if that trigger is dropped, this one should be too. There are two cases
+ * when a nonzero value is passed for this: 1) when this function recurses to
+ * create the trigger on partitions, 2) when creating child foreign key
+ * triggers; see CreateFKCheckTrigger() and createForeignKeyActionTriggers().
+ *
+ * If whenClause is passed, it is an already-transformed expression for
+ * WHEN. In this case, we ignore any that may come in stmt->whenClause.
+ *
+ * If isInternal is true then this is an internally-generated trigger.
+ * This argument sets the tgisinternal field of the pg_trigger entry, and
+ * if true causes us to modify the given trigger name to ensure uniqueness.
+ *
+ * When isInternal is not true we require ACL_TRIGGER permissions on the
+ * relation, as well as ACL_EXECUTE on the trigger function. For internal
+ * triggers the caller must apply any required permission checks.
+ *
+ * When called on partitioned tables, this function recurses to create the
+ * trigger on all the partitions, except if isInternal is true, in which
+ * case caller is expected to execute recursion on its own. in_partition
+ * indicates such a recursive call; outside callers should pass "false"
+ * (but see CloneRowTriggersToPartition).
+ */
+ObjectAddress
+CreateTrigger(CreateTrigStmt *stmt, const char *queryString,
+ Oid relOid, Oid refRelOid, Oid constraintOid, Oid indexOid,
+ Oid funcoid, Oid parentTriggerOid, Node *whenClause,
+ bool isInternal, bool in_partition)
+{
+ return
+ CreateTriggerFiringOn(stmt, queryString, relOid, refRelOid,
+ constraintOid, indexOid, funcoid,
+ parentTriggerOid, whenClause, isInternal,
+ in_partition, TRIGGER_FIRES_ON_ORIGIN);
+}
+
+/*
+ * Like the above; additionally the firing condition
+ * (always/origin/replica/disabled) can be specified.
+ */
+ObjectAddress
+CreateTriggerFiringOn(CreateTrigStmt *stmt, const char *queryString,
+ Oid relOid, Oid refRelOid, Oid constraintOid,
+ Oid indexOid, Oid funcoid, Oid parentTriggerOid,
+ Node *whenClause, bool isInternal, bool in_partition,
+ char trigger_fires_when)
+{
+ int16 tgtype;
+ int ncolumns;
+ int16 *columns;
+ int2vector *tgattr;
+ List *whenRtable;
+ char *qual;
+ Datum values[Natts_pg_trigger];
+ bool nulls[Natts_pg_trigger];
+ Relation rel;
+ AclResult aclresult;
+ Relation tgrel;
+ Relation pgrel;
+ HeapTuple tuple = NULL;
+ Oid funcrettype;
+ Oid trigoid = InvalidOid;
+ char internaltrigname[NAMEDATALEN];
+ char *trigname;
+ Oid constrrelid = InvalidOid;
+ ObjectAddress myself,
+ referenced;
+ char *oldtablename = NULL;
+ char *newtablename = NULL;
+ bool partition_recurse;
+ bool trigger_exists = false;
+ Oid existing_constraint_oid = InvalidOid;
+ bool existing_isInternal = false;
+ bool existing_isClone = false;
+
+ if (OidIsValid(relOid))
+ rel = table_open(relOid, ShareRowExclusiveLock);
+ else
+ rel = table_openrv(stmt->relation, ShareRowExclusiveLock);
+
+ /*
+ * Triggers must be on tables or views, and there are additional
+ * relation-type-specific restrictions.
+ */
+ if (rel->rd_rel->relkind == RELKIND_RELATION)
+ {
+ /* Tables can't have INSTEAD OF triggers */
+ if (stmt->timing != TRIGGER_TYPE_BEFORE &&
+ stmt->timing != TRIGGER_TYPE_AFTER)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("\"%s\" is a table",
+ RelationGetRelationName(rel)),
+ errdetail("Tables cannot have INSTEAD OF triggers.")));
+ }
+ else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ {
+ /* Partitioned tables can't have INSTEAD OF triggers */
+ if (stmt->timing != TRIGGER_TYPE_BEFORE &&
+ stmt->timing != TRIGGER_TYPE_AFTER)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("\"%s\" is a table",
+ RelationGetRelationName(rel)),
+ errdetail("Tables cannot have INSTEAD OF triggers.")));
+
+ /*
+ * FOR EACH ROW triggers have further restrictions
+ */
+ if (stmt->row)
+ {
+ /*
+ * Disallow use of transition tables.
+ *
+ * Note that we have another restriction about transition tables
+ * in partitions; search for 'has_superclass' below for an
+ * explanation. The check here is just to protect from the fact
+ * that if we allowed it here, the creation would succeed for a
+ * partitioned table with no partitions, but would be blocked by
+ * the other restriction when the first partition was created,
+ * which is very unfriendly behavior.
+ */
+ if (stmt->transitionRels != NIL)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("\"%s\" is a partitioned table",
+ RelationGetRelationName(rel)),
+ errdetail("ROW triggers with transition tables are not supported on partitioned tables.")));
+ }
+ }
+ else if (rel->rd_rel->relkind == RELKIND_VIEW)
+ {
+ /*
+ * Views can have INSTEAD OF triggers (which we check below are
+ * row-level), or statement-level BEFORE/AFTER triggers.
+ */
+ if (stmt->timing != TRIGGER_TYPE_INSTEAD && stmt->row)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("\"%s\" is a view",
+ RelationGetRelationName(rel)),
+ errdetail("Views cannot have row-level BEFORE or AFTER triggers.")));
+ /* Disallow TRUNCATE triggers on VIEWs */
+ if (TRIGGER_FOR_TRUNCATE(stmt->events))
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("\"%s\" is a view",
+ RelationGetRelationName(rel)),
+ errdetail("Views cannot have TRUNCATE triggers.")));
+ }
+ else if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+ {
+ if (stmt->timing != TRIGGER_TYPE_BEFORE &&
+ stmt->timing != TRIGGER_TYPE_AFTER)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("\"%s\" is a foreign table",
+ RelationGetRelationName(rel)),
+ errdetail("Foreign tables cannot have INSTEAD OF triggers.")));
+
+ if (TRIGGER_FOR_TRUNCATE(stmt->events))
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("\"%s\" is a foreign table",
+ RelationGetRelationName(rel)),
+ errdetail("Foreign tables cannot have TRUNCATE triggers.")));
+
+ /*
+ * We disallow constraint triggers to protect the assumption that
+ * triggers on FKs can't be deferred. See notes with AfterTriggers
+ * data structures, below.
+ */
+ if (stmt->isconstraint)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("\"%s\" is a foreign table",
+ RelationGetRelationName(rel)),
+ errdetail("Foreign tables cannot have constraint triggers.")));
+ }
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("relation \"%s\" cannot have triggers",
+ RelationGetRelationName(rel)),
+ errdetail_relkind_not_supported(rel->rd_rel->relkind)));
+
+ if (!allowSystemTableMods && IsSystemRelation(rel))
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("permission denied: \"%s\" is a system catalog",
+ RelationGetRelationName(rel))));
+
+ if (stmt->isconstraint)
+ {
+ /*
+ * We must take a lock on the target relation to protect against
+ * concurrent drop. It's not clear that AccessShareLock is strong
+ * enough, but we certainly need at least that much... otherwise, we
+ * might end up creating a pg_constraint entry referencing a
+ * nonexistent table.
+ */
+ if (OidIsValid(refRelOid))
+ {
+ LockRelationOid(refRelOid, AccessShareLock);
+ constrrelid = refRelOid;
+ }
+ else if (stmt->constrrel != NULL)
+ constrrelid = RangeVarGetRelid(stmt->constrrel, AccessShareLock,
+ false);
+ }
+
+ /* permission checks */
+ if (!isInternal)
+ {
+ aclresult = pg_class_aclcheck(RelationGetRelid(rel), GetUserId(),
+ ACL_TRIGGER);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error(aclresult, get_relkind_objtype(rel->rd_rel->relkind),
+ RelationGetRelationName(rel));
+
+ if (OidIsValid(constrrelid))
+ {
+ aclresult = pg_class_aclcheck(constrrelid, GetUserId(),
+ ACL_TRIGGER);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error(aclresult, get_relkind_objtype(get_rel_relkind(constrrelid)),
+ get_rel_name(constrrelid));
+ }
+ }
+
+ /*
+ * When called on a partitioned table to create a FOR EACH ROW trigger
+ * that's not internal, we create one trigger for each partition, too.
+ *
+ * For that, we'd better hold lock on all of them ahead of time.
+ */
+ partition_recurse = !isInternal && stmt->row &&
+ rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE;
+ if (partition_recurse)
+ list_free(find_all_inheritors(RelationGetRelid(rel),
+ ShareRowExclusiveLock, NULL));
+
+ /* Compute tgtype */
+ TRIGGER_CLEAR_TYPE(tgtype);
+ if (stmt->row)
+ TRIGGER_SETT_ROW(tgtype);
+ tgtype |= stmt->timing;
+ tgtype |= stmt->events;
+
+ /* Disallow ROW-level TRUNCATE triggers */
+ if (TRIGGER_FOR_ROW(tgtype) && TRIGGER_FOR_TRUNCATE(tgtype))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("TRUNCATE FOR EACH ROW triggers are not supported")));
+
+ /* INSTEAD triggers must be row-level, and can't have WHEN or columns */
+ if (TRIGGER_FOR_INSTEAD(tgtype))
+ {
+ if (!TRIGGER_FOR_ROW(tgtype))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("INSTEAD OF triggers must be FOR EACH ROW")));
+ if (stmt->whenClause)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("INSTEAD OF triggers cannot have WHEN conditions")));
+ if (stmt->columns != NIL)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("INSTEAD OF triggers cannot have column lists")));
+ }
+
+ /*
+ * We don't yet support naming ROW transition variables, but the parser
+ * recognizes the syntax so we can give a nicer message here.
+ *
+ * Per standard, REFERENCING TABLE names are only allowed on AFTER
+ * triggers. Per standard, REFERENCING ROW names are not allowed with FOR
+ * EACH STATEMENT. Per standard, each OLD/NEW, ROW/TABLE permutation is
+ * only allowed once. Per standard, OLD may not be specified when
+ * creating a trigger only for INSERT, and NEW may not be specified when
+ * creating a trigger only for DELETE.
+ *
+ * Notice that the standard allows an AFTER ... FOR EACH ROW trigger to
+ * reference both ROW and TABLE transition data.
+ */
+ if (stmt->transitionRels != NIL)
+ {
+ List *varList = stmt->transitionRels;
+ ListCell *lc;
+
+ foreach(lc, varList)
+ {
+ TriggerTransition *tt = lfirst_node(TriggerTransition, lc);
+
+ if (!(tt->isTable))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("ROW variable naming in the REFERENCING clause is not supported"),
+ errhint("Use OLD TABLE or NEW TABLE for naming transition tables.")));
+
+ /*
+ * Because of the above test, we omit further ROW-related testing
+ * below. If we later allow naming OLD and NEW ROW variables,
+ * adjustments will be needed below.
+ */
+
+ if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("\"%s\" is a foreign table",
+ RelationGetRelationName(rel)),
+ errdetail("Triggers on foreign tables cannot have transition tables.")));
+
+ if (rel->rd_rel->relkind == RELKIND_VIEW)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("\"%s\" is a view",
+ RelationGetRelationName(rel)),
+ errdetail("Triggers on views cannot have transition tables.")));
+
+ /*
+ * We currently don't allow row-level triggers with transition
+ * tables on partition or inheritance children. Such triggers
+ * would somehow need to see tuples converted to the format of the
+ * table they're attached to, and it's not clear which subset of
+ * tuples each child should see. See also the prohibitions in
+ * ATExecAttachPartition() and ATExecAddInherit().
+ */
+ if (TRIGGER_FOR_ROW(tgtype) && has_superclass(rel->rd_id))
+ {
+ /* Use appropriate error message. */
+ if (rel->rd_rel->relispartition)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("ROW triggers with transition tables are not supported on partitions")));
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("ROW triggers with transition tables are not supported on inheritance children")));
+ }
+
+ if (stmt->timing != TRIGGER_TYPE_AFTER)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("transition table name can only be specified for an AFTER trigger")));
+
+ if (TRIGGER_FOR_TRUNCATE(tgtype))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("TRUNCATE triggers with transition tables are not supported")));
+
+ /*
+ * We currently don't allow multi-event triggers ("INSERT OR
+ * UPDATE") with transition tables, because it's not clear how to
+ * handle INSERT ... ON CONFLICT statements which can fire both
+ * INSERT and UPDATE triggers. We show the inserted tuples to
+ * INSERT triggers and the updated tuples to UPDATE triggers, but
+ * it's not yet clear what INSERT OR UPDATE trigger should see.
+ * This restriction could be lifted if we can decide on the right
+ * semantics in a later release.
+ */
+ if (((TRIGGER_FOR_INSERT(tgtype) ? 1 : 0) +
+ (TRIGGER_FOR_UPDATE(tgtype) ? 1 : 0) +
+ (TRIGGER_FOR_DELETE(tgtype) ? 1 : 0)) != 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("transition tables cannot be specified for triggers with more than one event")));
+
+ /*
+ * We currently don't allow column-specific triggers with
+ * transition tables. Per spec, that seems to require
+ * accumulating separate transition tables for each combination of
+ * columns, which is a lot of work for a rather marginal feature.
+ */
+ if (stmt->columns != NIL)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("transition tables cannot be specified for triggers with column lists")));
+
+ /*
+ * We disallow constraint triggers with transition tables, to
+ * protect the assumption that such triggers can't be deferred.
+ * See notes with AfterTriggers data structures, below.
+ *
+ * Currently this is enforced by the grammar, so just Assert here.
+ */
+ Assert(!stmt->isconstraint);
+
+ if (tt->isNew)
+ {
+ if (!(TRIGGER_FOR_INSERT(tgtype) ||
+ TRIGGER_FOR_UPDATE(tgtype)))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("NEW TABLE can only be specified for an INSERT or UPDATE trigger")));
+
+ if (newtablename != NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("NEW TABLE cannot be specified multiple times")));
+
+ newtablename = tt->name;
+ }
+ else
+ {
+ if (!(TRIGGER_FOR_DELETE(tgtype) ||
+ TRIGGER_FOR_UPDATE(tgtype)))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("OLD TABLE can only be specified for a DELETE or UPDATE trigger")));
+
+ if (oldtablename != NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("OLD TABLE cannot be specified multiple times")));
+
+ oldtablename = tt->name;
+ }
+ }
+
+ if (newtablename != NULL && oldtablename != NULL &&
+ strcmp(newtablename, oldtablename) == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("OLD TABLE name and NEW TABLE name cannot be the same")));
+ }
+
+ /*
+ * Parse the WHEN clause, if any and we weren't passed an already
+ * transformed one.
+ *
+ * Note that as a side effect, we fill whenRtable when parsing. If we got
+ * an already parsed clause, this does not occur, which is what we want --
+ * no point in adding redundant dependencies below.
+ */
+ if (!whenClause && stmt->whenClause)
+ {
+ ParseState *pstate;
+ ParseNamespaceItem *nsitem;
+ List *varList;
+ ListCell *lc;
+
+ /* Set up a pstate to parse with */
+ pstate = make_parsestate(NULL);
+ pstate->p_sourcetext = queryString;
+
+ /*
+ * Set up nsitems for OLD and NEW references.
+ *
+ * 'OLD' must always have varno equal to 1 and 'NEW' equal to 2.
+ */
+ nsitem = addRangeTableEntryForRelation(pstate, rel,
+ AccessShareLock,
+ makeAlias("old", NIL),
+ false, false);
+ addNSItemToQuery(pstate, nsitem, false, true, true);
+ nsitem = addRangeTableEntryForRelation(pstate, rel,
+ AccessShareLock,
+ makeAlias("new", NIL),
+ false, false);
+ addNSItemToQuery(pstate, nsitem, false, true, true);
+
+ /* Transform expression. Copy to be sure we don't modify original */
+ whenClause = transformWhereClause(pstate,
+ copyObject(stmt->whenClause),
+ EXPR_KIND_TRIGGER_WHEN,
+ "WHEN");
+ /* we have to fix its collations too */
+ assign_expr_collations(pstate, whenClause);
+
+ /*
+ * Check for disallowed references to OLD/NEW.
+ *
+ * NB: pull_var_clause is okay here only because we don't allow
+ * subselects in WHEN clauses; it would fail to examine the contents
+ * of subselects.
+ */
+ varList = pull_var_clause(whenClause, 0);
+ foreach(lc, varList)
+ {
+ Var *var = (Var *) lfirst(lc);
+
+ switch (var->varno)
+ {
+ case PRS2_OLD_VARNO:
+ if (!TRIGGER_FOR_ROW(tgtype))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("statement trigger's WHEN condition cannot reference column values"),
+ parser_errposition(pstate, var->location)));
+ if (TRIGGER_FOR_INSERT(tgtype))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("INSERT trigger's WHEN condition cannot reference OLD values"),
+ parser_errposition(pstate, var->location)));
+ /* system columns are okay here */
+ break;
+ case PRS2_NEW_VARNO:
+ if (!TRIGGER_FOR_ROW(tgtype))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("statement trigger's WHEN condition cannot reference column values"),
+ parser_errposition(pstate, var->location)));
+ if (TRIGGER_FOR_DELETE(tgtype))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("DELETE trigger's WHEN condition cannot reference NEW values"),
+ parser_errposition(pstate, var->location)));
+ if (var->varattno < 0 && TRIGGER_FOR_BEFORE(tgtype))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("BEFORE trigger's WHEN condition cannot reference NEW system columns"),
+ parser_errposition(pstate, var->location)));
+ if (TRIGGER_FOR_BEFORE(tgtype) &&
+ var->varattno == 0 &&
+ RelationGetDescr(rel)->constr &&
+ RelationGetDescr(rel)->constr->has_generated_stored)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("BEFORE trigger's WHEN condition cannot reference NEW generated columns"),
+ errdetail("A whole-row reference is used and the table contains generated columns."),
+ parser_errposition(pstate, var->location)));
+ if (TRIGGER_FOR_BEFORE(tgtype) &&
+ var->varattno > 0 &&
+ TupleDescAttr(RelationGetDescr(rel), var->varattno - 1)->attgenerated)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("BEFORE trigger's WHEN condition cannot reference NEW generated columns"),
+ errdetail("Column \"%s\" is a generated column.",
+ NameStr(TupleDescAttr(RelationGetDescr(rel), var->varattno - 1)->attname)),
+ parser_errposition(pstate, var->location)));
+ break;
+ default:
+ /* can't happen without add_missing_from, so just elog */
+ elog(ERROR, "trigger WHEN condition cannot contain references to other relations");
+ break;
+ }
+ }
+
+ /* we'll need the rtable for recordDependencyOnExpr */
+ whenRtable = pstate->p_rtable;
+
+ qual = nodeToString(whenClause);
+
+ free_parsestate(pstate);
+ }
+ else if (!whenClause)
+ {
+ whenClause = NULL;
+ whenRtable = NIL;
+ qual = NULL;
+ }
+ else
+ {
+ qual = nodeToString(whenClause);
+ whenRtable = NIL;
+ }
+
+ /*
+ * Find and validate the trigger function.
+ */
+ if (!OidIsValid(funcoid))
+ funcoid = LookupFuncName(stmt->funcname, 0, NULL, false);
+ if (!isInternal)
+ {
+ aclresult = pg_proc_aclcheck(funcoid, GetUserId(), ACL_EXECUTE);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error(aclresult, OBJECT_FUNCTION,
+ NameListToString(stmt->funcname));
+ }
+ funcrettype = get_func_rettype(funcoid);
+ if (funcrettype != TRIGGEROID)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("function %s must return type %s",
+ NameListToString(stmt->funcname), "trigger")));
+
+ /*
+ * Scan pg_trigger to see if there is already a trigger of the same name.
+ * Skip this for internally generated triggers, since we'll modify the
+ * name to be unique below.
+ *
+ * NOTE that this is cool only because we have ShareRowExclusiveLock on
+ * the relation, so the trigger set won't be changing underneath us.
+ */
+ tgrel = table_open(TriggerRelationId, RowExclusiveLock);
+ if (!isInternal)
+ {
+ ScanKeyData skeys[2];
+ SysScanDesc tgscan;
+
+ ScanKeyInit(&skeys[0],
+ Anum_pg_trigger_tgrelid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(RelationGetRelid(rel)));
+
+ ScanKeyInit(&skeys[1],
+ Anum_pg_trigger_tgname,
+ BTEqualStrategyNumber, F_NAMEEQ,
+ CStringGetDatum(stmt->trigname));
+
+ tgscan = systable_beginscan(tgrel, TriggerRelidNameIndexId, true,
+ NULL, 2, skeys);
+
+ /* There should be at most one matching tuple */
+ if (HeapTupleIsValid(tuple = systable_getnext(tgscan)))
+ {
+ Form_pg_trigger oldtrigger = (Form_pg_trigger) GETSTRUCT(tuple);
+
+ trigoid = oldtrigger->oid;
+ existing_constraint_oid = oldtrigger->tgconstraint;
+ existing_isInternal = oldtrigger->tgisinternal;
+ existing_isClone = OidIsValid(oldtrigger->tgparentid);
+ trigger_exists = true;
+ /* copy the tuple to use in CatalogTupleUpdate() */
+ tuple = heap_copytuple(tuple);
+ }
+ systable_endscan(tgscan);
+ }
+
+ if (!trigger_exists)
+ {
+ /* Generate the OID for the new trigger. */
+ trigoid = GetNewOidWithIndex(tgrel, TriggerOidIndexId,
+ Anum_pg_trigger_oid);
+ }
+ else
+ {
+ /*
+ * If OR REPLACE was specified, we'll replace the old trigger;
+ * otherwise complain about the duplicate name.
+ */
+ if (!stmt->replace)
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg("trigger \"%s\" for relation \"%s\" already exists",
+ stmt->trigname, RelationGetRelationName(rel))));
+
+ /*
+ * An internal trigger or a child trigger (isClone) cannot be replaced
+ * by a user-defined trigger. However, skip this test when
+ * in_partition, because then we're recursing from a partitioned table
+ * and the check was made at the parent level.
+ */
+ if ((existing_isInternal || existing_isClone) &&
+ !isInternal && !in_partition)
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg("trigger \"%s\" for relation \"%s\" is an internal or a child trigger",
+ stmt->trigname, RelationGetRelationName(rel))));
+
+ /*
+ * It is not allowed to replace with a constraint trigger; gram.y
+ * should have enforced this already.
+ */
+ Assert(!stmt->isconstraint);
+
+ /*
+ * It is not allowed to replace an existing constraint trigger,
+ * either. (The reason for these restrictions is partly that it seems
+ * difficult to deal with pending trigger events in such cases, and
+ * partly that the command might imply changing the constraint's
+ * properties as well, which doesn't seem nice.)
+ */
+ if (OidIsValid(existing_constraint_oid))
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg("trigger \"%s\" for relation \"%s\" is a constraint trigger",
+ stmt->trigname, RelationGetRelationName(rel))));
+ }
+
+ /*
+ * If it's a user-entered CREATE CONSTRAINT TRIGGER command, make a
+ * corresponding pg_constraint entry.
+ */
+ if (stmt->isconstraint && !OidIsValid(constraintOid))
+ {
+ /* Internal callers should have made their own constraints */
+ Assert(!isInternal);
+ constraintOid = CreateConstraintEntry(stmt->trigname,
+ RelationGetNamespace(rel),
+ CONSTRAINT_TRIGGER,
+ stmt->deferrable,
+ stmt->initdeferred,
+ true,
+ InvalidOid, /* no parent */
+ RelationGetRelid(rel),
+ NULL, /* no conkey */
+ 0,
+ 0,
+ InvalidOid, /* no domain */
+ InvalidOid, /* no index */
+ InvalidOid, /* no foreign key */
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ 0,
+ ' ',
+ ' ',
+ NULL,
+ 0,
+ ' ',
+ NULL, /* no exclusion */
+ NULL, /* no check constraint */
+ NULL,
+ true, /* islocal */
+ 0, /* inhcount */
+ true, /* noinherit */
+ isInternal); /* is_internal */
+ }
+
+ /*
+ * If trigger is internally generated, modify the provided trigger name to
+ * ensure uniqueness by appending the trigger OID. (Callers will usually
+ * supply a simple constant trigger name in these cases.)
+ */
+ if (isInternal)
+ {
+ snprintf(internaltrigname, sizeof(internaltrigname),
+ "%s_%u", stmt->trigname, trigoid);
+ trigname = internaltrigname;
+ }
+ else
+ {
+ /* user-defined trigger; use the specified trigger name as-is */
+ trigname = stmt->trigname;
+ }
+
+ /*
+ * Build the new pg_trigger tuple.
+ */
+ memset(nulls, false, sizeof(nulls));
+
+ values[Anum_pg_trigger_oid - 1] = ObjectIdGetDatum(trigoid);
+ values[Anum_pg_trigger_tgrelid - 1] = ObjectIdGetDatum(RelationGetRelid(rel));
+ values[Anum_pg_trigger_tgparentid - 1] = ObjectIdGetDatum(parentTriggerOid);
+ values[Anum_pg_trigger_tgname - 1] = DirectFunctionCall1(namein,
+ CStringGetDatum(trigname));
+ values[Anum_pg_trigger_tgfoid - 1] = ObjectIdGetDatum(funcoid);
+ values[Anum_pg_trigger_tgtype - 1] = Int16GetDatum(tgtype);
+ values[Anum_pg_trigger_tgenabled - 1] = trigger_fires_when;
+ values[Anum_pg_trigger_tgisinternal - 1] = BoolGetDatum(isInternal);
+ values[Anum_pg_trigger_tgconstrrelid - 1] = ObjectIdGetDatum(constrrelid);
+ values[Anum_pg_trigger_tgconstrindid - 1] = ObjectIdGetDatum(indexOid);
+ values[Anum_pg_trigger_tgconstraint - 1] = ObjectIdGetDatum(constraintOid);
+ values[Anum_pg_trigger_tgdeferrable - 1] = BoolGetDatum(stmt->deferrable);
+ values[Anum_pg_trigger_tginitdeferred - 1] = BoolGetDatum(stmt->initdeferred);
+
+ if (stmt->args)
+ {
+ ListCell *le;
+ char *args;
+ int16 nargs = list_length(stmt->args);
+ int len = 0;
+
+ foreach(le, stmt->args)
+ {
+ char *ar = strVal(lfirst(le));
+
+ len += strlen(ar) + 4;
+ for (; *ar; ar++)
+ {
+ if (*ar == '\\')
+ len++;
+ }
+ }
+ args = (char *) palloc(len + 1);
+ args[0] = '\0';
+ foreach(le, stmt->args)
+ {
+ char *s = strVal(lfirst(le));
+ char *d = args + strlen(args);
+
+ while (*s)
+ {
+ if (*s == '\\')
+ *d++ = '\\';
+ *d++ = *s++;
+ }
+ strcpy(d, "\\000");
+ }
+ values[Anum_pg_trigger_tgnargs - 1] = Int16GetDatum(nargs);
+ values[Anum_pg_trigger_tgargs - 1] = DirectFunctionCall1(byteain,
+ CStringGetDatum(args));
+ }
+ else
+ {
+ values[Anum_pg_trigger_tgnargs - 1] = Int16GetDatum(0);
+ values[Anum_pg_trigger_tgargs - 1] = DirectFunctionCall1(byteain,
+ CStringGetDatum(""));
+ }
+
+ /* build column number array if it's a column-specific trigger */
+ ncolumns = list_length(stmt->columns);
+ if (ncolumns == 0)
+ columns = NULL;
+ else
+ {
+ ListCell *cell;
+ int i = 0;
+
+ columns = (int16 *) palloc(ncolumns * sizeof(int16));
+ foreach(cell, stmt->columns)
+ {
+ char *name = strVal(lfirst(cell));
+ int16 attnum;
+ int j;
+
+ /* Lookup column name. System columns are not allowed */
+ attnum = attnameAttNum(rel, name, false);
+ if (attnum == InvalidAttrNumber)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_COLUMN),
+ errmsg("column \"%s\" of relation \"%s\" does not exist",
+ name, RelationGetRelationName(rel))));
+
+ /* Check for duplicates */
+ for (j = i - 1; j >= 0; j--)
+ {
+ if (columns[j] == attnum)
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_COLUMN),
+ errmsg("column \"%s\" specified more than once",
+ name)));
+ }
+
+ columns[i++] = attnum;
+ }
+ }
+ tgattr = buildint2vector(columns, ncolumns);
+ values[Anum_pg_trigger_tgattr - 1] = PointerGetDatum(tgattr);
+
+ /* set tgqual if trigger has WHEN clause */
+ if (qual)
+ values[Anum_pg_trigger_tgqual - 1] = CStringGetTextDatum(qual);
+ else
+ nulls[Anum_pg_trigger_tgqual - 1] = true;
+
+ if (oldtablename)
+ values[Anum_pg_trigger_tgoldtable - 1] = DirectFunctionCall1(namein,
+ CStringGetDatum(oldtablename));
+ else
+ nulls[Anum_pg_trigger_tgoldtable - 1] = true;
+ if (newtablename)
+ values[Anum_pg_trigger_tgnewtable - 1] = DirectFunctionCall1(namein,
+ CStringGetDatum(newtablename));
+ else
+ nulls[Anum_pg_trigger_tgnewtable - 1] = true;
+
+ /*
+ * Insert or replace tuple in pg_trigger.
+ */
+ if (!trigger_exists)
+ {
+ tuple = heap_form_tuple(tgrel->rd_att, values, nulls);
+ CatalogTupleInsert(tgrel, tuple);
+ }
+ else
+ {
+ HeapTuple newtup;
+
+ newtup = heap_form_tuple(tgrel->rd_att, values, nulls);
+ CatalogTupleUpdate(tgrel, &tuple->t_self, newtup);
+ heap_freetuple(newtup);
+ }
+
+ heap_freetuple(tuple); /* free either original or new tuple */
+ table_close(tgrel, RowExclusiveLock);
+
+ pfree(DatumGetPointer(values[Anum_pg_trigger_tgname - 1]));
+ pfree(DatumGetPointer(values[Anum_pg_trigger_tgargs - 1]));
+ pfree(DatumGetPointer(values[Anum_pg_trigger_tgattr - 1]));
+ if (oldtablename)
+ pfree(DatumGetPointer(values[Anum_pg_trigger_tgoldtable - 1]));
+ if (newtablename)
+ pfree(DatumGetPointer(values[Anum_pg_trigger_tgnewtable - 1]));
+
+ /*
+ * Update relation's pg_class entry; if necessary; and if not, send an SI
+ * message to make other backends (and this one) rebuild relcache entries.
+ */
+ pgrel = table_open(RelationRelationId, RowExclusiveLock);
+ tuple = SearchSysCacheCopy1(RELOID,
+ ObjectIdGetDatum(RelationGetRelid(rel)));
+ if (!HeapTupleIsValid(tuple))
+ elog(ERROR, "cache lookup failed for relation %u",
+ RelationGetRelid(rel));
+ if (!((Form_pg_class) GETSTRUCT(tuple))->relhastriggers)
+ {
+ ((Form_pg_class) GETSTRUCT(tuple))->relhastriggers = true;
+
+ CatalogTupleUpdate(pgrel, &tuple->t_self, tuple);
+
+ CommandCounterIncrement();
+ }
+ else
+ CacheInvalidateRelcacheByTuple(tuple);
+
+ heap_freetuple(tuple);
+ table_close(pgrel, RowExclusiveLock);
+
+ /*
+ * If we're replacing a trigger, flush all the old dependencies before
+ * recording new ones.
+ */
+ if (trigger_exists)
+ deleteDependencyRecordsFor(TriggerRelationId, trigoid, true);
+
+ /*
+ * Record dependencies for trigger. Always place a normal dependency on
+ * the function.
+ */
+ myself.classId = TriggerRelationId;
+ myself.objectId = trigoid;
+ myself.objectSubId = 0;
+
+ referenced.classId = ProcedureRelationId;
+ referenced.objectId = funcoid;
+ referenced.objectSubId = 0;
+ recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
+
+ if (isInternal && OidIsValid(constraintOid))
+ {
+ /*
+ * Internally-generated trigger for a constraint, so make it an
+ * internal dependency of the constraint. We can skip depending on
+ * the relation(s), as there'll be an indirect dependency via the
+ * constraint.
+ */
+ referenced.classId = ConstraintRelationId;
+ referenced.objectId = constraintOid;
+ referenced.objectSubId = 0;
+ recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL);
+ }
+ else
+ {
+ /*
+ * User CREATE TRIGGER, so place dependencies. We make trigger be
+ * auto-dropped if its relation is dropped or if the FK relation is
+ * dropped. (Auto drop is compatible with our pre-7.3 behavior.)
+ */
+ referenced.classId = RelationRelationId;
+ referenced.objectId = RelationGetRelid(rel);
+ referenced.objectSubId = 0;
+ recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
+
+ if (OidIsValid(constrrelid))
+ {
+ referenced.classId = RelationRelationId;
+ referenced.objectId = constrrelid;
+ referenced.objectSubId = 0;
+ recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
+ }
+ /* Not possible to have an index dependency in this case */
+ Assert(!OidIsValid(indexOid));
+
+ /*
+ * If it's a user-specified constraint trigger, make the constraint
+ * internally dependent on the trigger instead of vice versa.
+ */
+ if (OidIsValid(constraintOid))
+ {
+ referenced.classId = ConstraintRelationId;
+ referenced.objectId = constraintOid;
+ referenced.objectSubId = 0;
+ recordDependencyOn(&referenced, &myself, DEPENDENCY_INTERNAL);
+ }
+
+ /*
+ * If it's a partition trigger, create the partition dependencies.
+ */
+ if (OidIsValid(parentTriggerOid))
+ {
+ ObjectAddressSet(referenced, TriggerRelationId, parentTriggerOid);
+ recordDependencyOn(&myself, &referenced, DEPENDENCY_PARTITION_PRI);
+ ObjectAddressSet(referenced, RelationRelationId, RelationGetRelid(rel));
+ recordDependencyOn(&myself, &referenced, DEPENDENCY_PARTITION_SEC);
+ }
+ }
+
+ /* If column-specific trigger, add normal dependencies on columns */
+ if (columns != NULL)
+ {
+ int i;
+
+ referenced.classId = RelationRelationId;
+ referenced.objectId = RelationGetRelid(rel);
+ for (i = 0; i < ncolumns; i++)
+ {
+ referenced.objectSubId = columns[i];
+ recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
+ }
+ }
+
+ /*
+ * If it has a WHEN clause, add dependencies on objects mentioned in the
+ * expression (eg, functions, as well as any columns used).
+ */
+ if (whenRtable != NIL)
+ recordDependencyOnExpr(&myself, whenClause, whenRtable,
+ DEPENDENCY_NORMAL);
+
+ /* Post creation hook for new trigger */
+ InvokeObjectPostCreateHookArg(TriggerRelationId, trigoid, 0,
+ isInternal);
+
+ /*
+ * Lastly, create the trigger on child relations, if needed.
+ */
+ if (partition_recurse)
+ {
+ PartitionDesc partdesc = RelationGetPartitionDesc(rel, true);
+ List *idxs = NIL;
+ List *childTbls = NIL;
+ ListCell *l;
+ int i;
+ MemoryContext oldcxt,
+ perChildCxt;
+
+ perChildCxt = AllocSetContextCreate(CurrentMemoryContext,
+ "part trig clone",
+ ALLOCSET_SMALL_SIZES);
+
+ /*
+ * When a trigger is being created associated with an index, we'll
+ * need to associate the trigger in each child partition with the
+ * corresponding index on it.
+ */
+ if (OidIsValid(indexOid))
+ {
+ ListCell *l;
+ List *idxs = NIL;
+
+ idxs = find_inheritance_children(indexOid, ShareRowExclusiveLock);
+ foreach(l, idxs)
+ childTbls = lappend_oid(childTbls,
+ IndexGetRelation(lfirst_oid(l),
+ false));
+ }
+
+ oldcxt = MemoryContextSwitchTo(perChildCxt);
+
+ /* Iterate to create the trigger on each existing partition */
+ for (i = 0; i < partdesc->nparts; i++)
+ {
+ Oid indexOnChild = InvalidOid;
+ ListCell *l2;
+ CreateTrigStmt *childStmt;
+ Relation childTbl;
+ Node *qual;
+
+ childTbl = table_open(partdesc->oids[i], ShareRowExclusiveLock);
+
+ /* Find which of the child indexes is the one on this partition */
+ if (OidIsValid(indexOid))
+ {
+ forboth(l, idxs, l2, childTbls)
+ {
+ if (lfirst_oid(l2) == partdesc->oids[i])
+ {
+ indexOnChild = lfirst_oid(l);
+ break;
+ }
+ }
+ if (!OidIsValid(indexOnChild))
+ elog(ERROR, "failed to find index matching index \"%s\" in partition \"%s\"",
+ get_rel_name(indexOid),
+ get_rel_name(partdesc->oids[i]));
+ }
+
+ /*
+ * Initialize our fabricated parse node by copying the original
+ * one, then resetting fields that we pass separately.
+ */
+ childStmt = (CreateTrigStmt *) copyObject(stmt);
+ childStmt->funcname = NIL;
+ childStmt->whenClause = NULL;
+
+ /* If there is a WHEN clause, create a modified copy of it */
+ qual = copyObject(whenClause);
+ qual = (Node *)
+ map_partition_varattnos((List *) qual, PRS2_OLD_VARNO,
+ childTbl, rel);
+ qual = (Node *)
+ map_partition_varattnos((List *) qual, PRS2_NEW_VARNO,
+ childTbl, rel);
+
+ CreateTriggerFiringOn(childStmt, queryString,
+ partdesc->oids[i], refRelOid,
+ InvalidOid, indexOnChild,
+ funcoid, trigoid, qual,
+ isInternal, true, trigger_fires_when);
+
+ table_close(childTbl, NoLock);
+
+ MemoryContextReset(perChildCxt);
+ }
+
+ MemoryContextSwitchTo(oldcxt);
+ MemoryContextDelete(perChildCxt);
+ list_free(idxs);
+ list_free(childTbls);
+ }
+
+ /* Keep lock on target rel until end of xact */
+ table_close(rel, NoLock);
+
+ return myself;
+}
+
+/*
+ * TriggerSetParentTrigger
+ *		Set a partition's trigger as child of its parent trigger,
+ *		or remove the linkage if parentTrigId is InvalidOid.
+ *
+ * This updates the trigger's pg_trigger row to show it as inherited, and
+ * adds PARTITION dependencies to prevent the trigger from being deleted
+ * on its own.  Alternatively, reverse that.
+ *
+ * trigRel is pg_trigger, already opened by the caller with a lock strong
+ * enough for the update (NOTE(review): presumably RowExclusiveLock --
+ * confirm against callers).
+ */
+void
+TriggerSetParentTrigger(Relation trigRel,
+						Oid childTrigId,
+						Oid parentTrigId,
+						Oid childTableId)
+{
+	SysScanDesc tgscan;
+	ScanKeyData skey[1];
+	Form_pg_trigger trigForm;
+	HeapTuple	tuple,
+				newtup;
+	ObjectAddress depender;
+	ObjectAddress referenced;
+
+	/*
+	 * Find the trigger to modify.
+	 */
+	ScanKeyInit(&skey[0],
+				Anum_pg_trigger_oid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(childTrigId));
+
+	tgscan = systable_beginscan(trigRel, TriggerOidIndexId, true,
+								NULL, 1, skey);
+
+	tuple = systable_getnext(tgscan);
+	if (!HeapTupleIsValid(tuple))
+		elog(ERROR, "could not find tuple for trigger %u", childTrigId);
+	/* work on a copy, not the scan-owned tuple */
+	newtup = heap_copytuple(tuple);
+	trigForm = (Form_pg_trigger) GETSTRUCT(newtup);
+	if (OidIsValid(parentTrigId))
+	{
+		/* don't allow setting parent for a trigger that already has one */
+		if (OidIsValid(trigForm->tgparentid))
+			elog(ERROR, "trigger %u already has a parent trigger",
+				 childTrigId);
+
+		trigForm->tgparentid = parentTrigId;
+
+		CatalogTupleUpdate(trigRel, &tuple->t_self, newtup);
+
+		/*
+		 * Record PARTITION dependencies: the child trigger depends on the
+		 * parent trigger (primary) and on its own table (secondary), so it
+		 * cannot be dropped independently of either.
+		 */
+		ObjectAddressSet(depender, TriggerRelationId, childTrigId);
+
+		ObjectAddressSet(referenced, TriggerRelationId, parentTrigId);
+		recordDependencyOn(&depender, &referenced, DEPENDENCY_PARTITION_PRI);
+
+		ObjectAddressSet(referenced, RelationRelationId, childTableId);
+		recordDependencyOn(&depender, &referenced, DEPENDENCY_PARTITION_SEC);
+	}
+	else
+	{
+		/* Detach: clear the parent link ... */
+		trigForm->tgparentid = InvalidOid;
+
+		CatalogTupleUpdate(trigRel, &tuple->t_self, newtup);
+
+		/* ... and remove the PARTITION dependencies recorded at attach */
+		deleteDependencyRecordsForClass(TriggerRelationId, childTrigId,
+										TriggerRelationId,
+										DEPENDENCY_PARTITION_PRI);
+		deleteDependencyRecordsForClass(TriggerRelationId, childTrigId,
+										RelationRelationId,
+										DEPENDENCY_PARTITION_SEC);
+	}
+
+	heap_freetuple(newtup);
+	systable_endscan(tgscan);
+}
+
+
+/*
+ * Guts of trigger deletion: remove the pg_trigger row identified by trigOid
+ * and invalidate the owning relation's relcache entry.
+ *
+ * The trigger's relation is locked AccessExclusive here and the lock is
+ * retained until end of transaction.
+ */
+void
+RemoveTriggerById(Oid trigOid)
+{
+	Relation	tgrel;
+	SysScanDesc tgscan;
+	ScanKeyData skey[1];
+	HeapTuple	tup;
+	Oid			relid;
+	Relation	rel;
+
+	tgrel = table_open(TriggerRelationId, RowExclusiveLock);
+
+	/*
+	 * Find the trigger to delete.
+	 */
+	ScanKeyInit(&skey[0],
+				Anum_pg_trigger_oid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(trigOid));
+
+	tgscan = systable_beginscan(tgrel, TriggerOidIndexId, true,
+								NULL, 1, skey);
+
+	tup = systable_getnext(tgscan);
+	if (!HeapTupleIsValid(tup))
+		elog(ERROR, "could not find tuple for trigger %u", trigOid);
+
+	/*
+	 * Open and exclusive-lock the relation the trigger belongs to.
+	 */
+	relid = ((Form_pg_trigger) GETSTRUCT(tup))->tgrelid;
+
+	rel = table_open(relid, AccessExclusiveLock);
+
+	/* Sanity checks: only these relkinds can carry triggers. */
+	if (rel->rd_rel->relkind != RELKIND_RELATION &&
+		rel->rd_rel->relkind != RELKIND_VIEW &&
+		rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE &&
+		rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("relation \"%s\" cannot have triggers",
+						RelationGetRelationName(rel)),
+				 errdetail_relkind_not_supported(rel->rd_rel->relkind)));
+
+	if (!allowSystemTableMods && IsSystemRelation(rel))
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 errmsg("permission denied: \"%s\" is a system catalog",
+						RelationGetRelationName(rel))));
+
+	/*
+	 * Delete the pg_trigger tuple.
+	 */
+	CatalogTupleDelete(tgrel, &tup->t_self);
+
+	systable_endscan(tgscan);
+	table_close(tgrel, RowExclusiveLock);
+
+	/*
+	 * We do not bother to try to determine whether any other triggers remain,
+	 * which would be needed in order to decide whether it's safe to clear the
+	 * relation's relhastriggers.  (In any case, there might be a concurrent
+	 * process adding new triggers.)  Instead, just force a relcache inval to
+	 * make other backends (and this one too!) rebuild their relcache entries.
+	 * There's no great harm in leaving relhastriggers true even if there are
+	 * no triggers left.
+	 */
+	CacheInvalidateRelcache(rel);
+
+	/* Keep lock on trigger's rel until end of xact */
+	table_close(rel, NoLock);
+}
+
+/*
+ * get_trigger_oid - Look up a trigger by name to find its OID.
+ *
+ * Scans pg_trigger for a trigger named trigname on relation relid.  If no
+ * such trigger exists, either raise an error (missing_ok == false) or
+ * return InvalidOid (missing_ok == true).
+ */
+Oid
+get_trigger_oid(Oid relid, const char *trigname, bool missing_ok)
+{
+	Relation	pg_trigger;
+	ScanKeyData keys[2];
+	SysScanDesc scan;
+	HeapTuple	tup;
+	Oid			result;
+
+	/* Look up the trigger via the (tgrelid, tgname) index. */
+	pg_trigger = table_open(TriggerRelationId, AccessShareLock);
+
+	ScanKeyInit(&keys[0],
+				Anum_pg_trigger_tgrelid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(relid));
+	ScanKeyInit(&keys[1],
+				Anum_pg_trigger_tgname,
+				BTEqualStrategyNumber, F_NAMEEQ,
+				CStringGetDatum(trigname));
+
+	scan = systable_beginscan(pg_trigger, TriggerRelidNameIndexId, true,
+							  NULL, 2, keys);
+
+	tup = systable_getnext(scan);
+
+	if (HeapTupleIsValid(tup))
+		result = ((Form_pg_trigger) GETSTRUCT(tup))->oid;
+	else
+	{
+		if (!missing_ok)
+			ereport(ERROR,
+					(errcode(ERRCODE_UNDEFINED_OBJECT),
+					 errmsg("trigger \"%s\" for table \"%s\" does not exist",
+							trigname, get_rel_name(relid))));
+		result = InvalidOid;
+	}
+
+	systable_endscan(scan);
+	table_close(pg_trigger, AccessShareLock);
+	return result;
+}
+
+/*
+ * Perform permissions and integrity checks before acquiring a relation lock.
+ */
+static void
+RangeVarCallbackForRenameTrigger(const RangeVar *rv, Oid relid, Oid oldrelid,
+								 void *arg)
+{
+	HeapTuple	classtup;
+	Form_pg_class classform;
+
+	classtup = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
+	if (!HeapTupleIsValid(classtup))
+		return;					/* concurrently dropped */
+	classform = (Form_pg_class) GETSTRUCT(classtup);
+
+	/* triggers exist only on tables, views, foreign and partitioned tables */
+	switch (classform->relkind)
+	{
+		case RELKIND_RELATION:
+		case RELKIND_VIEW:
+		case RELKIND_FOREIGN_TABLE:
+		case RELKIND_PARTITIONED_TABLE:
+			break;
+		default:
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("relation \"%s\" cannot have triggers",
+							rv->relname),
+					 errdetail_relkind_not_supported(classform->relkind)));
+	}
+
+	/* you must own the table to rename one of its triggers */
+	if (!pg_class_ownercheck(relid, GetUserId()))
+		aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(get_rel_relkind(relid)), rv->relname);
+	if (!allowSystemTableMods && IsSystemClass(relid, classform))
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 errmsg("permission denied: \"%s\" is a system catalog",
+						rv->relname)));
+
+	ReleaseSysCache(classtup);
+}
+
+/*
+ * renametrig - changes the name of a trigger on a relation
+ *
+ * trigger name is changed in trigger catalog.
+ * No record of the previous name is kept.
+ *
+ * Outline:
+ *	acquire AccessExclusiveLock on the relation (held to end of xact)
+ *	scan trigger catalog
+ *		for name conflict (within rel)
+ *		for original trigger
+ *	modify tgname in trigger tuple
+ *	update row in catalog
+ *	recurse to partitions if the relation is partitioned
+ */
+ObjectAddress
+renametrig(RenameStmt *stmt)
+{
+	Oid			tgoid;
+	Relation	targetrel;
+	Relation	tgrel;
+	HeapTuple	tuple;
+	SysScanDesc tgscan;
+	ScanKeyData key[2];
+	Oid			relid;
+	ObjectAddress address;
+
+	/*
+	 * Look up name, check permissions, and acquire lock (which we will NOT
+	 * release until end of transaction).
+	 */
+	relid = RangeVarGetRelidExtended(stmt->relation, AccessExclusiveLock,
+									 0,
+									 RangeVarCallbackForRenameTrigger,
+									 NULL);
+
+	/* Have lock already, so just need to build relcache entry. */
+	targetrel = relation_open(relid, NoLock);
+
+	/*
+	 * On partitioned tables, this operation recurses to partitions.  Lock all
+	 * tables upfront.
+	 */
+	if (targetrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+		(void) find_all_inheritors(relid, AccessExclusiveLock, NULL);
+
+	tgrel = table_open(TriggerRelationId, RowExclusiveLock);
+
+	/*
+	 * Search for the trigger to modify.
+	 */
+	ScanKeyInit(&key[0],
+				Anum_pg_trigger_tgrelid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(relid));
+	ScanKeyInit(&key[1],
+				Anum_pg_trigger_tgname,
+				BTEqualStrategyNumber, F_NAMEEQ,
+				PointerGetDatum(stmt->subname));
+	tgscan = systable_beginscan(tgrel, TriggerRelidNameIndexId, true,
+								NULL, 2, key);
+	if (HeapTupleIsValid(tuple = systable_getnext(tgscan)))
+	{
+		Form_pg_trigger trigform;
+
+		trigform = (Form_pg_trigger) GETSTRUCT(tuple);
+		tgoid = trigform->oid;
+
+		/*
+		 * If the trigger descends from a trigger on a parent partitioned
+		 * table, reject the rename.  We don't allow a trigger in a partition
+		 * to differ in name from that of its parent: that would lead to an
+		 * inconsistency that pg_dump would not reproduce.
+		 */
+		if (OidIsValid(trigform->tgparentid))
+			ereport(ERROR,
+					errmsg("cannot rename trigger \"%s\" on table \"%s\"",
+						   stmt->subname, RelationGetRelationName(targetrel)),
+					errhint("Rename the trigger on the partitioned table \"%s\" instead.",
+							get_rel_name(get_partition_parent(relid, false))));
+
+
+		/* Rename the trigger on this relation ... */
+		renametrig_internal(tgrel, targetrel, tuple, stmt->newname,
+							stmt->subname);
+
+		/* ... and if it is partitioned, recurse to its partitions */
+		if (targetrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+		{
+			PartitionDesc partdesc = RelationGetPartitionDesc(targetrel, true);
+
+			for (int i = 0; i < partdesc->nparts; i++)
+			{
+				Oid			partitionId = partdesc->oids[i];
+
+				/* rename the clone descending from trigform->oid, if any */
+				renametrig_partition(tgrel, partitionId, trigform->oid,
+									 stmt->newname, stmt->subname);
+			}
+		}
+	}
+	else
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("trigger \"%s\" for table \"%s\" does not exist",
+						stmt->subname, RelationGetRelationName(targetrel))));
+	}
+
+	/* tgoid is always set here: the else branch above does not return */
+	ObjectAddressSet(address, TriggerRelationId, tgoid);
+
+	systable_endscan(tgscan);
+
+	table_close(tgrel, RowExclusiveLock);
+
+	/*
+	 * Close rel, but keep exclusive lock!
+	 */
+	relation_close(targetrel, NoLock);
+
+	return address;
+}
+
+/*
+ * Subroutine for renametrig -- perform the actual work of renaming one
+ * trigger on one table.
+ *
+ * tgrel: pg_trigger catalog relation, already opened by the caller.
+ * targetrel: relation the trigger is attached to.
+ * trigtup: pg_trigger tuple of the trigger being renamed.
+ * newname: the new trigger name.
+ * expected_name: the name the caller expects the trigger to have.
+ *
+ * If the trigger has a name different from the expected one, raise a
+ * NOTICE about it.
+ */
+static void
+renametrig_internal(Relation tgrel, Relation targetrel, HeapTuple trigtup,
+ const char *newname, const char *expected_name)
+{
+ HeapTuple tuple;
+ Form_pg_trigger tgform;
+ ScanKeyData key[2];
+ SysScanDesc tgscan;
+
+ /* If the trigger already has the new name, nothing to do. */
+ tgform = (Form_pg_trigger) GETSTRUCT(trigtup);
+ if (strcmp(NameStr(tgform->tgname), newname) == 0)
+ return;
+
+ /*
+ * Before actually trying the rename, search for triggers with the same
+ * name. The update would fail with an ugly message in that case, and it
+ * is better to throw a nicer error.
+ */
+ ScanKeyInit(&key[0],
+ Anum_pg_trigger_tgrelid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(RelationGetRelid(targetrel)));
+ ScanKeyInit(&key[1],
+ Anum_pg_trigger_tgname,
+ BTEqualStrategyNumber, F_NAMEEQ,
+ PointerGetDatum(newname));
+ tgscan = systable_beginscan(tgrel, TriggerRelidNameIndexId, true,
+ NULL, 2, key);
+ if (HeapTupleIsValid(tuple = systable_getnext(tgscan)))
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg("trigger \"%s\" for relation \"%s\" already exists",
+ newname, RelationGetRelationName(targetrel))));
+ systable_endscan(tgscan);
+
+ /*
+ * The target name is free; update the existing pg_trigger tuple with it.
+ */
+ tuple = heap_copytuple(trigtup); /* need a modifiable copy */
+ tgform = (Form_pg_trigger) GETSTRUCT(tuple);
+
+ /*
+ * If the trigger has a name different from what we expected, let the user
+ * know. (We can proceed anyway, since we must have reached here following
+ * a tgparentid link.)
+ */
+ if (strcmp(NameStr(tgform->tgname), expected_name) != 0)
+ ereport(NOTICE,
+ errmsg("renamed trigger \"%s\" on relation \"%s\"",
+ NameStr(tgform->tgname),
+ RelationGetRelationName(targetrel)));
+
+ namestrcpy(&tgform->tgname, newname);
+
+ CatalogTupleUpdate(tgrel, &tuple->t_self, tuple);
+
+ InvokeObjectPostAlterHook(TriggerRelationId, tgform->oid, 0);
+
+ /*
+ * Invalidate relation's relcache entry so that other backends (and this
+ * one too!) are sent SI message to make them rebuild relcache entries.
+ * (Ideally this should happen automatically...)
+ */
+ CacheInvalidateRelcache(targetrel);
+}
+
+/*
+ * Subroutine for renametrig -- Helper for recursing to partitions when
+ * renaming triggers on a partitioned table.
+ *
+ * tgrel: pg_trigger catalog relation, already opened by the caller.
+ * partitionId: OID of the partition whose trigger clone is to be renamed.
+ * parentTriggerOid: OID of the parent trigger the clone descends from
+ * (matched against the child's tgparentid).
+ * newname: the new trigger name.
+ * expected_name: the name the caller expects the child trigger to have.
+ */
+static void
+renametrig_partition(Relation tgrel, Oid partitionId, Oid parentTriggerOid,
+ const char *newname, const char *expected_name)
+{
+ SysScanDesc tgscan;
+ ScanKeyData key;
+ HeapTuple tuple;
+
+ /*
+ * Given a relation and the OID of a trigger on parent relation, find the
+ * corresponding trigger in the child and rename that trigger to the given
+ * name.
+ */
+ ScanKeyInit(&key,
+ Anum_pg_trigger_tgrelid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(partitionId));
+ tgscan = systable_beginscan(tgrel, TriggerRelidNameIndexId, true,
+ NULL, 1, &key);
+ while (HeapTupleIsValid(tuple = systable_getnext(tgscan)))
+ {
+ Form_pg_trigger tgform = (Form_pg_trigger) GETSTRUCT(tuple);
+ Relation partitionRel;
+
+ if (tgform->tgparentid != parentTriggerOid)
+ continue; /* not our trigger */
+
+ /* NoLock: renametrig already locked the whole partition tree. */
+ partitionRel = table_open(partitionId, NoLock);
+
+ /* Rename the trigger on this partition */
+ renametrig_internal(tgrel, partitionRel, tuple, newname, expected_name);
+
+ /* And if this relation is partitioned, recurse to its partitions */
+ if (partitionRel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ {
+ PartitionDesc partdesc = RelationGetPartitionDesc(partitionRel,
+ true);
+
+ for (int i = 0; i < partdesc->nparts; i++)
+ {
+ Oid partitionId = partdesc->oids[i];
+
+ renametrig_partition(tgrel, partitionId, tgform->oid, newname,
+ NameStr(tgform->tgname));
+ }
+ }
+ table_close(partitionRel, NoLock);
+
+ /* There should be at most one matching tuple */
+ break;
+ }
+ systable_endscan(tgscan);
+}
+
+/*
+ * EnableDisableTriggerNew2()
+ *
+ * Called by ALTER TABLE ENABLE/DISABLE [ REPLICA | ALWAYS ] TRIGGER
+ * to change 'tgenabled' field for the specified trigger(s)
+ *
+ * rel: relation to process (caller must hold suitable lock on it)
+ * tgname: name of trigger to process, or NULL to scan all triggers
+ * tgparent: if not zero, process only triggers with this tgparentid
+ * fires_when: new value for tgenabled field. In addition to generic
+ * enablement/disablement, this also defines when the trigger
+ * should be fired in session replication roles.
+ * skip_system: if true, skip "system" triggers (constraint triggers)
+ * recurse: if true, recurse to partitions
+ *
+ * Caller should have checked permissions for the table; here we also
+ * enforce that superuser privilege is required to alter the state of
+ * system triggers
+ */
+void
+EnableDisableTriggerNew2(Relation rel, const char *tgname, Oid tgparent,
+ char fires_when, bool skip_system, bool recurse,
+ LOCKMODE lockmode)
+{
+ Relation tgrel;
+ int nkeys;
+ ScanKeyData keys[2];
+ SysScanDesc tgscan;
+ HeapTuple tuple;
+ bool found;
+ bool changed;
+
+ /* Scan the relevant entries in pg_trigger */
+ tgrel = table_open(TriggerRelationId, RowExclusiveLock);
+
+ ScanKeyInit(&keys[0],
+ Anum_pg_trigger_tgrelid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(RelationGetRelid(rel)));
+ if (tgname)
+ {
+ ScanKeyInit(&keys[1],
+ Anum_pg_trigger_tgname,
+ BTEqualStrategyNumber, F_NAMEEQ,
+ CStringGetDatum(tgname));
+ nkeys = 2;
+ }
+ else
+ nkeys = 1;
+
+ tgscan = systable_beginscan(tgrel, TriggerRelidNameIndexId, true,
+ NULL, nkeys, keys);
+
+ found = changed = false;
+
+ while (HeapTupleIsValid(tuple = systable_getnext(tgscan)))
+ {
+ Form_pg_trigger oldtrig = (Form_pg_trigger) GETSTRUCT(tuple);
+
+ /* Skip triggers that are not clones of the requested parent trigger. */
+ if (OidIsValid(tgparent) && tgparent != oldtrig->tgparentid)
+ continue;
+
+ if (oldtrig->tgisinternal)
+ {
+ /* system trigger ... ok to process? */
+ if (skip_system)
+ continue;
+ if (!superuser())
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("permission denied: \"%s\" is a system trigger",
+ NameStr(oldtrig->tgname))));
+ }
+
+ found = true;
+
+ if (oldtrig->tgenabled != fires_when)
+ {
+ /* need to change this one ... make a copy to scribble on */
+ HeapTuple newtup = heap_copytuple(tuple);
+ Form_pg_trigger newtrig = (Form_pg_trigger) GETSTRUCT(newtup);
+
+ newtrig->tgenabled = fires_when;
+
+ CatalogTupleUpdate(tgrel, &newtup->t_self, newtup);
+
+ heap_freetuple(newtup);
+
+ changed = true;
+ }
+
+ /*
+ * When altering FOR EACH ROW triggers on a partitioned table, do the
+ * same on the partitions as well, unless ONLY is specified.
+ *
+ * Note that we recurse even if we didn't change the trigger above,
+ * because the partitions' copy of the trigger may have a different
+ * value of tgenabled than the parent's trigger and thus might need to
+ * be changed.
+ */
+ if (recurse &&
+ rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
+ (TRIGGER_FOR_ROW(oldtrig->tgtype)))
+ {
+ PartitionDesc partdesc = RelationGetPartitionDesc(rel, true);
+ int i;
+
+ for (i = 0; i < partdesc->nparts; i++)
+ {
+ Relation part;
+
+ part = relation_open(partdesc->oids[i], lockmode);
+ /* Match on child triggers' tgparentid, not their name */
+ EnableDisableTriggerNew2(part, NULL, oldtrig->oid,
+ fires_when, skip_system, recurse,
+ lockmode);
+ table_close(part, NoLock); /* keep lock till commit */
+ }
+ }
+
+ InvokeObjectPostAlterHook(TriggerRelationId,
+ oldtrig->oid, 0);
+ }
+
+ systable_endscan(tgscan);
+
+ table_close(tgrel, RowExclusiveLock);
+
+ if (tgname && !found)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("trigger \"%s\" for table \"%s\" does not exist",
+ tgname, RelationGetRelationName(rel))));
+
+ /*
+ * If we changed anything, broadcast a SI inval message to force each
+ * backend (including our own!) to rebuild relation's relcache entry.
+ * Otherwise they will fail to apply the change promptly.
+ */
+ if (changed)
+ CacheInvalidateRelcache(rel);
+}
+
+/*
+ * ABI-compatible wrappers to emulate old versions of the above function.
+ * Do not call these versions in new code.
+ */
+/*
+ * Legacy entry point kept for ABI compatibility: forwards to
+ * EnableDisableTriggerNew2 with no parent-trigger filter (InvalidOid).
+ */
+void
+EnableDisableTriggerNew(Relation rel, const char *tgname,
+ char fires_when, bool skip_system, bool recurse,
+ LOCKMODE lockmode)
+{
+ /* The old API had no tgparentid filtering. */
+ EnableDisableTriggerNew2(rel, tgname, InvalidOid, fires_when,
+ skip_system, recurse, lockmode);
+}
+
+/*
+ * Oldest legacy entry point: forwards to EnableDisableTriggerNew2 with no
+ * parent-trigger filter and recursion always enabled.
+ */
+void
+EnableDisableTrigger(Relation rel, const char *tgname,
+ char fires_when, bool skip_system,
+ LOCKMODE lockmode)
+{
+ /* The original API always recursed and had no tgparentid filtering. */
+ EnableDisableTriggerNew2(rel, tgname, InvalidOid, fires_when,
+ skip_system, true, lockmode);
+}
+
+
+/*
+ * Build trigger data to attach to the given relcache entry.
+ *
+ * Note that trigger data attached to a relcache entry must be stored in
+ * CacheMemoryContext to ensure it survives as long as the relcache entry.
+ * But we should be running in a less long-lived working context. To avoid
+ * leaking cache memory if this routine fails partway through, we build a
+ * temporary TriggerDesc in working memory and then copy the completed
+ * structure into cache memory.
+ */
+void
+RelationBuildTriggers(Relation relation)
+{
+ TriggerDesc *trigdesc;
+ int numtrigs;
+ int maxtrigs;
+ Trigger *triggers;
+ Relation tgrel;
+ ScanKeyData skey;
+ SysScanDesc tgscan;
+ HeapTuple htup;
+ MemoryContext oldContext;
+ int i;
+
+ /*
+ * Allocate a working array to hold the triggers (the array is extended if
+ * necessary)
+ */
+ maxtrigs = 16;
+ triggers = (Trigger *) palloc(maxtrigs * sizeof(Trigger));
+ numtrigs = 0;
+
+ /*
+ * Note: since we scan the triggers using TriggerRelidNameIndexId, we will
+ * be reading the triggers in name order, except possibly during
+ * emergency-recovery operations (ie, IgnoreSystemIndexes). This in turn
+ * ensures that triggers will be fired in name order.
+ */
+ ScanKeyInit(&skey,
+ Anum_pg_trigger_tgrelid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(RelationGetRelid(relation)));
+
+ tgrel = table_open(TriggerRelationId, AccessShareLock);
+ tgscan = systable_beginscan(tgrel, TriggerRelidNameIndexId, true,
+ NULL, 1, &skey);
+
+ while (HeapTupleIsValid(htup = systable_getnext(tgscan)))
+ {
+ Form_pg_trigger pg_trigger = (Form_pg_trigger) GETSTRUCT(htup);
+ Trigger *build;
+ Datum datum;
+ bool isnull;
+
+ /* Double the working array when it fills up. */
+ if (numtrigs >= maxtrigs)
+ {
+ maxtrigs *= 2;
+ triggers = (Trigger *) repalloc(triggers, maxtrigs * sizeof(Trigger));
+ }
+ build = &(triggers[numtrigs]);
+
+ /* Copy the fixed-width catalog fields into the in-memory Trigger. */
+ build->tgoid = pg_trigger->oid;
+ build->tgname = DatumGetCString(DirectFunctionCall1(nameout,
+ NameGetDatum(&pg_trigger->tgname)));
+ build->tgfoid = pg_trigger->tgfoid;
+ build->tgtype = pg_trigger->tgtype;
+ build->tgenabled = pg_trigger->tgenabled;
+ build->tgisinternal = pg_trigger->tgisinternal;
+ build->tgisclone = OidIsValid(pg_trigger->tgparentid);
+ build->tgconstrrelid = pg_trigger->tgconstrrelid;
+ build->tgconstrindid = pg_trigger->tgconstrindid;
+ build->tgconstraint = pg_trigger->tgconstraint;
+ build->tgdeferrable = pg_trigger->tgdeferrable;
+ build->tginitdeferred = pg_trigger->tginitdeferred;
+ build->tgnargs = pg_trigger->tgnargs;
+ /* tgattr is first var-width field, so OK to access directly */
+ build->tgnattr = pg_trigger->tgattr.dim1;
+ if (build->tgnattr > 0)
+ {
+ build->tgattr = (int16 *) palloc(build->tgnattr * sizeof(int16));
+ memcpy(build->tgattr, &(pg_trigger->tgattr.values),
+ build->tgnattr * sizeof(int16));
+ }
+ else
+ build->tgattr = NULL;
+ /* tgargs is a bytea of consecutive NUL-terminated strings. */
+ if (build->tgnargs > 0)
+ {
+ bytea *val;
+ char *p;
+
+ val = DatumGetByteaPP(fastgetattr(htup,
+ Anum_pg_trigger_tgargs,
+ tgrel->rd_att, &isnull));
+ if (isnull)
+ elog(ERROR, "tgargs is null in trigger for relation \"%s\"",
+ RelationGetRelationName(relation));
+ p = (char *) VARDATA_ANY(val);
+ build->tgargs = (char **) palloc(build->tgnargs * sizeof(char *));
+ for (i = 0; i < build->tgnargs; i++)
+ {
+ build->tgargs[i] = pstrdup(p);
+ p += strlen(p) + 1;
+ }
+ }
+ else
+ build->tgargs = NULL;
+
+ datum = fastgetattr(htup, Anum_pg_trigger_tgoldtable,
+ tgrel->rd_att, &isnull);
+ if (!isnull)
+ build->tgoldtable =
+ DatumGetCString(DirectFunctionCall1(nameout, datum));
+ else
+ build->tgoldtable = NULL;
+
+ datum = fastgetattr(htup, Anum_pg_trigger_tgnewtable,
+ tgrel->rd_att, &isnull);
+ if (!isnull)
+ build->tgnewtable =
+ DatumGetCString(DirectFunctionCall1(nameout, datum));
+ else
+ build->tgnewtable = NULL;
+
+ datum = fastgetattr(htup, Anum_pg_trigger_tgqual,
+ tgrel->rd_att, &isnull);
+ if (!isnull)
+ build->tgqual = TextDatumGetCString(datum);
+ else
+ build->tgqual = NULL;
+
+ numtrigs++;
+ }
+
+ systable_endscan(tgscan);
+ table_close(tgrel, AccessShareLock);
+
+ /* There might not be any triggers */
+ if (numtrigs == 0)
+ {
+ pfree(triggers);
+ return;
+ }
+
+ /* Build trigdesc */
+ trigdesc = (TriggerDesc *) palloc0(sizeof(TriggerDesc));
+ trigdesc->triggers = triggers;
+ trigdesc->numtriggers = numtrigs;
+ for (i = 0; i < numtrigs; i++)
+ SetTriggerFlags(trigdesc, &(triggers[i]));
+
+ /* Copy completed trigdesc into cache storage */
+ oldContext = MemoryContextSwitchTo(CacheMemoryContext);
+ relation->trigdesc = CopyTriggerDesc(trigdesc);
+ MemoryContextSwitchTo(oldContext);
+
+ /* Release working memory */
+ FreeTriggerDesc(trigdesc);
+}
+
+/*
+ * Update the TriggerDesc's hint flags to include the specified trigger
+ */
+static void
+SetTriggerFlags(TriggerDesc *trigdesc, Trigger *trigger)
+{
+ int16 tgtype = trigger->tgtype;
+
+ /* OR this trigger's level/timing/event matches into the summary flags. */
+ trigdesc->trig_insert_before_row |=
+ TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_ROW,
+ TRIGGER_TYPE_BEFORE, TRIGGER_TYPE_INSERT);
+ trigdesc->trig_insert_after_row |=
+ TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_ROW,
+ TRIGGER_TYPE_AFTER, TRIGGER_TYPE_INSERT);
+ trigdesc->trig_insert_instead_row |=
+ TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_ROW,
+ TRIGGER_TYPE_INSTEAD, TRIGGER_TYPE_INSERT);
+ trigdesc->trig_insert_before_statement |=
+ TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_STATEMENT,
+ TRIGGER_TYPE_BEFORE, TRIGGER_TYPE_INSERT);
+ trigdesc->trig_insert_after_statement |=
+ TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_STATEMENT,
+ TRIGGER_TYPE_AFTER, TRIGGER_TYPE_INSERT);
+ trigdesc->trig_update_before_row |=
+ TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_ROW,
+ TRIGGER_TYPE_BEFORE, TRIGGER_TYPE_UPDATE);
+ trigdesc->trig_update_after_row |=
+ TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_ROW,
+ TRIGGER_TYPE_AFTER, TRIGGER_TYPE_UPDATE);
+ trigdesc->trig_update_instead_row |=
+ TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_ROW,
+ TRIGGER_TYPE_INSTEAD, TRIGGER_TYPE_UPDATE);
+ trigdesc->trig_update_before_statement |=
+ TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_STATEMENT,
+ TRIGGER_TYPE_BEFORE, TRIGGER_TYPE_UPDATE);
+ trigdesc->trig_update_after_statement |=
+ TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_STATEMENT,
+ TRIGGER_TYPE_AFTER, TRIGGER_TYPE_UPDATE);
+ trigdesc->trig_delete_before_row |=
+ TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_ROW,
+ TRIGGER_TYPE_BEFORE, TRIGGER_TYPE_DELETE);
+ trigdesc->trig_delete_after_row |=
+ TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_ROW,
+ TRIGGER_TYPE_AFTER, TRIGGER_TYPE_DELETE);
+ trigdesc->trig_delete_instead_row |=
+ TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_ROW,
+ TRIGGER_TYPE_INSTEAD, TRIGGER_TYPE_DELETE);
+ trigdesc->trig_delete_before_statement |=
+ TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_STATEMENT,
+ TRIGGER_TYPE_BEFORE, TRIGGER_TYPE_DELETE);
+ trigdesc->trig_delete_after_statement |=
+ TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_STATEMENT,
+ TRIGGER_TYPE_AFTER, TRIGGER_TYPE_DELETE);
+ /* there are no row-level truncate triggers */
+ trigdesc->trig_truncate_before_statement |=
+ TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_STATEMENT,
+ TRIGGER_TYPE_BEFORE, TRIGGER_TYPE_TRUNCATE);
+ trigdesc->trig_truncate_after_statement |=
+ TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_STATEMENT,
+ TRIGGER_TYPE_AFTER, TRIGGER_TYPE_TRUNCATE);
+
+ /* Transition-table flags depend on event type plus table declarations. */
+ trigdesc->trig_insert_new_table |=
+ (TRIGGER_FOR_INSERT(tgtype) &&
+ TRIGGER_USES_TRANSITION_TABLE(trigger->tgnewtable));
+ trigdesc->trig_update_old_table |=
+ (TRIGGER_FOR_UPDATE(tgtype) &&
+ TRIGGER_USES_TRANSITION_TABLE(trigger->tgoldtable));
+ trigdesc->trig_update_new_table |=
+ (TRIGGER_FOR_UPDATE(tgtype) &&
+ TRIGGER_USES_TRANSITION_TABLE(trigger->tgnewtable));
+ trigdesc->trig_delete_old_table |=
+ (TRIGGER_FOR_DELETE(tgtype) &&
+ TRIGGER_USES_TRANSITION_TABLE(trigger->tgoldtable));
+}
+
+/*
+ * Copy a TriggerDesc data structure.
+ *
+ * The copy is allocated in the current memory context.
+ */
+TriggerDesc *
+CopyTriggerDesc(TriggerDesc *trigdesc)
+{
+ TriggerDesc *newdesc;
+ Trigger *newtrigs;
+
+ /* An empty descriptor copies to NULL. */
+ if (trigdesc == NULL || trigdesc->numtriggers <= 0)
+ return NULL;
+
+ /* Flat-copy the descriptor struct itself. */
+ newdesc = (TriggerDesc *) palloc(sizeof(TriggerDesc));
+ memcpy(newdesc, trigdesc, sizeof(TriggerDesc));
+
+ /* Flat-copy the trigger array, then deep-copy each pointer field. */
+ newtrigs = (Trigger *) palloc(trigdesc->numtriggers * sizeof(Trigger));
+ memcpy(newtrigs, trigdesc->triggers,
+ trigdesc->numtriggers * sizeof(Trigger));
+ newdesc->triggers = newtrigs;
+
+ for (int i = 0; i < trigdesc->numtriggers; i++)
+ {
+ Trigger *trig = &newtrigs[i];
+
+ trig->tgname = pstrdup(trig->tgname);
+ if (trig->tgnattr > 0)
+ {
+ int16 *attrs;
+
+ attrs = (int16 *) palloc(trig->tgnattr * sizeof(int16));
+ memcpy(attrs, trig->tgattr, trig->tgnattr * sizeof(int16));
+ trig->tgattr = attrs;
+ }
+ if (trig->tgnargs > 0)
+ {
+ char **args;
+
+ args = (char **) palloc(trig->tgnargs * sizeof(char *));
+ for (int16 j = 0; j < trig->tgnargs; j++)
+ args[j] = pstrdup(trig->tgargs[j]);
+ trig->tgargs = args;
+ }
+ if (trig->tgqual)
+ trig->tgqual = pstrdup(trig->tgqual);
+ if (trig->tgoldtable)
+ trig->tgoldtable = pstrdup(trig->tgoldtable);
+ if (trig->tgnewtable)
+ trig->tgnewtable = pstrdup(trig->tgnewtable);
+ }
+
+ return newdesc;
+}
+
+/*
+ * Free a TriggerDesc data structure.
+ */
+void
+FreeTriggerDesc(TriggerDesc *trigdesc)
+{
+ Trigger *trigger;
+ int i;
+
+ if (trigdesc == NULL)
+ return;
+
+ /* Free each trigger's separately-allocated fields, then the arrays. */
+ trigger = trigdesc->triggers;
+ for (i = 0; i < trigdesc->numtriggers; i++)
+ {
+ pfree(trigger->tgname);
+ if (trigger->tgnattr > 0)
+ pfree(trigger->tgattr);
+ if (trigger->tgnargs > 0)
+ {
+ /* note: destructively counts tgnargs down as each arg is freed */
+ while (--(trigger->tgnargs) >= 0)
+ pfree(trigger->tgargs[trigger->tgnargs]);
+ pfree(trigger->tgargs);
+ }
+ if (trigger->tgqual)
+ pfree(trigger->tgqual);
+ if (trigger->tgoldtable)
+ pfree(trigger->tgoldtable);
+ if (trigger->tgnewtable)
+ pfree(trigger->tgnewtable);
+ trigger++;
+ }
+ pfree(trigdesc->triggers);
+ pfree(trigdesc);
+}
+
+/*
+ * Compare two TriggerDesc structures for logical equality.
+ *
+ * Currently unused; compiled out via NOT_USED.
+ */
+#ifdef NOT_USED
+bool
+equalTriggerDescs(TriggerDesc *trigdesc1, TriggerDesc *trigdesc2)
+{
+ int i,
+ j;
+
+ /*
+ * We need not examine the hint flags, just the trigger array itself; if
+ * we have the same triggers with the same types, the flags should match.
+ *
+ * As of 7.3 we assume trigger set ordering is significant in the
+ * comparison; so we just compare corresponding slots of the two sets.
+ *
+ * Note: comparing the stringToNode forms of the WHEN clauses means that
+ * parse column locations will affect the result. This is okay as long as
+ * this function is only used for detecting exact equality, as for example
+ * in checking for staleness of a cache entry.
+ */
+ if (trigdesc1 != NULL)
+ {
+ if (trigdesc2 == NULL)
+ return false;
+ if (trigdesc1->numtriggers != trigdesc2->numtriggers)
+ return false;
+ for (i = 0; i < trigdesc1->numtriggers; i++)
+ {
+ Trigger *trig1 = trigdesc1->triggers + i;
+ Trigger *trig2 = trigdesc2->triggers + i;
+
+ if (trig1->tgoid != trig2->tgoid)
+ return false;
+ if (strcmp(trig1->tgname, trig2->tgname) != 0)
+ return false;
+ if (trig1->tgfoid != trig2->tgfoid)
+ return false;
+ if (trig1->tgtype != trig2->tgtype)
+ return false;
+ if (trig1->tgenabled != trig2->tgenabled)
+ return false;
+ if (trig1->tgisinternal != trig2->tgisinternal)
+ return false;
+ if (trig1->tgisclone != trig2->tgisclone)
+ return false;
+ if (trig1->tgconstrrelid != trig2->tgconstrrelid)
+ return false;
+ if (trig1->tgconstrindid != trig2->tgconstrindid)
+ return false;
+ if (trig1->tgconstraint != trig2->tgconstraint)
+ return false;
+ if (trig1->tgdeferrable != trig2->tgdeferrable)
+ return false;
+ if (trig1->tginitdeferred != trig2->tginitdeferred)
+ return false;
+ if (trig1->tgnargs != trig2->tgnargs)
+ return false;
+ if (trig1->tgnattr != trig2->tgnattr)
+ return false;
+ if (trig1->tgnattr > 0 &&
+ memcmp(trig1->tgattr, trig2->tgattr,
+ trig1->tgnattr * sizeof(int16)) != 0)
+ return false;
+ for (j = 0; j < trig1->tgnargs; j++)
+ if (strcmp(trig1->tgargs[j], trig2->tgargs[j]) != 0)
+ return false;
+ if (trig1->tgqual == NULL && trig2->tgqual == NULL)
+ /* ok */ ;
+ else if (trig1->tgqual == NULL || trig2->tgqual == NULL)
+ return false;
+ else if (strcmp(trig1->tgqual, trig2->tgqual) != 0)
+ return false;
+ if (trig1->tgoldtable == NULL && trig2->tgoldtable == NULL)
+ /* ok */ ;
+ else if (trig1->tgoldtable == NULL || trig2->tgoldtable == NULL)
+ return false;
+ else if (strcmp(trig1->tgoldtable, trig2->tgoldtable) != 0)
+ return false;
+ if (trig1->tgnewtable == NULL && trig2->tgnewtable == NULL)
+ /* ok */ ;
+ else if (trig1->tgnewtable == NULL || trig2->tgnewtable == NULL)
+ return false;
+ else if (strcmp(trig1->tgnewtable, trig2->tgnewtable) != 0)
+ return false;
+ }
+ }
+ else if (trigdesc2 != NULL)
+ return false;
+ return true;
+}
+#endif /* NOT_USED */
+
+/*
+ * Check if there is a row-level trigger with transition tables that prevents
+ * a table from becoming an inheritance child or partition. Return the name
+ * of the first such incompatible trigger, or NULL if there is none.
+ */
+const char *
+FindTriggerIncompatibleWithInheritance(TriggerDesc *trigdesc)
+{
+ if (trigdesc == NULL)
+ return NULL;
+
+ for (int i = 0; i < trigdesc->numtriggers; i++)
+ {
+ Trigger *trig = &trigdesc->triggers[i];
+
+ /* Any declared transition table makes the trigger incompatible. */
+ if (trig->tgoldtable != NULL || trig->tgnewtable != NULL)
+ return trig->tgname;
+ }
+
+ return NULL;
+}
+
+/*
+ * Call a trigger function.
+ *
+ * trigdata: trigger descriptor.
+ * tgindx: trigger's index in finfo and instr arrays.
+ * finfo: array of cached trigger function call information.
+ * instr: optional array of EXPLAIN ANALYZE instrumentation state.
+ * per_tuple_context: memory context to execute the function in.
+ *
+ * Returns the tuple (or NULL) as returned by the function.
+ */
+static HeapTuple
+ExecCallTriggerFunc(TriggerData *trigdata,
+ int tgindx,
+ FmgrInfo *finfo,
+ Instrumentation *instr,
+ MemoryContext per_tuple_context)
+{
+ LOCAL_FCINFO(fcinfo, 0);
+ PgStat_FunctionCallUsage fcusage;
+ Datum result;
+ MemoryContext oldContext;
+
+ /*
+ * Protect against code paths that may fail to initialize transition table
+ * info.
+ */
+ Assert(((TRIGGER_FIRED_BY_INSERT(trigdata->tg_event) ||
+ TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event) ||
+ TRIGGER_FIRED_BY_DELETE(trigdata->tg_event)) &&
+ TRIGGER_FIRED_AFTER(trigdata->tg_event) &&
+ !(trigdata->tg_event & AFTER_TRIGGER_DEFERRABLE) &&
+ !(trigdata->tg_event & AFTER_TRIGGER_INITDEFERRED)) ||
+ (trigdata->tg_oldtable == NULL && trigdata->tg_newtable == NULL))
;
+
+ finfo += tgindx;
+
+ /*
+ * We cache fmgr lookup info, to avoid making the lookup again on each
+ * call.
+ */
+ if (finfo->fn_oid == InvalidOid)
+ fmgr_info(trigdata->tg_trigger->tgfoid, finfo);
+
+ Assert(finfo->fn_oid == trigdata->tg_trigger->tgfoid);
+
+ /*
+ * If doing EXPLAIN ANALYZE, start charging time to this trigger.
+ */
+ if (instr)
+ InstrStartNode(instr + tgindx);
+
+ /*
+ * Do the function evaluation in the per-tuple memory context, so that
+ * leaked memory will be reclaimed once per tuple. Note in particular that
+ * any new tuple created by the trigger function will live till the end of
+ * the tuple cycle.
+ */
+ oldContext = MemoryContextSwitchTo(per_tuple_context);
+
+ /*
+ * Call the function, passing no arguments but setting a context.
+ */
+ InitFunctionCallInfoData(*fcinfo, finfo, 0,
+ InvalidOid, (Node *) trigdata, NULL);
+
+ pgstat_init_function_usage(fcinfo, &fcusage);
+
+ /* Keep the trigger-nesting counter accurate even if the call errors out. */
+ MyTriggerDepth++;
+ PG_TRY();
+ {
+ result = FunctionCallInvoke(fcinfo);
+ }
+ PG_FINALLY();
+ {
+ MyTriggerDepth--;
+ }
+ PG_END_TRY();
+
+ pgstat_end_function_usage(&fcusage, true);
+
+ MemoryContextSwitchTo(oldContext);
+
+ /*
+ * Trigger protocol allows function to return a null pointer, but NOT to
+ * set the isnull result flag.
+ */
+ if (fcinfo->isnull)
+ ereport(ERROR,
+ (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED),
+ errmsg("trigger function %u returned null value",
+ fcinfo->flinfo->fn_oid)));
+
+ /*
+ * If doing EXPLAIN ANALYZE, stop charging time to this trigger, and count
+ * one "tuple returned" (really the number of firings).
+ */
+ if (instr)
+ InstrStopNode(instr + tgindx, 1);
+
+ return (HeapTuple) DatumGetPointer(result);
+}
+
+/*
+ * ExecBSInsertTriggers -- fire BEFORE STATEMENT triggers for INSERT.
+ */
+void
+ExecBSInsertTriggers(EState *estate, ResultRelInfo *relinfo)
+{
+ TriggerDesc *trigdesc;
+ int i;
+ TriggerData LocTriggerData = {0};
+
+ trigdesc = relinfo->ri_TrigDesc;
+
+ if (trigdesc == NULL)
+ return;
+ if (!trigdesc->trig_insert_before_statement)
+ return;
+
+ /* no-op if we already fired BS triggers in this context */
+ if (before_stmt_triggers_fired(RelationGetRelid(relinfo->ri_RelationDesc),
+ CMD_INSERT))
+ return;
+
+ LocTriggerData.type = T_TriggerData;
+ LocTriggerData.tg_event = TRIGGER_EVENT_INSERT |
+ TRIGGER_EVENT_BEFORE;
+ LocTriggerData.tg_relation = relinfo->ri_RelationDesc;
+ for (i = 0; i < trigdesc->numtriggers; i++)
+ {
+ Trigger *trigger = &trigdesc->triggers[i];
+ HeapTuple newtuple;
+
+ if (!TRIGGER_TYPE_MATCHES(trigger->tgtype,
+ TRIGGER_TYPE_STATEMENT,
+ TRIGGER_TYPE_BEFORE,
+ TRIGGER_TYPE_INSERT))
+ continue;
+ if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event,
+ NULL, NULL, NULL))
+ continue;
+
+ LocTriggerData.tg_trigger = trigger;
+ newtuple = ExecCallTriggerFunc(&LocTriggerData,
+ i,
+ relinfo->ri_TrigFunctions,
+ relinfo->ri_TrigInstrument,
+ GetPerTupleMemoryContext(estate));
+
+ /* Statement-level triggers must not return a tuple. */
+ if (newtuple)
+ ereport(ERROR,
+ (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED),
+ errmsg("BEFORE STATEMENT trigger cannot return a value")));
+ }
+}
+
+/*
+ * ExecASInsertTriggers -- queue AFTER STATEMENT trigger events for INSERT.
+ */
+void
+ExecASInsertTriggers(EState *estate, ResultRelInfo *relinfo,
+ TransitionCaptureState *transition_capture)
+{
+ TriggerDesc *desc = relinfo->ri_TrigDesc;
+
+ if (desc == NULL || !desc->trig_insert_after_statement)
+ return;
+
+ AfterTriggerSaveEvent(estate, relinfo, NULL, NULL,
+ TRIGGER_EVENT_INSERT,
+ false, NULL, NULL, NIL, NULL, transition_capture,
+ false);
+}
+
+/*
+ * ExecBRInsertTriggers -- fire BEFORE ROW triggers for INSERT.
+ *
+ * Returns false if a trigger suppressed the insert ("do nothing");
+ * otherwise returns true, with the slot possibly holding a
+ * trigger-modified tuple.
+ */
+bool
+ExecBRInsertTriggers(EState *estate, ResultRelInfo *relinfo,
+ TupleTableSlot *slot)
+{
+ TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
+ HeapTuple newtuple = NULL;
+ bool should_free; /* set by first ExecFetchSlotHeapTuple, only read after */
+ TriggerData LocTriggerData = {0};
+ int i;
+
+ LocTriggerData.type = T_TriggerData;
+ LocTriggerData.tg_event = TRIGGER_EVENT_INSERT |
+ TRIGGER_EVENT_ROW |
+ TRIGGER_EVENT_BEFORE;
+ LocTriggerData.tg_relation = relinfo->ri_RelationDesc;
+ for (i = 0; i < trigdesc->numtriggers; i++)
+ {
+ Trigger *trigger = &trigdesc->triggers[i];
+ HeapTuple oldtuple;
+
+ if (!TRIGGER_TYPE_MATCHES(trigger->tgtype,
+ TRIGGER_TYPE_ROW,
+ TRIGGER_TYPE_BEFORE,
+ TRIGGER_TYPE_INSERT))
+ continue;
+ if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event,
+ NULL, NULL, slot))
+ continue;
+
+ /* Materialize the slot's tuple lazily, on first applicable trigger. */
+ if (!newtuple)
+ newtuple = ExecFetchSlotHeapTuple(slot, true, &should_free);
+
+ LocTriggerData.tg_trigslot = slot;
+ LocTriggerData.tg_trigtuple = oldtuple = newtuple;
+ LocTriggerData.tg_trigger = trigger;
+ newtuple = ExecCallTriggerFunc(&LocTriggerData,
+ i,
+ relinfo->ri_TrigFunctions,
+ relinfo->ri_TrigInstrument,
+ GetPerTupleMemoryContext(estate));
+ if (newtuple == NULL)
+ {
+ if (should_free)
+ heap_freetuple(oldtuple);
+ return false; /* "do nothing" */
+ }
+ else if (newtuple != oldtuple)
+ {
+ ExecForceStoreHeapTuple(newtuple, slot, false);
+
+ /*
+ * After a tuple in a partition goes through a trigger, the user
+ * could have changed the partition key enough that the tuple no
+ * longer fits the partition. Verify that.
+ */
+ if (trigger->tgisclone &&
+ !ExecPartitionCheck(relinfo, slot, estate, false))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("moving row to another partition during a BEFORE FOR EACH ROW trigger is not supported"),
+ errdetail("Before executing trigger \"%s\", the row was to be in partition \"%s.%s\".",
+ trigger->tgname,
+ get_namespace_name(RelationGetNamespace(relinfo->ri_RelationDesc)),
+ RelationGetRelationName(relinfo->ri_RelationDesc))));
+
+ if (should_free)
+ heap_freetuple(oldtuple);
+
+ /* signal tuple should be re-fetched if used */
+ newtuple = NULL;
+ }
+ }
+
+ return true;
+}
+
+/*
+ * ExecARInsertTriggers -- queue AFTER ROW trigger events for INSERT.
+ */
+void
+ExecARInsertTriggers(EState *estate, ResultRelInfo *relinfo,
+ TupleTableSlot *slot, List *recheckIndexes,
+ TransitionCaptureState *transition_capture)
+{
+ TriggerDesc *desc = relinfo->ri_TrigDesc;
+ bool fire_row_triggers = (desc != NULL &&
+ desc->trig_insert_after_row);
+ bool capture_new_table = (transition_capture != NULL &&
+ transition_capture->tcs_insert_new_table);
+
+ if (!fire_row_triggers && !capture_new_table)
+ return;
+
+ AfterTriggerSaveEvent(estate, relinfo, NULL, NULL,
+ TRIGGER_EVENT_INSERT,
+ true, NULL, slot,
+ recheckIndexes, NULL,
+ transition_capture,
+ false);
+}
+
+/*
+ * ExecIRInsertTriggers -- fire INSTEAD OF ROW triggers for INSERT.
+ *
+ * Returns false if a trigger suppressed the operation ("do nothing");
+ * otherwise returns true, with the slot possibly holding a
+ * trigger-modified tuple.
+ */
+bool
+ExecIRInsertTriggers(EState *estate, ResultRelInfo *relinfo,
+ TupleTableSlot *slot)
+{
+ TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
+ HeapTuple newtuple = NULL;
+ bool should_free; /* set by first ExecFetchSlotHeapTuple, only read after */
+ TriggerData LocTriggerData = {0};
+ int i;
+
+ LocTriggerData.type = T_TriggerData;
+ LocTriggerData.tg_event = TRIGGER_EVENT_INSERT |
+ TRIGGER_EVENT_ROW |
+ TRIGGER_EVENT_INSTEAD;
+ LocTriggerData.tg_relation = relinfo->ri_RelationDesc;
+ for (i = 0; i < trigdesc->numtriggers; i++)
+ {
+ Trigger *trigger = &trigdesc->triggers[i];
+ HeapTuple oldtuple;
+
+ if (!TRIGGER_TYPE_MATCHES(trigger->tgtype,
+ TRIGGER_TYPE_ROW,
+ TRIGGER_TYPE_INSTEAD,
+ TRIGGER_TYPE_INSERT))
+ continue;
+ if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event,
+ NULL, NULL, slot))
+ continue;
+
+ /* Materialize the slot's tuple lazily, on first applicable trigger. */
+ if (!newtuple)
+ newtuple = ExecFetchSlotHeapTuple(slot, true, &should_free);
+
+ LocTriggerData.tg_trigslot = slot;
+ LocTriggerData.tg_trigtuple = oldtuple = newtuple;
+ LocTriggerData.tg_trigger = trigger;
+ newtuple = ExecCallTriggerFunc(&LocTriggerData,
+ i,
+ relinfo->ri_TrigFunctions,
+ relinfo->ri_TrigInstrument,
+ GetPerTupleMemoryContext(estate));
+ if (newtuple == NULL)
+ {
+ if (should_free)
+ heap_freetuple(oldtuple);
+ return false; /* "do nothing" */
+ }
+ else if (newtuple != oldtuple)
+ {
+ ExecForceStoreHeapTuple(newtuple, slot, false);
+
+ if (should_free)
+ heap_freetuple(oldtuple);
+
+ /* signal tuple should be re-fetched if used */
+ newtuple = NULL;
+ }
+ }
+
+ return true;
+}
+
+/*
+ * ExecBSDeleteTriggers -- fire BEFORE STATEMENT triggers for DELETE.
+ */
+void
+ExecBSDeleteTriggers(EState *estate, ResultRelInfo *relinfo)
+{
+ TriggerDesc *trigdesc;
+ int i;
+ TriggerData LocTriggerData = {0};
+
+ trigdesc = relinfo->ri_TrigDesc;
+
+ if (trigdesc == NULL)
+ return;
+ if (!trigdesc->trig_delete_before_statement)
+ return;
+
+ /* no-op if we already fired BS triggers in this context */
+ if (before_stmt_triggers_fired(RelationGetRelid(relinfo->ri_RelationDesc),
+ CMD_DELETE))
+ return;
+
+ LocTriggerData.type = T_TriggerData;
+ LocTriggerData.tg_event = TRIGGER_EVENT_DELETE |
+ TRIGGER_EVENT_BEFORE;
+ LocTriggerData.tg_relation = relinfo->ri_RelationDesc;
+ for (i = 0; i < trigdesc->numtriggers; i++)
+ {
+ Trigger *trigger = &trigdesc->triggers[i];
+ HeapTuple newtuple;
+
+ if (!TRIGGER_TYPE_MATCHES(trigger->tgtype,
+ TRIGGER_TYPE_STATEMENT,
+ TRIGGER_TYPE_BEFORE,
+ TRIGGER_TYPE_DELETE))
+ continue;
+ if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event,
+ NULL, NULL, NULL))
+ continue;
+
+ LocTriggerData.tg_trigger = trigger;
+ newtuple = ExecCallTriggerFunc(&LocTriggerData,
+ i,
+ relinfo->ri_TrigFunctions,
+ relinfo->ri_TrigInstrument,
+ GetPerTupleMemoryContext(estate));
+
+ /* Statement-level triggers must not return a tuple. */
+ if (newtuple)
+ ereport(ERROR,
+ (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED),
+ errmsg("BEFORE STATEMENT trigger cannot return a value")));
+ }
+}
+
+/*
+ * Queue AFTER STATEMENT triggers for DELETE.
+ *
+ * AFTER triggers are not fired immediately; AfterTriggerSaveEvent records
+ * the event for execution at the appropriate later time.
+ */
+void
+ExecASDeleteTriggers(EState *estate, ResultRelInfo *relinfo,
+ TransitionCaptureState *transition_capture)
+{
+ TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
+
+ if (trigdesc && trigdesc->trig_delete_after_statement)
+ AfterTriggerSaveEvent(estate, relinfo, NULL, NULL,
+ TRIGGER_EVENT_DELETE,
+ false, NULL, NULL, NIL, NULL, transition_capture,
+ false);
+}
+
+/*
+ * Execute BEFORE ROW DELETE triggers.
+ *
+ * Returns true if the caller can proceed with the delete.  Returns false if
+ * the caller needs to suppress the delete; additionally, if requested, we
+ * pass back the concurrently-updated tuple, if any.
+ */
+bool
+ExecBRDeleteTriggersNew(EState *estate, EPQState *epqstate,
+ ResultRelInfo *relinfo,
+ ItemPointer tupleid,
+ HeapTuple fdw_trigtuple,
+ TupleTableSlot **epqslot,
+ TM_Result *tmresult,
+ TM_FailureData *tmfd)
+{
+ TupleTableSlot *slot = ExecGetTriggerOldSlot(estate, relinfo);
+ TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
+ bool result = true;
+ TriggerData LocTriggerData = {0};
+ HeapTuple trigtuple;
+ bool should_free = false;
+ int i;
+
+ /* exactly one of tupleid and fdw_trigtuple must be supplied */
+ Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid));
+ if (fdw_trigtuple == NULL)
+ {
+ TupleTableSlot *epqslot_candidate = NULL;
+
+ /* fetch and lock the target row; bail out if it's gone */
+ if (!GetTupleForTrigger(estate, epqstate, relinfo, tupleid,
+ LockTupleExclusive, slot, &epqslot_candidate,
+ tmresult, tmfd))
+ return false;
+
+ /*
+ * If the tuple was concurrently updated and the caller of this
+ * function requested for the updated tuple, skip the trigger
+ * execution.
+ */
+ if (epqslot_candidate != NULL && epqslot != NULL)
+ {
+ *epqslot = epqslot_candidate;
+ return false;
+ }
+
+ trigtuple = ExecFetchSlotHeapTuple(slot, true, &should_free);
+ }
+ else
+ {
+ /* foreign-table case: caller already supplied the old tuple */
+ trigtuple = fdw_trigtuple;
+ ExecForceStoreHeapTuple(trigtuple, slot, false);
+ }
+
+ LocTriggerData.type = T_TriggerData;
+ LocTriggerData.tg_event = TRIGGER_EVENT_DELETE |
+ TRIGGER_EVENT_ROW |
+ TRIGGER_EVENT_BEFORE;
+ LocTriggerData.tg_relation = relinfo->ri_RelationDesc;
+ for (i = 0; i < trigdesc->numtriggers; i++)
+ {
+ HeapTuple newtuple;
+ Trigger *trigger = &trigdesc->triggers[i];
+
+ if (!TRIGGER_TYPE_MATCHES(trigger->tgtype,
+ TRIGGER_TYPE_ROW,
+ TRIGGER_TYPE_BEFORE,
+ TRIGGER_TYPE_DELETE))
+ continue;
+ if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event,
+ NULL, slot, NULL))
+ continue;
+
+ LocTriggerData.tg_trigslot = slot;
+ LocTriggerData.tg_trigtuple = trigtuple;
+ LocTriggerData.tg_trigger = trigger;
+ newtuple = ExecCallTriggerFunc(&LocTriggerData,
+ i,
+ relinfo->ri_TrigFunctions,
+ relinfo->ri_TrigInstrument,
+ GetPerTupleMemoryContext(estate));
+ if (newtuple == NULL)
+ {
+ result = false; /* tell caller to suppress delete */
+ break;
+ }
+ /* a replacement tuple is meaningless for DELETE; discard it */
+ if (newtuple != trigtuple)
+ heap_freetuple(newtuple);
+ }
+ if (should_free)
+ heap_freetuple(trigtuple);
+
+ return result;
+}
+
+/*
+ * ABI-compatible wrapper to emulate old version of the above function.
+ * Do not call this version in new code.
+ *
+ * Simply forwards to ExecBRDeleteTriggersNew, passing NULL for the newer
+ * tmresult/tmfd output arguments the old API did not have.
+ */
+bool
+ExecBRDeleteTriggers(EState *estate, EPQState *epqstate,
+ ResultRelInfo *relinfo,
+ ItemPointer tupleid,
+ HeapTuple fdw_trigtuple,
+ TupleTableSlot **epqslot)
+{
+ return ExecBRDeleteTriggersNew(estate, epqstate, relinfo, tupleid,
+ fdw_trigtuple, epqslot, NULL, NULL);
+}
+
+/*
+ * Queue AFTER ROW triggers for DELETE (and/or capture the old row for
+ * transition tables).
+ *
+ * Note: is_crosspart_update must be true if the DELETE is being performed
+ * as part of a cross-partition update.
+ */
+void
+ExecARDeleteTriggers(EState *estate,
+ ResultRelInfo *relinfo,
+ ItemPointer tupleid,
+ HeapTuple fdw_trigtuple,
+ TransitionCaptureState *transition_capture,
+ bool is_crosspart_update)
+{
+ TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
+
+ if ((trigdesc && trigdesc->trig_delete_after_row) ||
+ (transition_capture && transition_capture->tcs_delete_old_table))
+ {
+ TupleTableSlot *slot = ExecGetTriggerOldSlot(estate, relinfo);
+
+ /* exactly one of tupleid and fdw_trigtuple must be supplied */
+ Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid));
+ if (fdw_trigtuple == NULL)
+ /* no locking/EPQ needed here; just fetch the deleted row */
+ GetTupleForTrigger(estate,
+ NULL,
+ relinfo,
+ tupleid,
+ LockTupleExclusive,
+ slot,
+ NULL,
+ NULL,
+ NULL);
+ else
+ ExecForceStoreHeapTuple(fdw_trigtuple, slot, false);
+
+ AfterTriggerSaveEvent(estate, relinfo, NULL, NULL,
+ TRIGGER_EVENT_DELETE,
+ true, slot, NULL, NIL, NULL,
+ transition_capture,
+ is_crosspart_update);
+ }
+}
+
+/*
+ * Execute INSTEAD OF ROW DELETE triggers (used for views).
+ *
+ * Returns true if all triggers "allowed" the delete; false if any trigger
+ * returned NULL, meaning the delete was suppressed.
+ */
+bool
+ExecIRDeleteTriggers(EState *estate, ResultRelInfo *relinfo,
+ HeapTuple trigtuple)
+{
+ TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
+ TupleTableSlot *slot = ExecGetTriggerOldSlot(estate, relinfo);
+ TriggerData LocTriggerData = {0};
+ int i;
+
+ LocTriggerData.type = T_TriggerData;
+ LocTriggerData.tg_event = TRIGGER_EVENT_DELETE |
+ TRIGGER_EVENT_ROW |
+ TRIGGER_EVENT_INSTEAD;
+ LocTriggerData.tg_relation = relinfo->ri_RelationDesc;
+
+ ExecForceStoreHeapTuple(trigtuple, slot, false);
+
+ for (i = 0; i < trigdesc->numtriggers; i++)
+ {
+ HeapTuple rettuple;
+ Trigger *trigger = &trigdesc->triggers[i];
+
+ if (!TRIGGER_TYPE_MATCHES(trigger->tgtype,
+ TRIGGER_TYPE_ROW,
+ TRIGGER_TYPE_INSTEAD,
+ TRIGGER_TYPE_DELETE))
+ continue;
+ if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event,
+ NULL, slot, NULL))
+ continue;
+
+ LocTriggerData.tg_trigslot = slot;
+ LocTriggerData.tg_trigtuple = trigtuple;
+ LocTriggerData.tg_trigger = trigger;
+ rettuple = ExecCallTriggerFunc(&LocTriggerData,
+ i,
+ relinfo->ri_TrigFunctions,
+ relinfo->ri_TrigInstrument,
+ GetPerTupleMemoryContext(estate));
+ if (rettuple == NULL)
+ return false; /* Delete was suppressed */
+ /* a replacement tuple is meaningless for DELETE; discard it */
+ if (rettuple != trigtuple)
+ heap_freetuple(rettuple);
+ }
+ return true;
+}
+
+/*
+ * Fire BEFORE STATEMENT triggers for UPDATE on the given result relation.
+ *
+ * No-op if there are no such triggers or they already fired in this query
+ * context.  The set of updated columns is made available to column-specific
+ * triggers and WHEN clauses via tg_updatedcols.
+ */
+void
+ExecBSUpdateTriggers(EState *estate, ResultRelInfo *relinfo)
+{
+ TriggerDesc *trigdesc;
+ int i;
+ TriggerData LocTriggerData = {0};
+ Bitmapset *updatedCols;
+
+ trigdesc = relinfo->ri_TrigDesc;
+
+ if (trigdesc == NULL)
+ return;
+ if (!trigdesc->trig_update_before_statement)
+ return;
+
+ /* no-op if we already fired BS triggers in this context */
+ if (before_stmt_triggers_fired(RelationGetRelid(relinfo->ri_RelationDesc),
+ CMD_UPDATE))
+ return;
+
+ /* statement-level triggers operate on the parent table */
+ Assert(relinfo->ri_RootResultRelInfo == NULL);
+
+ updatedCols = ExecGetAllUpdatedCols(relinfo, estate);
+
+ LocTriggerData.type = T_TriggerData;
+ LocTriggerData.tg_event = TRIGGER_EVENT_UPDATE |
+ TRIGGER_EVENT_BEFORE;
+ LocTriggerData.tg_relation = relinfo->ri_RelationDesc;
+ LocTriggerData.tg_updatedcols = updatedCols;
+ for (i = 0; i < trigdesc->numtriggers; i++)
+ {
+ Trigger *trigger = &trigdesc->triggers[i];
+ HeapTuple newtuple;
+
+ if (!TRIGGER_TYPE_MATCHES(trigger->tgtype,
+ TRIGGER_TYPE_STATEMENT,
+ TRIGGER_TYPE_BEFORE,
+ TRIGGER_TYPE_UPDATE))
+ continue;
+ if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event,
+ updatedCols, NULL, NULL))
+ continue;
+
+ LocTriggerData.tg_trigger = trigger;
+ newtuple = ExecCallTriggerFunc(&LocTriggerData,
+ i,
+ relinfo->ri_TrigFunctions,
+ relinfo->ri_TrigInstrument,
+ GetPerTupleMemoryContext(estate));
+
+ if (newtuple)
+ ereport(ERROR,
+ (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED),
+ errmsg("BEFORE STATEMENT trigger cannot return a value")));
+ }
+}
+
+/*
+ * Queue AFTER STATEMENT triggers for UPDATE.
+ *
+ * As with other AS functions, the event is recorded via
+ * AfterTriggerSaveEvent for deferred execution rather than fired here.
+ */
+void
+ExecASUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
+ TransitionCaptureState *transition_capture)
+{
+ TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
+
+ /* statement-level triggers operate on the parent table */
+ Assert(relinfo->ri_RootResultRelInfo == NULL);
+
+ if (trigdesc && trigdesc->trig_update_after_statement)
+ AfterTriggerSaveEvent(estate, relinfo, NULL, NULL,
+ TRIGGER_EVENT_UPDATE,
+ false, NULL, NULL, NIL,
+ ExecGetAllUpdatedCols(relinfo, estate),
+ transition_capture,
+ false);
+}
+
+/*
+ * Execute BEFORE ROW UPDATE triggers.
+ *
+ * Returns true if the caller can proceed with the update; newslot then
+ * holds the (possibly trigger-modified) new tuple.  Returns false if a
+ * trigger returned NULL ("do nothing") or the target row could not be
+ * locked/fetched; tmresult/tmfd, when non-NULL, receive the lock outcome.
+ */
+bool
+ExecBRUpdateTriggersNew(EState *estate, EPQState *epqstate,
+ ResultRelInfo *relinfo,
+ ItemPointer tupleid,
+ HeapTuple fdw_trigtuple,
+ TupleTableSlot *newslot,
+ TM_Result *tmresult,
+ TM_FailureData *tmfd)
+{
+ TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
+ TupleTableSlot *oldslot = ExecGetTriggerOldSlot(estate, relinfo);
+ HeapTuple newtuple = NULL;
+ HeapTuple trigtuple;
+ bool should_free_trig = false;
+ bool should_free_new = false;
+ TriggerData LocTriggerData = {0};
+ int i;
+ Bitmapset *updatedCols;
+ LockTupleMode lockmode;
+
+ /* Determine lock mode to use */
+ lockmode = ExecUpdateLockMode(estate, relinfo);
+
+ /* exactly one of tupleid and fdw_trigtuple must be supplied */
+ Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid));
+ if (fdw_trigtuple == NULL)
+ {
+ TupleTableSlot *epqslot_candidate = NULL;
+
+ /* get a copy of the on-disk tuple we are planning to update */
+ if (!GetTupleForTrigger(estate, epqstate, relinfo, tupleid,
+ lockmode, oldslot, &epqslot_candidate,
+ tmresult, tmfd))
+ return false; /* cancel the update action */
+
+ /*
+ * In READ COMMITTED isolation level it's possible that target tuple
+ * was changed due to concurrent update. In that case we have a raw
+ * subplan output tuple in epqslot_candidate, and need to form a new
+ * insertable tuple using ExecGetUpdateNewTuple to replace the one we
+ * received in newslot. Neither we nor our callers have any further
+ * interest in the passed-in tuple, so it's okay to overwrite newslot
+ * with the newer data.
+ *
+ * (Typically, newslot was also generated by ExecGetUpdateNewTuple, so
+ * that epqslot_clean will be that same slot and the copy step below
+ * is not needed.)
+ */
+ if (epqslot_candidate != NULL)
+ {
+ TupleTableSlot *epqslot_clean;
+
+ epqslot_clean = ExecGetUpdateNewTuple(relinfo, epqslot_candidate,
+ oldslot);
+
+ if (newslot != epqslot_clean)
+ ExecCopySlot(newslot, epqslot_clean);
+ }
+
+ trigtuple = ExecFetchSlotHeapTuple(oldslot, true, &should_free_trig);
+ }
+ else
+ {
+ /* foreign-table case: caller already supplied the old tuple */
+ ExecForceStoreHeapTuple(fdw_trigtuple, oldslot, false);
+ trigtuple = fdw_trigtuple;
+ }
+
+ LocTriggerData.type = T_TriggerData;
+ LocTriggerData.tg_event = TRIGGER_EVENT_UPDATE |
+ TRIGGER_EVENT_ROW |
+ TRIGGER_EVENT_BEFORE;
+ LocTriggerData.tg_relation = relinfo->ri_RelationDesc;
+ updatedCols = ExecGetAllUpdatedCols(relinfo, estate);
+ LocTriggerData.tg_updatedcols = updatedCols;
+ for (i = 0; i < trigdesc->numtriggers; i++)
+ {
+ Trigger *trigger = &trigdesc->triggers[i];
+ HeapTuple oldtuple;
+
+ if (!TRIGGER_TYPE_MATCHES(trigger->tgtype,
+ TRIGGER_TYPE_ROW,
+ TRIGGER_TYPE_BEFORE,
+ TRIGGER_TYPE_UPDATE))
+ continue;
+ if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event,
+ updatedCols, oldslot, newslot))
+ continue;
+
+ /* materialize the new tuple lazily, on first applicable trigger */
+ if (!newtuple)
+ newtuple = ExecFetchSlotHeapTuple(newslot, true, &should_free_new);
+
+ LocTriggerData.tg_trigslot = oldslot;
+ LocTriggerData.tg_trigtuple = trigtuple;
+ LocTriggerData.tg_newtuple = oldtuple = newtuple;
+ LocTriggerData.tg_newslot = newslot;
+ LocTriggerData.tg_trigger = trigger;
+ newtuple = ExecCallTriggerFunc(&LocTriggerData,
+ i,
+ relinfo->ri_TrigFunctions,
+ relinfo->ri_TrigInstrument,
+ GetPerTupleMemoryContext(estate));
+
+ if (newtuple == NULL)
+ {
+ if (should_free_trig)
+ heap_freetuple(trigtuple);
+ if (should_free_new)
+ heap_freetuple(oldtuple);
+ return false; /* "do nothing" */
+ }
+ else if (newtuple != oldtuple)
+ {
+ ExecForceStoreHeapTuple(newtuple, newslot, false);
+
+ /*
+ * If the tuple returned by the trigger / being stored, is the old
+ * row version, and the heap tuple passed to the trigger was
+ * allocated locally, materialize the slot. Otherwise we might
+ * free it while still referenced by the slot.
+ */
+ if (should_free_trig && newtuple == trigtuple)
+ ExecMaterializeSlot(newslot);
+
+ if (should_free_new)
+ heap_freetuple(oldtuple);
+
+ /* signal tuple should be re-fetched if used */
+ newtuple = NULL;
+ }
+ }
+ if (should_free_trig)
+ heap_freetuple(trigtuple);
+
+ return true;
+}
+
+/*
+ * ABI-compatible wrapper to emulate old version of the above function.
+ * Do not call this version in new code.
+ *
+ * Forwards to ExecBRUpdateTriggersNew, passing NULL for the newer tmresult
+ * output argument the old API did not have.
+ */
+bool
+ExecBRUpdateTriggers(EState *estate, EPQState *epqstate,
+ ResultRelInfo *relinfo,
+ ItemPointer tupleid,
+ HeapTuple fdw_trigtuple,
+ TupleTableSlot *newslot,
+ TM_FailureData *tmfd)
+{
+ return ExecBRUpdateTriggersNew(estate, epqstate, relinfo, tupleid,
+ fdw_trigtuple, newslot, NULL, tmfd);
+}
+
+/*
+ * Queue AFTER ROW triggers for UPDATE (and/or capture transition rows).
+ *
+ * Note: 'src_partinfo' and 'dst_partinfo', when non-NULL, refer to the source
+ * and destination partitions, respectively, of a cross-partition update of
+ * the root partitioned table mentioned in the query, given by 'relinfo'.
+ * 'tupleid' in that case refers to the ctid of the "old" tuple in the source
+ * partition, and 'newslot' contains the "new" tuple in the destination
+ * partition. This interface allows to support the requirements of
+ * ExecCrossPartitionUpdateForeignKey(); is_crosspart_update must be true in
+ * that case.
+ */
+void
+ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
+ ResultRelInfo *src_partinfo,
+ ResultRelInfo *dst_partinfo,
+ ItemPointer tupleid,
+ HeapTuple fdw_trigtuple,
+ TupleTableSlot *newslot,
+ List *recheckIndexes,
+ TransitionCaptureState *transition_capture,
+ bool is_crosspart_update)
+{
+ TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
+
+ if ((trigdesc && trigdesc->trig_update_after_row) ||
+ (transition_capture &&
+ (transition_capture->tcs_update_old_table ||
+ transition_capture->tcs_update_new_table)))
+ {
+ /*
+ * Note: if the UPDATE is converted into a DELETE+INSERT as part of
+ * update-partition-key operation, then this function is also called
+ * separately for DELETE and INSERT to capture transition table rows.
+ * In such case, either old tuple or new tuple can be NULL.
+ */
+ TupleTableSlot *oldslot;
+ ResultRelInfo *tupsrc;
+
+ Assert((src_partinfo != NULL && dst_partinfo != NULL) ||
+ !is_crosspart_update);
+
+ /* the old tuple lives in the source partition, if there is one */
+ tupsrc = src_partinfo ? src_partinfo : relinfo;
+ oldslot = ExecGetTriggerOldSlot(estate, tupsrc);
+
+ if (fdw_trigtuple == NULL && ItemPointerIsValid(tupleid))
+ GetTupleForTrigger(estate,
+ NULL,
+ tupsrc,
+ tupleid,
+ LockTupleExclusive,
+ oldslot,
+ NULL,
+ NULL,
+ NULL);
+ else if (fdw_trigtuple != NULL)
+ ExecForceStoreHeapTuple(fdw_trigtuple, oldslot, false);
+ else
+ /* no old tuple available (e.g. INSERT leg of a partition move) */
+ ExecClearTuple(oldslot);
+
+ AfterTriggerSaveEvent(estate, relinfo,
+ src_partinfo, dst_partinfo,
+ TRIGGER_EVENT_UPDATE,
+ true,
+ oldslot, newslot, recheckIndexes,
+ ExecGetAllUpdatedCols(relinfo, estate),
+ transition_capture,
+ is_crosspart_update);
+ }
+}
+
+/*
+ * Execute INSTEAD OF ROW UPDATE triggers (used for views).
+ *
+ * Returns true if the caller can proceed (newslot holds the possibly
+ * trigger-modified new tuple); false if any trigger returned NULL,
+ * suppressing the update.
+ */
+bool
+ExecIRUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
+ HeapTuple trigtuple, TupleTableSlot *newslot)
+{
+ TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
+ TupleTableSlot *oldslot = ExecGetTriggerOldSlot(estate, relinfo);
+ HeapTuple newtuple = NULL;
+ bool should_free;
+ TriggerData LocTriggerData = {0};
+ int i;
+
+ LocTriggerData.type = T_TriggerData;
+ LocTriggerData.tg_event = TRIGGER_EVENT_UPDATE |
+ TRIGGER_EVENT_ROW |
+ TRIGGER_EVENT_INSTEAD;
+ LocTriggerData.tg_relation = relinfo->ri_RelationDesc;
+
+ ExecForceStoreHeapTuple(trigtuple, oldslot, false);
+
+ for (i = 0; i < trigdesc->numtriggers; i++)
+ {
+ Trigger *trigger = &trigdesc->triggers[i];
+ HeapTuple oldtuple;
+
+ if (!TRIGGER_TYPE_MATCHES(trigger->tgtype,
+ TRIGGER_TYPE_ROW,
+ TRIGGER_TYPE_INSTEAD,
+ TRIGGER_TYPE_UPDATE))
+ continue;
+ if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event,
+ NULL, oldslot, newslot))
+ continue;
+
+ /* materialize the new tuple lazily, on first applicable trigger */
+ if (!newtuple)
+ newtuple = ExecFetchSlotHeapTuple(newslot, true, &should_free);
+
+ LocTriggerData.tg_trigslot = oldslot;
+ LocTriggerData.tg_trigtuple = trigtuple;
+ LocTriggerData.tg_newslot = newslot;
+ LocTriggerData.tg_newtuple = oldtuple = newtuple;
+
+ LocTriggerData.tg_trigger = trigger;
+ newtuple = ExecCallTriggerFunc(&LocTriggerData,
+ i,
+ relinfo->ri_TrigFunctions,
+ relinfo->ri_TrigInstrument,
+ GetPerTupleMemoryContext(estate));
+ if (newtuple == NULL)
+ {
+ return false; /* "do nothing" */
+ }
+ else if (newtuple != oldtuple)
+ {
+ ExecForceStoreHeapTuple(newtuple, newslot, false);
+
+ if (should_free)
+ heap_freetuple(oldtuple);
+
+ /* signal tuple should be re-fetched if used */
+ newtuple = NULL;
+ }
+ }
+
+ return true;
+}
+
+/*
+ * Fire BEFORE STATEMENT triggers for TRUNCATE.
+ *
+ * Unlike the DELETE/UPDATE BS functions, there is no "already fired"
+ * tracking here.  A non-NULL trigger result raises a protocol error.
+ */
+void
+ExecBSTruncateTriggers(EState *estate, ResultRelInfo *relinfo)
+{
+ TriggerDesc *trigdesc;
+ int i;
+ TriggerData LocTriggerData = {0};
+
+ trigdesc = relinfo->ri_TrigDesc;
+
+ if (trigdesc == NULL)
+ return;
+ if (!trigdesc->trig_truncate_before_statement)
+ return;
+
+ LocTriggerData.type = T_TriggerData;
+ LocTriggerData.tg_event = TRIGGER_EVENT_TRUNCATE |
+ TRIGGER_EVENT_BEFORE;
+ LocTriggerData.tg_relation = relinfo->ri_RelationDesc;
+
+ for (i = 0; i < trigdesc->numtriggers; i++)
+ {
+ Trigger *trigger = &trigdesc->triggers[i];
+ HeapTuple newtuple;
+
+ if (!TRIGGER_TYPE_MATCHES(trigger->tgtype,
+ TRIGGER_TYPE_STATEMENT,
+ TRIGGER_TYPE_BEFORE,
+ TRIGGER_TYPE_TRUNCATE))
+ continue;
+ if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event,
+ NULL, NULL, NULL))
+ continue;
+
+ LocTriggerData.tg_trigger = trigger;
+ newtuple = ExecCallTriggerFunc(&LocTriggerData,
+ i,
+ relinfo->ri_TrigFunctions,
+ relinfo->ri_TrigInstrument,
+ GetPerTupleMemoryContext(estate));
+
+ if (newtuple)
+ ereport(ERROR,
+ (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED),
+ errmsg("BEFORE STATEMENT trigger cannot return a value")));
+ }
+}
+
+/*
+ * Queue AFTER STATEMENT triggers for TRUNCATE.
+ */
+void
+ExecASTruncateTriggers(EState *estate, ResultRelInfo *relinfo)
+{
+ TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
+
+ if (trigdesc && trigdesc->trig_truncate_after_statement)
+ AfterTriggerSaveEvent(estate, relinfo,
+ NULL, NULL,
+ TRIGGER_EVENT_TRUNCATE,
+ false, NULL, NULL, NIL, NULL, NULL,
+ false);
+}
+
+
+/*
+ * Fetch tuple into "oldslot", dealing with locking and EPQ if necessary
+ *
+ * If epqslot is non-NULL, the tuple is locked (using the given lockmode)
+ * and, on a concurrent update under READ COMMITTED, the EvalPlanQual
+ * result is returned in *epqslot.  If epqslot is NULL, the tuple is simply
+ * fetched with SnapshotAny and assumed to exist.  tmresultp/tmfdp, when
+ * non-NULL, receive the table_tuple_lock outcome.
+ *
+ * Returns false if the tuple should not be processed (already deleted,
+ * self-modified, or EPQ says it no longer qualifies).
+ */
+static bool
+GetTupleForTrigger(EState *estate,
+ EPQState *epqstate,
+ ResultRelInfo *relinfo,
+ ItemPointer tid,
+ LockTupleMode lockmode,
+ TupleTableSlot *oldslot,
+ TupleTableSlot **epqslot,
+ TM_Result *tmresultp,
+ TM_FailureData *tmfdp)
+{
+ Relation relation = relinfo->ri_RelationDesc;
+
+ if (epqslot != NULL)
+ {
+ TM_Result test;
+ TM_FailureData tmfd;
+ int lockflags = 0;
+
+ *epqslot = NULL;
+
+ /* caller must pass an epqstate if EvalPlanQual is possible */
+ Assert(epqstate != NULL);
+
+ /*
+ * lock tuple for update
+ */
+ if (!IsolationUsesXactSnapshot())
+ lockflags |= TUPLE_LOCK_FLAG_FIND_LAST_VERSION;
+ test = table_tuple_lock(relation, tid, estate->es_snapshot, oldslot,
+ estate->es_output_cid,
+ lockmode, LockWaitBlock,
+ lockflags,
+ &tmfd);
+
+ /* Let the caller know about the status of this operation */
+ if (tmresultp)
+ *tmresultp = test;
+ if (tmfdp)
+ *tmfdp = tmfd;
+
+ switch (test)
+ {
+ case TM_SelfModified:
+
+ /*
+ * The target tuple was already updated or deleted by the
+ * current command, or by a later command in the current
+ * transaction. We ignore the tuple in the former case, and
+ * throw error in the latter case, for the same reasons
+ * enumerated in ExecUpdate and ExecDelete in
+ * nodeModifyTable.c.
+ */
+ if (tmfd.cmax != estate->es_output_cid)
+ ereport(ERROR,
+ (errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
+ errmsg("tuple to be updated was already modified by an operation triggered by the current command"),
+ errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
+
+ /* treat it as deleted; do not process */
+ return false;
+
+ case TM_Ok:
+ if (tmfd.traversed)
+ {
+ /*
+ * Recheck the tuple using EPQ. For MERGE, we leave this
+ * to the caller (it must do additional rechecking, and
+ * might end up executing a different action entirely).
+ */
+ if (estate->es_plannedstmt->commandType == CMD_MERGE)
+ {
+ if (tmresultp)
+ *tmresultp = TM_Updated;
+ return false;
+ }
+
+ *epqslot = EvalPlanQual(epqstate,
+ relation,
+ relinfo->ri_RangeTableIndex,
+ oldslot);
+
+ /*
+ * If PlanQual failed for updated tuple - we must not
+ * process this tuple!
+ */
+ if (TupIsNull(*epqslot))
+ {
+ *epqslot = NULL;
+ return false;
+ }
+ }
+ break;
+
+ case TM_Updated:
+ /* can only happen in serializable-ish isolation; else a bug */
+ if (IsolationUsesXactSnapshot())
+ ereport(ERROR,
+ (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+ errmsg("could not serialize access due to concurrent update")));
+ elog(ERROR, "unexpected table_tuple_lock status: %u", test);
+ break;
+
+ case TM_Deleted:
+ if (IsolationUsesXactSnapshot())
+ ereport(ERROR,
+ (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+ errmsg("could not serialize access due to concurrent delete")));
+ /* tuple was deleted */
+ return false;
+
+ case TM_Invisible:
+ elog(ERROR, "attempted to lock invisible tuple");
+ break;
+
+ default:
+ elog(ERROR, "unrecognized table_tuple_lock status: %u", test);
+ return false; /* keep compiler quiet */
+ }
+ }
+ else
+ {
+ /*
+ * We expect the tuple to be present, thus very simple error handling
+ * suffices.
+ */
+ if (!table_tuple_fetch_row_version(relation, tid, SnapshotAny,
+ oldslot))
+ elog(ERROR, "failed to fetch tuple for trigger");
+ }
+
+ return true;
+}
+
+/*
+ * Is trigger enabled to fire?
+ *
+ * Checks, in order: the trigger's enable state versus the current session
+ * replication role; for UPDATEs, whether any of the trigger's listed
+ * columns (tgattr) were modified; and finally the trigger's WHEN clause,
+ * evaluated with OLD/NEW made available as INNER_VAR/OUTER_VAR.
+ */
+static bool
+TriggerEnabled(EState *estate, ResultRelInfo *relinfo,
+ Trigger *trigger, TriggerEvent event,
+ Bitmapset *modifiedCols,
+ TupleTableSlot *oldslot, TupleTableSlot *newslot)
+{
+ /* Check replication-role-dependent enable state */
+ if (SessionReplicationRole == SESSION_REPLICATION_ROLE_REPLICA)
+ {
+ if (trigger->tgenabled == TRIGGER_FIRES_ON_ORIGIN ||
+ trigger->tgenabled == TRIGGER_DISABLED)
+ return false;
+ }
+ else /* ORIGIN or LOCAL role */
+ {
+ if (trigger->tgenabled == TRIGGER_FIRES_ON_REPLICA ||
+ trigger->tgenabled == TRIGGER_DISABLED)
+ return false;
+ }
+
+ /*
+ * Check for column-specific trigger (only possible for UPDATE, and in
+ * fact we *must* ignore tgattr for other event types)
+ */
+ if (trigger->tgnattr > 0 && TRIGGER_FIRED_BY_UPDATE(event))
+ {
+ int i;
+ bool modified;
+
+ modified = false;
+ for (i = 0; i < trigger->tgnattr; i++)
+ {
+ if (bms_is_member(trigger->tgattr[i] - FirstLowInvalidHeapAttributeNumber,
+ modifiedCols))
+ {
+ modified = true;
+ break;
+ }
+ }
+ /* none of the trigger's columns were updated: don't fire */
+ if (!modified)
+ return false;
+ }
+
+ /* Check for WHEN clause */
+ if (trigger->tgqual)
+ {
+ ExprState **predicate;
+ ExprContext *econtext;
+ MemoryContext oldContext;
+ int i;
+
+ Assert(estate != NULL);
+
+ /*
+ * trigger is an element of relinfo->ri_TrigDesc->triggers[]; find the
+ * matching element of relinfo->ri_TrigWhenExprs[]
+ */
+ i = trigger - relinfo->ri_TrigDesc->triggers;
+ predicate = &relinfo->ri_TrigWhenExprs[i];
+
+ /*
+ * If first time through for this WHEN expression, build expression
+ * nodetrees for it. Keep them in the per-query memory context so
+ * they'll survive throughout the query.
+ */
+ if (*predicate == NULL)
+ {
+ Node *tgqual;
+
+ oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
+ tgqual = stringToNode(trigger->tgqual);
+ /* Change references to OLD and NEW to INNER_VAR and OUTER_VAR */
+ ChangeVarNodes(tgqual, PRS2_OLD_VARNO, INNER_VAR, 0);
+ ChangeVarNodes(tgqual, PRS2_NEW_VARNO, OUTER_VAR, 0);
+ /* ExecPrepareQual wants implicit-AND form */
+ tgqual = (Node *) make_ands_implicit((Expr *) tgqual);
+ *predicate = ExecPrepareQual((List *) tgqual, estate);
+ MemoryContextSwitchTo(oldContext);
+ }
+
+ /*
+ * We will use the EState's per-tuple context for evaluating WHEN
+ * expressions (creating it if it's not already there).
+ */
+ econtext = GetPerTupleExprContext(estate);
+
+ /*
+ * Finally evaluate the expression, making the old and/or new tuples
+ * available as INNER_VAR/OUTER_VAR respectively.
+ */
+ econtext->ecxt_innertuple = oldslot;
+ econtext->ecxt_outertuple = newslot;
+ if (!ExecQual(*predicate, econtext))
+ return false;
+ }
+
+ return true;
+}
+
+
+/* ----------
+ * After-trigger stuff
+ *
+ * The AfterTriggersData struct holds data about pending AFTER trigger events
+ * during the current transaction tree. (BEFORE triggers are fired
+ * immediately so we don't need any persistent state about them.) The struct
+ * and most of its subsidiary data are kept in TopTransactionContext; however
+ * some data that can be discarded sooner appears in the CurTransactionContext
+ * of the relevant subtransaction. Also, the individual event records are
+ * kept in a separate sub-context of TopTransactionContext. This is done
+ * mainly so that it's easy to tell from a memory context dump how much space
+ * is being eaten by trigger events.
+ *
+ * Because the list of pending events can grow large, we go to some
+ * considerable effort to minimize per-event memory consumption. The event
+ * records are grouped into chunks and common data for similar events in the
+ * same chunk is only stored once.
+ *
+ * XXX We need to be able to save the per-event data in a file if it grows too
+ * large.
+ * ----------
+ */
+
+/* Per-trigger SET CONSTRAINT status */
+typedef struct SetConstraintTriggerData
+{
+ Oid sct_tgoid; /* OID of the constraint trigger */
+ bool sct_tgisdeferred; /* is it currently deferred? */
+} SetConstraintTriggerData;
+
+typedef struct SetConstraintTriggerData *SetConstraintTrigger;
+
+/*
+ * SET CONSTRAINT intra-transaction status.
+ *
+ * We make this a single palloc'd object so it can be copied and freed easily.
+ *
+ * all_isset and all_isdeferred are used to keep track
+ * of SET CONSTRAINTS ALL {DEFERRED, IMMEDIATE}.
+ *
+ * trigstates[] stores per-trigger tgisdeferred settings.
+ */
+typedef struct SetConstraintStateData
+{
+ bool all_isset;
+ bool all_isdeferred;
+ int numstates; /* number of trigstates[] entries in use */
+ int numalloc; /* allocated size of trigstates[] */
+ /* flexible array: per-trigger deferral overrides */
+ SetConstraintTriggerData trigstates[FLEXIBLE_ARRAY_MEMBER];
+} SetConstraintStateData;
+
+typedef SetConstraintStateData *SetConstraintState;
+
+
+/*
+ * Per-trigger-event data
+ *
+ * The actual per-event data, AfterTriggerEventData, includes DONE/IN_PROGRESS
+ * status bits, up to two tuple CTIDs, and optionally two OIDs of partitions.
+ * Each event record also has an associated AfterTriggerSharedData that is
+ * shared across all instances of similar events within a "chunk".
+ *
+ * For row-level triggers, we arrange not to waste storage on unneeded ctid
+ * fields. Updates of regular tables use two; inserts and deletes of regular
+ * tables use one; foreign tables always use zero and save the tuple(s) to a
+ * tuplestore. AFTER_TRIGGER_FDW_FETCH directs AfterTriggerExecute() to
+ * retrieve a fresh tuple or pair of tuples from that tuplestore, while
+ * AFTER_TRIGGER_FDW_REUSE directs it to use the most-recently-retrieved
+ * tuple(s). This permits storing tuples once regardless of the number of
+ * row-level triggers on a foreign table.
+ *
+ * When updates on partitioned tables cause rows to move between partitions,
+ * the OIDs of both partitions are stored too, so that the tuples can be
+ * fetched; such entries are marked AFTER_TRIGGER_CP_UPDATE (for "cross-
+ * partition update").
+ *
+ * Note that we need triggers on foreign tables to be fired in exactly the
+ * order they were queued, so that the tuples come out of the tuplestore in
+ * the right order. To ensure that, we forbid deferrable (constraint)
+ * triggers on foreign tables. This also ensures that such triggers do not
+ * get deferred into outer trigger query levels, meaning that it's okay to
+ * destroy the tuplestore at the end of the query level.
+ *
+ * Statement-level triggers always bear AFTER_TRIGGER_1CTID, though they
+ * require no ctid field. We lack the flag bit space to neatly represent that
+ * distinct case, and it seems unlikely to be worth much trouble.
+ *
+ * Note: ats_firing_id is initially zero and is set to something else when
+ * AFTER_TRIGGER_IN_PROGRESS is set. It indicates which trigger firing
+ * cycle the trigger will be fired in (or was fired in, if DONE is set).
+ * Although this is mutable state, we can keep it in AfterTriggerSharedData
+ * because all instances of the same type of event in a given event list will
+ * be fired at the same time, if they were queued between the same firing
+ * cycles. So we need only ensure that ats_firing_id is zero when attaching
+ * a new event to an existing AfterTriggerSharedData record.
+ */
+typedef uint32 TriggerFlags;
+
+/* low-order bits of ate_flags hold the offset to the shared-data record */
+#define AFTER_TRIGGER_OFFSET 0x07FFFFFF /* must be low-order bits */
+#define AFTER_TRIGGER_DONE 0x80000000
+#define AFTER_TRIGGER_IN_PROGRESS 0x40000000
+/* bits describing the size and tuple sources of this event */
+#define AFTER_TRIGGER_FDW_REUSE 0x00000000
+#define AFTER_TRIGGER_FDW_FETCH 0x20000000
+#define AFTER_TRIGGER_1CTID 0x10000000
+#define AFTER_TRIGGER_2CTID 0x30000000
+#define AFTER_TRIGGER_CP_UPDATE 0x08000000
+/* mask covering all of the tuple-source/size bits above */
+#define AFTER_TRIGGER_TUP_BITS 0x38000000
+typedef struct AfterTriggerSharedData *AfterTriggerShared;
+
+/* data shared by all similar events within one chunk (see header comment) */
+typedef struct AfterTriggerSharedData
+{
+ TriggerEvent ats_event; /* event type indicator, see trigger.h */
+ Oid ats_tgoid; /* the trigger's ID */
+ Oid ats_relid; /* the relation it's on */
+ CommandId ats_firing_id; /* ID for firing cycle */
+ struct AfterTriggersTableData *ats_table; /* transition table access */
+ Bitmapset *ats_modifiedcols; /* modified columns */
+} AfterTriggerSharedData;
+
+typedef struct AfterTriggerEventData *AfterTriggerEvent;
+
+/* the full-size event record, used for cross-partition updates */
+typedef struct AfterTriggerEventData
+{
+ TriggerFlags ate_flags; /* status bits and offset to shared data */
+ ItemPointerData ate_ctid1; /* inserted, deleted, or old updated tuple */
+ ItemPointerData ate_ctid2; /* new updated tuple */
+
+ /*
+ * During a cross-partition update of a partitioned table, we also store
+ * the OIDs of source and destination partitions that are needed to fetch
+ * the old (ctid1) and the new tuple (ctid2) from, respectively.
+ */
+ Oid ate_src_part;
+ Oid ate_dst_part;
+} AfterTriggerEventData;
+
+/* AfterTriggerEventData, minus ate_src_part, ate_dst_part */
+typedef struct AfterTriggerEventDataNoOids
+{
+ TriggerFlags ate_flags;
+ ItemPointerData ate_ctid1;
+ ItemPointerData ate_ctid2;
+} AfterTriggerEventDataNoOids;
+
+/* AfterTriggerEventData, minus ate_*_part and ate_ctid2 */
+typedef struct AfterTriggerEventDataOneCtid
+{
+ TriggerFlags ate_flags; /* status bits and offset to shared data */
+ ItemPointerData ate_ctid1; /* inserted, deleted, or old updated tuple */
+} AfterTriggerEventDataOneCtid;
+
+/* AfterTriggerEventData, minus ate_*_part, ate_ctid1 and ate_ctid2 */
+typedef struct AfterTriggerEventDataZeroCtids
+{
+ TriggerFlags ate_flags; /* status bits and offset to shared data */
+} AfterTriggerEventDataZeroCtids;
+
+/* actual stored size of an event record, per its TUP_BITS flags */
+#define SizeofTriggerEvent(evt) \
+ (((evt)->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_CP_UPDATE ? \
+ sizeof(AfterTriggerEventData) : \
+ (((evt)->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_2CTID ? \
+ sizeof(AfterTriggerEventDataNoOids) : \
+ (((evt)->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_1CTID ? \
+ sizeof(AfterTriggerEventDataOneCtid) : \
+ sizeof(AfterTriggerEventDataZeroCtids))))
+
+/* locate an event's shared-data record via the offset stored in its flags */
+#define GetTriggerSharedData(evt) \
+ ((AfterTriggerShared) ((char *) (evt) + ((evt)->ate_flags & AFTER_TRIGGER_OFFSET)))
+
+/*
+ * To avoid palloc overhead, we keep trigger events in arrays in successively-
+ * larger chunks (a slightly more sophisticated version of an expansible
+ * array). The space between CHUNK_DATA_START and freeptr is occupied by
+ * AfterTriggerEventData records; the space between endfree and endptr is
+ * occupied by AfterTriggerSharedData records.
+ */
+typedef struct AfterTriggerEventChunk
+{
+ struct AfterTriggerEventChunk *next; /* list link */
+ char *freeptr; /* start of free space in chunk */
+ char *endfree; /* end of free space in chunk */
+ char *endptr; /* end of chunk */
+ /* event data follows here */
+} AfterTriggerEventChunk;
+
+/* first event record starts right after the (MAXALIGN'd) chunk header */
+#define CHUNK_DATA_START(cptr) ((char *) (cptr) + MAXALIGN(sizeof(AfterTriggerEventChunk)))
+
+/* A list of events */
+typedef struct AfterTriggerEventList
+{
+ AfterTriggerEventChunk *head;
+ AfterTriggerEventChunk *tail;
+ char *tailfree; /* freeptr of tail chunk */
+} AfterTriggerEventList;
+
+/* Macros to help in iterating over a list of events */
+#define for_each_chunk(cptr, evtlist) \
+ for (cptr = (evtlist).head; cptr != NULL; cptr = cptr->next)
+/* events are variable-size, so advance by SizeofTriggerEvent each step */
+#define for_each_event(eptr, cptr) \
+ for (eptr = (AfterTriggerEvent) CHUNK_DATA_START(cptr); \
+ (char *) eptr < (cptr)->freeptr; \
+ eptr = (AfterTriggerEvent) (((char *) eptr) + SizeofTriggerEvent(eptr)))
+/* Use this if no special per-chunk processing is needed */
+#define for_each_event_chunk(eptr, cptr, evtlist) \
+ for_each_chunk(cptr, evtlist) for_each_event(eptr, cptr)
+
+/* Macros for iterating from a start point that might not be list start */
+#define for_each_chunk_from(cptr) \
+ for (; cptr != NULL; cptr = cptr->next)
+#define for_each_event_from(eptr, cptr) \
+ for (; \
+ (char *) eptr < (cptr)->freeptr; \
+ eptr = (AfterTriggerEvent) (((char *) eptr) + SizeofTriggerEvent(eptr)))
+
+
+/*
+ * All per-transaction data for the AFTER TRIGGERS module.
+ *
+ * AfterTriggersData has the following fields:
+ *
+ * firing_counter is incremented for each call of afterTriggerInvokeEvents.
+ * We mark firable events with the current firing cycle's ID so that we can
+ * tell which ones to work on. This ensures sane behavior if a trigger
+ * function chooses to do SET CONSTRAINTS: the inner SET CONSTRAINTS will
+ * only fire those events that weren't already scheduled for firing.
+ *
+ * state keeps track of the transaction-local effects of SET CONSTRAINTS.
+ * This is saved and restored across failed subtransactions.
+ *
+ * events is the current list of deferred events. This is global across
+ * all subtransactions of the current transaction. In a subtransaction
+ * abort, we know that the events added by the subtransaction are at the
+ * end of the list, so it is relatively easy to discard them. The event
+ * list chunks themselves are stored in event_cxt.
+ *
+ * query_depth is the current depth of nested AfterTriggerBeginQuery calls
+ * (-1 when the stack is empty).
+ *
+ * query_stack[query_depth] is the per-query-level data, including these fields:
+ *
+ * events is a list of AFTER trigger events queued by the current query.
+ * None of these are valid until the matching AfterTriggerEndQuery call
+ * occurs. At that point we fire immediate-mode triggers, and append any
+ * deferred events to the main events list.
+ *
+ * fdw_tuplestore is a tuplestore containing the foreign-table tuples
+ * needed by events queued by the current query. (Note: we use just one
+ * tuplestore even though more than one foreign table might be involved.
+ * This is okay because tuplestores don't really care what's in the tuples
+ * they store; but it's possible that someday it'd break.)
+ *
+ * tables is a List of AfterTriggersTableData structs for target tables
+ * of the current query (see below).
+ *
+ * maxquerydepth is just the allocated length of query_stack.
+ *
+ * trans_stack holds per-subtransaction data, including these fields:
+ *
+ * state is NULL or a pointer to a saved copy of the SET CONSTRAINTS
+ * state data. Each subtransaction level that modifies that state first
+ * saves a copy, which we use to restore the state if we abort.
+ *
+ * events is a copy of the events head/tail pointers,
+ * which we use to restore those values during subtransaction abort.
+ *
+ * query_depth is the subtransaction-start-time value of query_depth,
+ * which we similarly use to clean up at subtransaction abort.
+ *
+ * firing_counter is the subtransaction-start-time value of firing_counter.
+ * We use this to recognize which deferred triggers were fired (or marked
+ * for firing) within an aborted subtransaction.
+ *
+ * We use GetCurrentTransactionNestLevel() to determine the correct array
+ * index in trans_stack. maxtransdepth is the number of allocated entries in
+ * trans_stack. (By not keeping our own stack pointer, we can avoid trouble
+ * in cases where errors during subxact abort cause multiple invocations
+ * of AfterTriggerEndSubXact() at the same nesting depth.)
+ *
+ * We create an AfterTriggersTableData struct for each target table of the
+ * current query, and each operation mode (INSERT/UPDATE/DELETE), that has
+ * either transition tables or statement-level triggers. This is used to
+ * hold the relevant transition tables, as well as info tracking whether
+ * we already queued the statement triggers. (We use that info to prevent
+ * firing the same statement triggers more than once per statement, or really
+ * once per transition table set.) These structs, along with the transition
+ * table tuplestores, live in the (sub)transaction's CurTransactionContext.
+ * That's sufficient lifespan because we don't allow transition tables to be
+ * used by deferrable triggers, so they only need to survive until
+ * AfterTriggerEndQuery.
+ */
typedef struct AfterTriggersQueryData AfterTriggersQueryData;
typedef struct AfterTriggersTransData AfterTriggersTransData;
typedef struct AfterTriggersTableData AfterTriggersTableData;

/* Top-level per-transaction state; see the long comment above for details */
typedef struct AfterTriggersData
{
	CommandId	firing_counter; /* next firing ID to assign */
	SetConstraintState state;	/* the active S C state */
	AfterTriggerEventList events;	/* deferred-event list */
	MemoryContext event_cxt;	/* memory context for events, if any */

	/* per-query-level data: */
	AfterTriggersQueryData *query_stack;	/* array of structs shown below */
	int			query_depth;	/* current index in above array */
	int			maxquerydepth;	/* allocated len of above array */

	/* per-subtransaction-level data: */
	AfterTriggersTransData *trans_stack;	/* array of structs shown below */
	int			maxtransdepth;	/* allocated len of above array */
} AfterTriggersData;

/* Per-query-level state, one entry per nesting level in query_stack */
struct AfterTriggersQueryData
{
	AfterTriggerEventList events;	/* events pending from this query */
	Tuplestorestate *fdw_tuplestore;	/* foreign tuples for said events */
	List	   *tables;			/* list of AfterTriggersTableData, see below */
};

/* Per-subtransaction state, indexed by GetCurrentTransactionNestLevel() */
struct AfterTriggersTransData
{
	/* these fields are just for resetting at subtrans abort: */
	SetConstraintState state;	/* saved S C state, or NULL if not yet saved */
	AfterTriggerEventList events;	/* saved list pointer */
	int			query_depth;	/* saved query_depth */
	CommandId	firing_counter; /* saved firing_counter */
};

/* Per-target-table state for transition tables and statement triggers */
struct AfterTriggersTableData
{
	/* relid + cmdType form the lookup key for these structs: */
	Oid			relid;			/* target table's OID */
	CmdType		cmdType;		/* event type, CMD_INSERT/UPDATE/DELETE */
	bool		closed;			/* true when no longer OK to add tuples */
	bool		before_trig_done;	/* did we already queue BS triggers? */
	bool		after_trig_done;	/* did we already queue AS triggers? */
	AfterTriggerEventList after_trig_events;	/* if so, saved list pointer */

	/*
	 * We maintain separate transition tables for UPDATE/INSERT/DELETE since
	 * MERGE can run all three actions in a single statement. Note that UPDATE
	 * needs both old and new transition tables whereas INSERT needs only new,
	 * and DELETE needs only old.
	 */

	/* "old" transition table for UPDATE, if any */
	Tuplestorestate *old_upd_tuplestore;
	/* "new" transition table for UPDATE, if any */
	Tuplestorestate *new_upd_tuplestore;
	/* "old" transition table for DELETE, if any */
	Tuplestorestate *old_del_tuplestore;
	/* "new" transition table for INSERT, if any */
	Tuplestorestate *new_ins_tuplestore;

	TupleTableSlot *storeslot;	/* for converting to tuplestore's format */
};
+
/* The single instance of per-transaction AFTER-trigger state */
static AfterTriggersData afterTriggers;

/* Forward declarations of local functions */
static void AfterTriggerExecute(EState *estate,
								AfterTriggerEvent event,
								ResultRelInfo *relInfo,
								ResultRelInfo *src_relInfo,
								ResultRelInfo *dst_relInfo,
								TriggerDesc *trigdesc,
								FmgrInfo *finfo,
								Instrumentation *instr,
								MemoryContext per_tuple_context,
								TupleTableSlot *trig_tuple_slot1,
								TupleTableSlot *trig_tuple_slot2);
static AfterTriggersTableData *GetAfterTriggersTableData(Oid relid,
														 CmdType cmdType);
static TupleTableSlot *GetAfterTriggersStoreSlot(AfterTriggersTableData *table,
												 TupleDesc tupdesc);
static Tuplestorestate *GetAfterTriggersTransitionTable(int event,
														TupleTableSlot *oldslot,
														TupleTableSlot *newslot,
														TransitionCaptureState *transition_capture);
static void TransitionTableAddTuple(EState *estate,
									TransitionCaptureState *transition_capture,
									ResultRelInfo *relinfo,
									TupleTableSlot *slot,
									TupleTableSlot *original_insert_tuple,
									Tuplestorestate *tuplestore);
static void AfterTriggerFreeQuery(AfterTriggersQueryData *qs);
static SetConstraintState SetConstraintStateCreate(int numalloc);
static SetConstraintState SetConstraintStateCopy(SetConstraintState state);
static SetConstraintState SetConstraintStateAddItem(SetConstraintState state,
													Oid tgoid, bool tgisdeferred);
static void cancel_prior_stmt_triggers(Oid relid, CmdType cmdType, int tgevent);
+
+
/*
 * Get the FDW tuplestore for the current trigger query level, creating it
 * if necessary.
 */
static Tuplestorestate *
GetCurrentFDWTuplestore(void)
{
	Tuplestorestate *ret;

	ret = afterTriggers.query_stack[afterTriggers.query_depth].fdw_tuplestore;
	if (ret == NULL)
	{
		MemoryContext oldcxt;
		ResourceOwner saveResourceOwner;

		/*
		 * Make the tuplestore valid until end of subtransaction.  We really
		 * only need it until AfterTriggerEndQuery().
		 *
		 * Both the memory context and the resource owner are switched so
		 * that the tuplestore's memory and any temp files it spills to are
		 * owned at (sub)transaction level rather than by the current query.
		 */
		oldcxt = MemoryContextSwitchTo(CurTransactionContext);
		saveResourceOwner = CurrentResourceOwner;
		CurrentResourceOwner = CurTransactionResourceOwner;

		ret = tuplestore_begin_heap(false, false, work_mem);

		CurrentResourceOwner = saveResourceOwner;
		MemoryContextSwitchTo(oldcxt);

		/* Cache it so later events in this query level reuse the same store */
		afterTriggers.query_stack[afterTriggers.query_depth].fdw_tuplestore = ret;
	}

	return ret;
}
+
+/* ----------
+ * afterTriggerCheckState()
+ *
+ * Returns true if the trigger event is actually in state DEFERRED.
+ * ----------
+ */
+static bool
+afterTriggerCheckState(AfterTriggerShared evtshared)
+{
+ Oid tgoid = evtshared->ats_tgoid;
+ SetConstraintState state = afterTriggers.state;
+ int i;
+
+ /*
+ * For not-deferrable triggers (i.e. normal AFTER ROW triggers and
+ * constraints declared NOT DEFERRABLE), the state is always false.
+ */
+ if ((evtshared->ats_event & AFTER_TRIGGER_DEFERRABLE) == 0)
+ return false;
+
+ /*
+ * If constraint state exists, SET CONSTRAINTS might have been executed
+ * either for this trigger or for all triggers.
+ */
+ if (state != NULL)
+ {
+ /* Check for SET CONSTRAINTS for this specific trigger. */
+ for (i = 0; i < state->numstates; i++)
+ {
+ if (state->trigstates[i].sct_tgoid == tgoid)
+ return state->trigstates[i].sct_tgisdeferred;
+ }
+
+ /* Check for SET CONSTRAINTS ALL. */
+ if (state->all_isset)
+ return state->all_isdeferred;
+ }
+
+ /*
+ * Otherwise return the default state for the trigger.
+ */
+ return ((evtshared->ats_event & AFTER_TRIGGER_INITDEFERRED) != 0);
+}
+
+/* ----------
+ * afterTriggerCopyBitmap()
+ *
+ * Copy bitmap into AfterTriggerEvents memory context, which is where the after
+ * trigger events are kept.
+ * ----------
+ */
+static Bitmapset *
+afterTriggerCopyBitmap(Bitmapset *src)
+{
+ Bitmapset *dst;
+ MemoryContext oldcxt;
+
+ if (src == NULL)
+ return NULL;
+
+ /* Create event context if we didn't already */
+ if (afterTriggers.event_cxt == NULL)
+ afterTriggers.event_cxt =
+ AllocSetContextCreate(TopTransactionContext,
+ "AfterTriggerEvents",
+ ALLOCSET_DEFAULT_SIZES);
+
+ oldcxt = MemoryContextSwitchTo(afterTriggers.event_cxt);
+
+ dst = bms_copy(src);
+
+ MemoryContextSwitchTo(oldcxt);
+
+ return dst;
+}
+
/* ----------
 * afterTriggerAddEvent()
 *
 *	Add a new trigger event to the specified queue.
 *	The passed-in event data is copied.
 * ----------
 */
static void
afterTriggerAddEvent(AfterTriggerEventList *events,
					 AfterTriggerEvent event, AfterTriggerShared evtshared)
{
	Size		eventsize = SizeofTriggerEvent(event);
	Size		needed = eventsize + sizeof(AfterTriggerSharedData);
	AfterTriggerEventChunk *chunk;
	AfterTriggerShared newshared;
	AfterTriggerEvent newevent;

	/*
	 * If empty list or not enough room in the tail chunk, make a new chunk.
	 * We assume here that a new shared record will always be needed.
	 */
	chunk = events->tail;
	if (chunk == NULL ||
		chunk->endfree - chunk->freeptr < needed)
	{
		Size		chunksize;

		/* Create event context if we didn't already */
		if (afterTriggers.event_cxt == NULL)
			afterTriggers.event_cxt =
				AllocSetContextCreate(TopTransactionContext,
									  "AfterTriggerEvents",
									  ALLOCSET_DEFAULT_SIZES);

		/*
		 * Chunk size starts at 1KB and is allowed to increase up to 1MB.
		 * These numbers are fairly arbitrary, though there is a hard limit at
		 * AFTER_TRIGGER_OFFSET; else we couldn't link event records to their
		 * shared records using the available space in ate_flags.  Another
		 * constraint is that if the chunk size gets too huge, the search loop
		 * below would get slow given a (not too common) usage pattern with
		 * many distinct event types in a chunk.  Therefore, we double the
		 * preceding chunk size only if there weren't too many shared records
		 * in the preceding chunk; otherwise we halve it.  This gives us some
		 * ability to adapt to the actual usage pattern of the current query
		 * while still having large chunk sizes in typical usage.  All chunk
		 * sizes used should be MAXALIGN multiples, to ensure that the shared
		 * records will be aligned safely.
		 */
#define MIN_CHUNK_SIZE 1024
#define MAX_CHUNK_SIZE (1024*1024)

#if MAX_CHUNK_SIZE > (AFTER_TRIGGER_OFFSET+1)
#error MAX_CHUNK_SIZE must not exceed AFTER_TRIGGER_OFFSET
#endif

		if (chunk == NULL)
			chunksize = MIN_CHUNK_SIZE;
		else
		{
			/* preceding chunk size... */
			chunksize = chunk->endptr - (char *) chunk;
			/* check number of shared records in preceding chunk */
			if ((chunk->endptr - chunk->endfree) <=
				(100 * sizeof(AfterTriggerSharedData)))
				chunksize *= 2; /* okay, double it */
			else
				chunksize /= 2; /* too many shared records */
			chunksize = Min(chunksize, MAX_CHUNK_SIZE);
		}
		chunk = MemoryContextAlloc(afterTriggers.event_cxt, chunksize);
		chunk->next = NULL;
		chunk->freeptr = CHUNK_DATA_START(chunk);
		chunk->endptr = chunk->endfree = (char *) chunk + chunksize;
		Assert(chunk->endfree - chunk->freeptr >= needed);

		if (events->head == NULL)
			events->head = chunk;
		else
			events->tail->next = chunk;
		events->tail = chunk;
		/* events->tailfree is now out of sync, but we'll fix it below */
	}

	/*
	 * Try to locate a matching shared-data record already in the chunk.  If
	 * none, make a new one.  (Shared records are stored from the chunk's end
	 * backwards, so we scan from endptr down to endfree.)
	 */
	for (newshared = ((AfterTriggerShared) chunk->endptr) - 1;
		 (char *) newshared >= chunk->endfree;
		 newshared--)
	{
		/* compare all the fields that determine whether records can be shared */
		if (newshared->ats_tgoid == evtshared->ats_tgoid &&
			newshared->ats_relid == evtshared->ats_relid &&
			newshared->ats_event == evtshared->ats_event &&
			newshared->ats_table == evtshared->ats_table &&
			newshared->ats_firing_id == 0)
			break;
	}
	if ((char *) newshared < chunk->endfree)
	{
		/* no match found: carve a new shared record off the free space */
		*newshared = *evtshared;
		newshared->ats_firing_id = 0;	/* just to be sure */
		chunk->endfree = (char *) newshared;
	}

	/* Insert the data */
	newevent = (AfterTriggerEvent) chunk->freeptr;
	memcpy(newevent, event, eventsize);
	/* ... and link the new event to its shared record */
	newevent->ate_flags &= ~AFTER_TRIGGER_OFFSET;
	newevent->ate_flags |= (char *) newshared - (char *) newevent;

	chunk->freeptr += eventsize;
	events->tailfree = chunk->freeptr;
}
+
+/* ----------
+ * afterTriggerFreeEventList()
+ *
+ * Free all the event storage in the given list.
+ * ----------
+ */
+static void
+afterTriggerFreeEventList(AfterTriggerEventList *events)
+{
+ AfterTriggerEventChunk *chunk;
+
+ while ((chunk = events->head) != NULL)
+ {
+ events->head = chunk->next;
+ pfree(chunk);
+ }
+ events->tail = NULL;
+ events->tailfree = NULL;
+}
+
/* ----------
 * afterTriggerRestoreEventList()
 *
 *	Restore an event list to its prior length, removing all the events
 *	added since it had the value old_events.
 * ----------
 */
static void
afterTriggerRestoreEventList(AfterTriggerEventList *events,
							 const AfterTriggerEventList *old_events)
{
	AfterTriggerEventChunk *chunk;
	AfterTriggerEventChunk *next_chunk;

	if (old_events->tail == NULL)
	{
		/* restoring to a completely empty state, so free everything */
		afterTriggerFreeEventList(events);
	}
	else
	{
		/* copy back the saved head/tail/tailfree pointers */
		*events = *old_events;
		/* free any chunks after the last one we want to keep */
		for (chunk = events->tail->next; chunk != NULL; chunk = next_chunk)
		{
			next_chunk = chunk->next;
			pfree(chunk);
		}
		/* and clean up the tail chunk to be the right length */
		events->tail->next = NULL;
		events->tail->freeptr = events->tailfree;

		/*
		 * We don't make any effort to remove now-unused shared data records.
		 * They might still be useful, anyway.
		 */
	}
}
+
/* ----------
 * afterTriggerDeleteHeadEventChunk()
 *
 *	Remove the first chunk of events from the query level's event list.
 *	Keep any event list pointers elsewhere in the query level's data
 *	structures in sync.
 * ----------
 */
static void
afterTriggerDeleteHeadEventChunk(AfterTriggersQueryData *qs)
{
	AfterTriggerEventChunk *target = qs->events.head;
	ListCell   *lc;

	/* caller must ensure there is more than one chunk in the list */
	Assert(target && target->next);

	/*
	 * First, update any pointers in the per-table data, so that they won't be
	 * dangling.  Resetting obsoleted pointers to NULL will make
	 * cancel_prior_stmt_triggers start from the list head, which is fine.
	 */
	foreach(lc, qs->tables)
	{
		AfterTriggersTableData *table = (AfterTriggersTableData *) lfirst(lc);

		if (table->after_trig_done &&
			table->after_trig_events.tail == target)
		{
			table->after_trig_events.head = NULL;
			table->after_trig_events.tail = NULL;
			table->after_trig_events.tailfree = NULL;
		}
	}

	/* Now we can flush the head chunk */
	qs->events.head = target->next;
	pfree(target);
}
+
+
/* ----------
 * AfterTriggerExecute()
 *
 *	Fetch the required tuples back from the heap and fire one
 *	single trigger function.
 *
 *	Frequently, this will be fired many times in a row for triggers of
 *	a single relation.  Therefore, we cache the open relation and provide
 *	fmgr lookup cache space at the caller level.  (For triggers fired at
 *	the end of a query, we can even piggyback on the executor's state.)
 *
 *	When fired for a cross-partition update of a partitioned table, the old
 *	tuple is fetched using 'src_relInfo' (the source leaf partition) and
 *	the new tuple using 'dst_relInfo' (the destination leaf partition), though
 *	both are converted into the root partitioned table's format before passing
 *	to the trigger function.
 *
 *	event: event currently being fired.
 *	relInfo: result relation for event.
 *	src_relInfo: source partition of a cross-partition update
 *	dst_relInfo: its destination partition
 *	trigdesc: working copy of rel's trigger info.
 *	finfo: array of fmgr lookup cache entries (one per trigger in trigdesc).
 *	instr: array of EXPLAIN ANALYZE instrumentation nodes (one per trigger),
 *		or NULL if no instrumentation is wanted.
 *	per_tuple_context: memory context to call trigger function in.
 *	trig_tuple_slot1: scratch slot for tg_trigtuple (foreign tables only)
 *	trig_tuple_slot2: scratch slot for tg_newtuple (foreign tables only)
 * ----------
 */
static void
AfterTriggerExecute(EState *estate,
					AfterTriggerEvent event,
					ResultRelInfo *relInfo,
					ResultRelInfo *src_relInfo,
					ResultRelInfo *dst_relInfo,
					TriggerDesc *trigdesc,
					FmgrInfo *finfo, Instrumentation *instr,
					MemoryContext per_tuple_context,
					TupleTableSlot *trig_tuple_slot1,
					TupleTableSlot *trig_tuple_slot2)
{
	Relation	rel = relInfo->ri_RelationDesc;
	Relation	src_rel = src_relInfo->ri_RelationDesc;
	Relation	dst_rel = dst_relInfo->ri_RelationDesc;
	AfterTriggerShared evtshared = GetTriggerSharedData(event);
	Oid			tgoid = evtshared->ats_tgoid;
	TriggerData LocTriggerData = {0};
	HeapTuple	rettuple;
	int			tgindx;
	bool		should_free_trig = false;
	bool		should_free_new = false;

	/*
	 * Locate trigger in trigdesc.
	 */
	for (tgindx = 0; tgindx < trigdesc->numtriggers; tgindx++)
	{
		if (trigdesc->triggers[tgindx].tgoid == tgoid)
		{
			LocTriggerData.tg_trigger = &(trigdesc->triggers[tgindx]);
			break;
		}
	}
	if (LocTriggerData.tg_trigger == NULL)
		elog(ERROR, "could not find trigger %u", tgoid);

	/*
	 * If doing EXPLAIN ANALYZE, start charging time to this trigger.  We want
	 * to include time spent re-fetching tuples in the trigger cost.
	 */
	if (instr)
		InstrStartNode(instr + tgindx);

	/*
	 * Fetch the required tuple(s).  How we do that depends on the event's
	 * AFTER_TRIGGER_TUP_BITS layout bits.
	 */
	switch (event->ate_flags & AFTER_TRIGGER_TUP_BITS)
	{
		case AFTER_TRIGGER_FDW_FETCH:
			{
				Tuplestorestate *fdw_tuplestore = GetCurrentFDWTuplestore();

				if (!tuplestore_gettupleslot(fdw_tuplestore, true, false,
											 trig_tuple_slot1))
					elog(ERROR, "failed to fetch tuple1 for AFTER trigger");

				if ((evtshared->ats_event & TRIGGER_EVENT_OPMASK) ==
					TRIGGER_EVENT_UPDATE &&
					!tuplestore_gettupleslot(fdw_tuplestore, true, false,
											 trig_tuple_slot2))
					elog(ERROR, "failed to fetch tuple2 for AFTER trigger");
			}
			/* fall through */
		case AFTER_TRIGGER_FDW_REUSE:

			/*
			 * Store tuple in the slot so that tg_trigtuple does not reference
			 * tuplestore memory.  (It is formally possible for the trigger
			 * function to queue trigger events that add to the same
			 * tuplestore, which can push other tuples out of memory.)  The
			 * distinction is academic, because we start with a minimal tuple
			 * that is stored as a heap tuple, constructed in different memory
			 * context, in the slot anyway.
			 */
			LocTriggerData.tg_trigslot = trig_tuple_slot1;
			LocTriggerData.tg_trigtuple =
				ExecFetchSlotHeapTuple(trig_tuple_slot1, true, &should_free_trig);

			if ((evtshared->ats_event & TRIGGER_EVENT_OPMASK) ==
				TRIGGER_EVENT_UPDATE)
			{
				LocTriggerData.tg_newslot = trig_tuple_slot2;
				LocTriggerData.tg_newtuple =
					ExecFetchSlotHeapTuple(trig_tuple_slot2, true, &should_free_new);
			}
			else
			{
				LocTriggerData.tg_newtuple = NULL;
			}
			break;

		default:
			/* ordinary (non-FDW) event: re-fetch tuples by ctid */
			if (ItemPointerIsValid(&(event->ate_ctid1)))
			{
				TupleTableSlot *src_slot = ExecGetTriggerOldSlot(estate,
																 src_relInfo);

				/* use SnapshotAny: the queued row version must be found */
				if (!table_tuple_fetch_row_version(src_rel,
												   &(event->ate_ctid1),
												   SnapshotAny,
												   src_slot))
					elog(ERROR, "failed to fetch tuple1 for AFTER trigger");

				/*
				 * Store the tuple fetched from the source partition into the
				 * target (root partitioned) table slot, converting if needed.
				 */
				if (src_relInfo != relInfo)
				{
					TupleConversionMap *map = ExecGetChildToRootMap(src_relInfo);

					LocTriggerData.tg_trigslot = ExecGetTriggerOldSlot(estate, relInfo);
					if (map)
					{
						execute_attr_map_slot(map->attrMap,
											  src_slot,
											  LocTriggerData.tg_trigslot);
					}
					else
						ExecCopySlot(LocTriggerData.tg_trigslot, src_slot);
				}
				else
					LocTriggerData.tg_trigslot = src_slot;
				LocTriggerData.tg_trigtuple =
					ExecFetchSlotHeapTuple(LocTriggerData.tg_trigslot, false, &should_free_trig);
			}
			else
			{
				LocTriggerData.tg_trigtuple = NULL;
			}

			/* don't touch ctid2 if not there */
			if (((event->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_2CTID ||
				 (event->ate_flags & AFTER_TRIGGER_CP_UPDATE)) &&
				ItemPointerIsValid(&(event->ate_ctid2)))
			{
				TupleTableSlot *dst_slot = ExecGetTriggerNewSlot(estate,
																 dst_relInfo);

				if (!table_tuple_fetch_row_version(dst_rel,
												   &(event->ate_ctid2),
												   SnapshotAny,
												   dst_slot))
					elog(ERROR, "failed to fetch tuple2 for AFTER trigger");

				/*
				 * Store the tuple fetched from the destination partition into
				 * the target (root partitioned) table slot, converting if
				 * needed.
				 */
				if (dst_relInfo != relInfo)
				{
					TupleConversionMap *map = ExecGetChildToRootMap(dst_relInfo);

					LocTriggerData.tg_newslot = ExecGetTriggerNewSlot(estate, relInfo);
					if (map)
					{
						execute_attr_map_slot(map->attrMap,
											  dst_slot,
											  LocTriggerData.tg_newslot);
					}
					else
						ExecCopySlot(LocTriggerData.tg_newslot, dst_slot);
				}
				else
					LocTriggerData.tg_newslot = dst_slot;
				LocTriggerData.tg_newtuple =
					ExecFetchSlotHeapTuple(LocTriggerData.tg_newslot, false, &should_free_new);
			}
			else
			{
				LocTriggerData.tg_newtuple = NULL;
			}
	}

	/*
	 * Set up the tuplestore information to let the trigger have access to
	 * transition tables.  When we first make a transition table available to
	 * a trigger, mark it "closed" so that it cannot change anymore.  If any
	 * additional events of the same type get queued in the current trigger
	 * query level, they'll go into new transition tables.
	 */
	LocTriggerData.tg_oldtable = LocTriggerData.tg_newtable = NULL;
	if (evtshared->ats_table)
	{
		if (LocTriggerData.tg_trigger->tgoldtable)
		{
			if (TRIGGER_FIRED_BY_UPDATE(evtshared->ats_event))
				LocTriggerData.tg_oldtable = evtshared->ats_table->old_upd_tuplestore;
			else
				LocTriggerData.tg_oldtable = evtshared->ats_table->old_del_tuplestore;
			evtshared->ats_table->closed = true;
		}

		if (LocTriggerData.tg_trigger->tgnewtable)
		{
			if (TRIGGER_FIRED_BY_INSERT(evtshared->ats_event))
				LocTriggerData.tg_newtable = evtshared->ats_table->new_ins_tuplestore;
			else
				LocTriggerData.tg_newtable = evtshared->ats_table->new_upd_tuplestore;
			evtshared->ats_table->closed = true;
		}
	}

	/*
	 * Setup the remaining trigger information
	 */
	LocTriggerData.type = T_TriggerData;
	LocTriggerData.tg_event =
		evtshared->ats_event & (TRIGGER_EVENT_OPMASK | TRIGGER_EVENT_ROW);
	LocTriggerData.tg_relation = rel;
	if (TRIGGER_FOR_UPDATE(LocTriggerData.tg_trigger->tgtype))
		LocTriggerData.tg_updatedcols = evtshared->ats_modifiedcols;

	MemoryContextReset(per_tuple_context);

	/*
	 * Call the trigger and throw away any possibly returned updated tuple.
	 * (Don't let ExecCallTriggerFunc measure EXPLAIN time.)
	 */
	rettuple = ExecCallTriggerFunc(&LocTriggerData,
								   tgindx,
								   finfo,
								   NULL,
								   per_tuple_context);
	if (rettuple != NULL &&
		rettuple != LocTriggerData.tg_trigtuple &&
		rettuple != LocTriggerData.tg_newtuple)
		heap_freetuple(rettuple);

	/*
	 * Release resources
	 */
	if (should_free_trig)
		heap_freetuple(LocTriggerData.tg_trigtuple);
	if (should_free_new)
		heap_freetuple(LocTriggerData.tg_newtuple);

	/* don't clear slots' contents if foreign table */
	if (trig_tuple_slot1 == NULL)
	{
		if (LocTriggerData.tg_trigslot)
			ExecClearTuple(LocTriggerData.tg_trigslot);
		if (LocTriggerData.tg_newslot)
			ExecClearTuple(LocTriggerData.tg_newslot);
	}

	/*
	 * If doing EXPLAIN ANALYZE, stop charging time to this trigger, and count
	 * one "tuple returned" (really the number of firings).
	 */
	if (instr)
		InstrStopNode(instr + tgindx, 1);
}
+
+
/*
 * afterTriggerMarkEvents()
 *
 *	Scan the given event list for not yet invoked events.  Mark the ones
 *	that can be invoked now with the current firing ID.
 *
 *	If move_list isn't NULL, events that are not to be invoked now are
 *	transferred to move_list.
 *
 *	When immediate_only is true, do not invoke currently-deferred triggers.
 *	(This will be false only at main transaction exit.)
 *
 *	Returns true if any invokable events were found.
 */
static bool
afterTriggerMarkEvents(AfterTriggerEventList *events,
					   AfterTriggerEventList *move_list,
					   bool immediate_only)
{
	bool		found = false;
	bool		deferred_found = false;
	AfterTriggerEvent event;
	AfterTriggerEventChunk *chunk;

	for_each_event_chunk(event, chunk, *events)
	{
		AfterTriggerShared evtshared = GetTriggerSharedData(event);
		bool		defer_it = false;

		/* skip events already fired or already scheduled for firing */
		if (!(event->ate_flags &
			  (AFTER_TRIGGER_DONE | AFTER_TRIGGER_IN_PROGRESS)))
		{
			/*
			 * This trigger hasn't been called or scheduled yet. Check if we
			 * should call it now.
			 */
			if (immediate_only && afterTriggerCheckState(evtshared))
			{
				defer_it = true;
			}
			else
			{
				/*
				 * Mark it as to be fired in this firing cycle.
				 */
				evtshared->ats_firing_id = afterTriggers.firing_counter;
				event->ate_flags |= AFTER_TRIGGER_IN_PROGRESS;
				found = true;
			}
		}

		/*
		 * If it's deferred, move it to move_list, if requested.
		 */
		if (defer_it && move_list != NULL)
		{
			deferred_found = true;
			/* add it to move_list */
			afterTriggerAddEvent(move_list, event, evtshared);
			/* mark original copy "done" so we don't do it again */
			event->ate_flags |= AFTER_TRIGGER_DONE;
		}
	}

	/*
	 * We could allow deferred triggers if, before the end of the
	 * security-restricted operation, we were to verify that a SET CONSTRAINTS
	 * ... IMMEDIATE has fired all such triggers.  For now, don't bother.
	 */
	if (deferred_found && InSecurityRestrictedOperation())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("cannot fire deferred trigger within security-restricted operation")));

	return found;
}
+
+/*
+ * afterTriggerInvokeEvents()
+ *
+ * Scan the given event list for events that are marked as to be fired
+ * in the current firing cycle, and fire them.
+ *
+ * If estate isn't NULL, we use its result relation info to avoid repeated
+ * openings and closing of trigger target relations. If it is NULL, we
+ * make one locally to cache the info in case there are multiple trigger
+ * events per rel.
+ *
+ * When delete_ok is true, it's safe to delete fully-processed events.
+ * (We are not very tense about that: we simply reset a chunk to be empty
+ * if all its events got fired. The objective here is just to avoid useless
+ * rescanning of events when a trigger queues new events during transaction
+ * end, so it's not necessary to worry much about the case where only
+ * some events are fired.)
+ *
+ * Returns true if no unfired events remain in the list (this allows us
+ * to avoid repeating afterTriggerMarkEvents).
+ */
+static bool
+afterTriggerInvokeEvents(AfterTriggerEventList *events,
+ CommandId firing_id,
+ EState *estate,
+ bool delete_ok)
+{
+ bool all_fired = true;
+ AfterTriggerEventChunk *chunk;
+ MemoryContext per_tuple_context;
+ bool local_estate = false;
+ ResultRelInfo *rInfo = NULL;
+ Relation rel = NULL;
+ TriggerDesc *trigdesc = NULL;
+ FmgrInfo *finfo = NULL;
+ Instrumentation *instr = NULL;
+ TupleTableSlot *slot1 = NULL,
+ *slot2 = NULL;
+
+ /* Make a local EState if need be */
+ if (estate == NULL)
+ {
+ estate = CreateExecutorState();
+ local_estate = true;
+ }
+
+ /* Make a per-tuple memory context for trigger function calls */
+ per_tuple_context =
+ AllocSetContextCreate(CurrentMemoryContext,
+ "AfterTriggerTupleContext",
+ ALLOCSET_DEFAULT_SIZES);
+
+ for_each_chunk(chunk, *events)
+ {
+ AfterTriggerEvent event;
+ bool all_fired_in_chunk = true;
+
+ for_each_event(event, chunk)
+ {
+ AfterTriggerShared evtshared = GetTriggerSharedData(event);
+
+ /*
+ * Is it one for me to fire?
+ */
+ if ((event->ate_flags & AFTER_TRIGGER_IN_PROGRESS) &&
+ evtshared->ats_firing_id == firing_id)
+ {
+ ResultRelInfo *src_rInfo,
+ *dst_rInfo;
+
+ /*
+ * So let's fire it... but first, find the correct relation if
+ * this is not the same relation as before.
+ */
+ if (rel == NULL || RelationGetRelid(rel) != evtshared->ats_relid)
+ {
+ rInfo = ExecGetTriggerResultRel(estate, evtshared->ats_relid,
+ NULL);
+ rel = rInfo->ri_RelationDesc;
+ /* Catch calls with insufficient relcache refcounting */
+ Assert(!RelationHasReferenceCountZero(rel));
+ trigdesc = rInfo->ri_TrigDesc;
+ finfo = rInfo->ri_TrigFunctions;
+ instr = rInfo->ri_TrigInstrument;
+ if (slot1 != NULL)
+ {
+ ExecDropSingleTupleTableSlot(slot1);
+ ExecDropSingleTupleTableSlot(slot2);
+ slot1 = slot2 = NULL;
+ }
+ if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+ {
+ slot1 = MakeSingleTupleTableSlot(rel->rd_att,
+ &TTSOpsMinimalTuple);
+ slot2 = MakeSingleTupleTableSlot(rel->rd_att,
+ &TTSOpsMinimalTuple);
+ }
+ if (trigdesc == NULL) /* should not happen */
+ elog(ERROR, "relation %u has no triggers",
+ evtshared->ats_relid);
+ }
+
+ /*
+ * Look up source and destination partition result rels of a
+ * cross-partition update event.
+ */
+ if ((event->ate_flags & AFTER_TRIGGER_TUP_BITS) ==
+ AFTER_TRIGGER_CP_UPDATE)
+ {
+ Assert(OidIsValid(event->ate_src_part) &&
+ OidIsValid(event->ate_dst_part));
+ src_rInfo = ExecGetTriggerResultRel(estate,
+ event->ate_src_part,
+ rInfo);
+ dst_rInfo = ExecGetTriggerResultRel(estate,
+ event->ate_dst_part,
+ rInfo);
+ }
+ else
+ src_rInfo = dst_rInfo = rInfo;
+
+ /*
+ * Fire it. Note that the AFTER_TRIGGER_IN_PROGRESS flag is
+ * still set, so recursive examinations of the event list
+ * won't try to re-fire it.
+ */
+ AfterTriggerExecute(estate, event, rInfo,
+ src_rInfo, dst_rInfo,
+ trigdesc, finfo, instr,
+ per_tuple_context, slot1, slot2);
+
+ /*
+ * Mark the event as done.
+ */
+ event->ate_flags &= ~AFTER_TRIGGER_IN_PROGRESS;
+ event->ate_flags |= AFTER_TRIGGER_DONE;
+ }
+ else if (!(event->ate_flags & AFTER_TRIGGER_DONE))
+ {
+ /* something remains to be done */
+ all_fired = all_fired_in_chunk = false;
+ }
+ }
+
+ /* Clear the chunk if delete_ok and nothing left of interest */
+ if (delete_ok && all_fired_in_chunk)
+ {
+ chunk->freeptr = CHUNK_DATA_START(chunk);
+ chunk->endfree = chunk->endptr;
+
+ /*
+ * If it's last chunk, must sync event list's tailfree too. Note
+ * that delete_ok must NOT be passed as true if there could be
+ * additional AfterTriggerEventList values pointing at this event
+ * list, since we'd fail to fix their copies of tailfree.
+ */
+ if (chunk == events->tail)
+ events->tailfree = chunk->freeptr;
+ }
+ }
+ if (slot1 != NULL)
+ {
+ ExecDropSingleTupleTableSlot(slot1);
+ ExecDropSingleTupleTableSlot(slot2);
+ }
+
+ /* Release working resources */
+ MemoryContextDelete(per_tuple_context);
+
+ if (local_estate)
+ {
+ ExecCloseResultRelations(estate);
+ ExecResetTupleTable(estate->es_tupleTable, false);
+ FreeExecutorState(estate);
+ }
+
+ return all_fired;
+}
+
+
+/*
+ * GetAfterTriggersTableData
+ *
+ * Locate an AfterTriggersTableData entry for the given relation OID and
+ * operation type at the current query depth, creating one if none exists.
+ * Entries marked "closed" are deliberately skipped: no further tuples may
+ * be added to them, and their stmt-triggers-fired state must not change.
+ *
+ * The struct (and the list cell linking it) lives in the current
+ * (sub)transaction's CurTransactionContext.  That lifespan is sufficient
+ * because the data is not needed past AfterTriggerEndQuery.
+ */
+static AfterTriggersTableData *
+GetAfterTriggersTableData(Oid relid, CmdType cmdType)
+{
+	AfterTriggersQueryData *qs;
+	AfterTriggersTableData *result;
+	MemoryContext savedcxt;
+	ListCell   *cell;
+
+	/* Caller should have ensured query_depth is OK. */
+	Assert(afterTriggers.query_depth >= 0 &&
+		   afterTriggers.query_depth < afterTriggers.maxquerydepth);
+	qs = &afterTriggers.query_stack[afterTriggers.query_depth];
+
+	/* Reuse a matching open entry if one is already present. */
+	foreach(cell, qs->tables)
+	{
+		AfterTriggersTableData *candidate = (AfterTriggersTableData *) lfirst(cell);
+
+		if (candidate->relid == relid &&
+			candidate->cmdType == cmdType &&
+			!candidate->closed)
+			return candidate;
+	}
+
+	/* None found: allocate a fresh zeroed entry and append it to the list. */
+	savedcxt = MemoryContextSwitchTo(CurTransactionContext);
+
+	result = (AfterTriggersTableData *) palloc0(sizeof(AfterTriggersTableData));
+	result->relid = relid;
+	result->cmdType = cmdType;
+	qs->tables = lappend(qs->tables, result);
+
+	MemoryContextSwitchTo(savedcxt);
+
+	return result;
+}
+
+/*
+ * GetAfterTriggersStoreSlot
+ *
+ * Return the TupleTableSlot used to feed tuples into this table's
+ * transition-table tuplestores, building it on first use.
+ */
+static TupleTableSlot *
+GetAfterTriggersStoreSlot(AfterTriggersTableData *table,
+						  TupleDesc tupdesc)
+{
+	if (table->storeslot == NULL)
+	{
+		MemoryContext savedcxt;
+
+		/*
+		 * The slot is only required until AfterTriggerEndQuery, but an
+		 * end-of-subxact lifespan is good enough; AfterTriggerFreeQuery()
+		 * releases it.  The caller's tupdesc may have a shorter lifespan,
+		 * so copy it into the same context.
+		 */
+		savedcxt = MemoryContextSwitchTo(CurTransactionContext);
+		table->storeslot = MakeSingleTupleTableSlot(CreateTupleDescCopy(tupdesc),
+													&TTSOpsVirtual);
+		MemoryContextSwitchTo(savedcxt);
+	}
+
+	return table->storeslot;
+}
+
+/*
+ * MakeTransitionCaptureState
+ *
+ * Make a TransitionCaptureState object for the given TriggerDesc, target
+ * relation, and operation type.  The TCS object holds all the state needed
+ * to decide whether to capture tuples in transition tables.
+ *
+ * If there are no triggers in 'trigdesc' that request relevant transition
+ * tables, then return NULL.
+ *
+ * The resulting object can be passed to the ExecAR* functions.  When
+ * dealing with child tables, the caller can set tcs_original_insert_tuple
+ * to avoid having to reconstruct the original tuple in the root table's
+ * format.
+ *
+ * Note that we copy the flags from a parent table into this struct (rather
+ * than subsequently using the relation's TriggerDesc directly) so that we can
+ * use it to control collection of transition tuples from child tables.
+ *
+ * Per SQL spec, all operations of the same kind (INSERT/UPDATE/DELETE)
+ * on the same table during one query should share one transition table.
+ * Therefore, the Tuplestores are owned by an AfterTriggersTableData struct
+ * looked up using the table OID + CmdType, and are merely referenced by
+ * the TransitionCaptureState objects we hand out to callers.
+ */
+TransitionCaptureState *
+MakeTransitionCaptureState(TriggerDesc *trigdesc, Oid relid, CmdType cmdType)
+{
+	TransitionCaptureState *state;
+	bool		need_old_upd,
+				need_new_upd,
+				need_old_del,
+				need_new_ins;
+	AfterTriggersTableData *table;
+	MemoryContext oldcxt;
+	ResourceOwner saveResourceOwner;
+
+	/* With no triggers at all, there can be no transition tables to fill. */
+	if (trigdesc == NULL)
+		return NULL;
+
+	/* Detect which table(s) we need. */
+	switch (cmdType)
+	{
+		case CMD_INSERT:
+			need_old_upd = need_old_del = need_new_upd = false;
+			need_new_ins = trigdesc->trig_insert_new_table;
+			break;
+		case CMD_UPDATE:
+			need_old_upd = trigdesc->trig_update_old_table;
+			need_new_upd = trigdesc->trig_update_new_table;
+			need_old_del = need_new_ins = false;
+			break;
+		case CMD_DELETE:
+			need_old_del = trigdesc->trig_delete_old_table;
+			need_old_upd = need_new_upd = need_new_ins = false;
+			break;
+		case CMD_MERGE:
+			/* MERGE can perform any of the three actions, so check all four */
+			need_old_upd = trigdesc->trig_update_old_table;
+			need_new_upd = trigdesc->trig_update_new_table;
+			need_old_del = trigdesc->trig_delete_old_table;
+			need_new_ins = trigdesc->trig_insert_new_table;
+			break;
+		default:
+			elog(ERROR, "unexpected CmdType: %d", (int) cmdType);
+			/* keep compiler quiet */
+			need_old_upd = need_new_upd = need_old_del = need_new_ins = false;
+			break;
+	}
+	/* No trigger requests a transition table relevant to this operation */
+	if (!need_old_upd && !need_new_upd && !need_new_ins && !need_old_del)
+		return NULL;
+
+	/* Check state, like AfterTriggerSaveEvent. */
+	if (afterTriggers.query_depth < 0)
+		elog(ERROR, "MakeTransitionCaptureState() called outside of query");
+
+	/* Be sure we have enough space to record events at this query depth. */
+	if (afterTriggers.query_depth >= afterTriggers.maxquerydepth)
+		AfterTriggerEnlargeQueryState();
+
+	/*
+	 * Find or create an AfterTriggersTableData struct to hold the
+	 * tuplestore(s).  If there's a matching struct but it's marked closed,
+	 * ignore it; we need a newer one.
+	 *
+	 * Note: the AfterTriggersTableData list, as well as the tuplestores, are
+	 * allocated in the current (sub)transaction's CurTransactionContext, and
+	 * the tuplestores are managed by the (sub)transaction's resource owner.
+	 * This is sufficient lifespan because we do not allow triggers using
+	 * transition tables to be deferrable; they will be fired during
+	 * AfterTriggerEndQuery, after which it's okay to delete the data.
+	 */
+	table = GetAfterTriggersTableData(relid, cmdType);
+
+	/* Now create required tuplestore(s), if we don't have them already. */
+	oldcxt = MemoryContextSwitchTo(CurTransactionContext);
+	saveResourceOwner = CurrentResourceOwner;
+	CurrentResourceOwner = CurTransactionResourceOwner;
+
+	if (need_old_upd && table->old_upd_tuplestore == NULL)
+		table->old_upd_tuplestore = tuplestore_begin_heap(false, false, work_mem);
+	if (need_new_upd && table->new_upd_tuplestore == NULL)
+		table->new_upd_tuplestore = tuplestore_begin_heap(false, false, work_mem);
+	if (need_old_del && table->old_del_tuplestore == NULL)
+		table->old_del_tuplestore = tuplestore_begin_heap(false, false, work_mem);
+	if (need_new_ins && table->new_ins_tuplestore == NULL)
+		table->new_ins_tuplestore = tuplestore_begin_heap(false, false, work_mem);
+
+	CurrentResourceOwner = saveResourceOwner;
+	MemoryContextSwitchTo(oldcxt);
+
+	/* Now build the TransitionCaptureState struct, in caller's context */
+	state = (TransitionCaptureState *) palloc0(sizeof(TransitionCaptureState));
+	state->tcs_delete_old_table = trigdesc->trig_delete_old_table;
+	state->tcs_update_old_table = trigdesc->trig_update_old_table;
+	state->tcs_update_new_table = trigdesc->trig_update_new_table;
+	state->tcs_insert_new_table = trigdesc->trig_insert_new_table;
+	state->tcs_private = table;
+
+	return state;
+}
+
+
+/* ----------
+ * AfterTriggerBeginXact()
+ *
+ *	Called at transaction start (either BEGIN or implicit for single
+ *	statement outside of transaction block).
+ * ----------
+ */
+void
+AfterTriggerBeginXact(void)
+{
+	/* Reset the after-trigger bookkeeping to its empty state. */
+	afterTriggers.query_depth = -1;
+	afterTriggers.firing_counter = (CommandId) 1;	/* mustn't be 0 */
+
+	/*
+	 * There must be no leftover state from a previous transaction.  If any
+	 * of these assertions trip, AfterTriggerEndXact wasn't called or
+	 * didn't clean up properly.
+	 */
+	Assert(afterTriggers.events.head == NULL);
+	Assert(afterTriggers.event_cxt == NULL);
+	Assert(afterTriggers.state == NULL);
+	Assert(afterTriggers.query_stack == NULL);
+	Assert(afterTriggers.maxquerydepth == 0);
+	Assert(afterTriggers.trans_stack == NULL);
+	Assert(afterTriggers.maxtransdepth == 0);
+}
+
+
+/* ----------
+ * AfterTriggerBeginQuery()
+ *
+ *	Called just before we start processing a single query within a
+ *	transaction (or subtransaction).  Most of the real work gets deferred
+ *	until somebody actually tries to queue a trigger event.
+ * ----------
+ */
+void
+AfterTriggerBeginQuery(void)
+{
+	/* Push a new query level; its storage is allocated only on demand. */
+	afterTriggers.query_depth += 1;
+}
+
+
+/* ----------
+ * AfterTriggerEndQuery()
+ *
+ *	Called after one query has been completely processed. At this time
+ *	we invoke all AFTER IMMEDIATE trigger events queued by the query, and
+ *	transfer deferred trigger events to the global deferred-trigger list.
+ *
+ *	Note that this must be called BEFORE closing down the executor
+ *	with ExecutorEnd, because we make use of the EState's info about
+ *	target relations.  Normally it is called from ExecutorFinish.
+ * ----------
+ */
+void
+AfterTriggerEndQuery(EState *estate)
+{
+	AfterTriggersQueryData *qs;
+
+	/* Must be inside a query, too */
+	Assert(afterTriggers.query_depth >= 0);
+
+	/*
+	 * If we never even got as far as initializing the event stack, there
+	 * certainly won't be any events, so exit quickly.
+	 */
+	if (afterTriggers.query_depth >= afterTriggers.maxquerydepth)
+	{
+		afterTriggers.query_depth--;
+		return;
+	}
+
+	/*
+	 * Process all immediate-mode triggers queued by the query, and move the
+	 * deferred ones to the main list of deferred events.
+	 *
+	 * Notice that we decide which ones will be fired, and put the deferred
+	 * ones on the main list, before anything is actually fired. This ensures
+	 * reasonably sane behavior if a trigger function does SET CONSTRAINTS ...
+	 * IMMEDIATE: all events we have decided to defer will be available for it
+	 * to fire.
+	 *
+	 * We loop in case a trigger queues more events at the same query level.
+	 * Ordinary trigger functions, including all PL/pgSQL trigger functions,
+	 * will instead fire any triggers in a dedicated query level. Foreign key
+	 * enforcement triggers do add to the current query level, thanks to their
+	 * passing fire_triggers = false to SPI_execute_snapshot(). Other
+	 * C-language triggers might do likewise.
+	 *
+	 * If we find no firable events, we don't have to increment
+	 * firing_counter.
+	 */
+	qs = &afterTriggers.query_stack[afterTriggers.query_depth];
+
+	for (;;)
+	{
+		if (afterTriggerMarkEvents(&qs->events, &afterTriggers.events, true))
+		{
+			CommandId	firing_id = afterTriggers.firing_counter++;
+			/* remember the pre-firing tail, for the chunk trimming below */
+			AfterTriggerEventChunk *oldtail = qs->events.tail;
+
+			if (afterTriggerInvokeEvents(&qs->events, firing_id, estate, false))
+				break;			/* all fired */
+
+			/*
+			 * Firing a trigger could result in query_stack being repalloc'd,
+			 * so we must recalculate qs after each afterTriggerInvokeEvents
+			 * call.  Furthermore, it's unsafe to pass delete_ok = true here,
+			 * because that could cause afterTriggerInvokeEvents to try to
+			 * access qs->events after the stack has been repalloc'd.
+			 */
+			qs = &afterTriggers.query_stack[afterTriggers.query_depth];
+
+			/*
+			 * We'll need to scan the events list again.  To reduce the cost
+			 * of doing so, get rid of completely-fired chunks.  We know that
+			 * all events were marked IN_PROGRESS or DONE at the conclusion of
+			 * afterTriggerMarkEvents, so any still-interesting events must
+			 * have been added after that, and so must be in the chunk that
+			 * was then the tail chunk, or in later chunks.  So, zap all
+			 * chunks before oldtail.  This is approximately the same set of
+			 * events we would have gotten rid of by passing delete_ok = true.
+			 */
+			Assert(oldtail != NULL);
+			while (qs->events.head != oldtail)
+				afterTriggerDeleteHeadEventChunk(qs);
+		}
+		else
+			break;				/* no firable events remain */
+	}
+
+	/* Release query-level-local storage, including tuplestores if any */
+	AfterTriggerFreeQuery(&afterTriggers.query_stack[afterTriggers.query_depth]);
+
+	afterTriggers.query_depth--;
+}
+
+
+/*
+ * AfterTriggerFreeQuery
+ *	Release subsidiary storage for a trigger query level.
+ *	This includes closing down tuplestores.
+ *
+ * Note: it's important for this to be safe if interrupted by an error
+ * and then called again for the same query level; hence each pointer is
+ * cleared in the data structure before the resource it names is released.
+ */
+static void
+AfterTriggerFreeQuery(AfterTriggersQueryData *qs)
+{
+	Tuplestorestate *ts;
+	List	   *tables;
+	ListCell   *lc;
+
+	/* Drop the trigger events */
+	afterTriggerFreeEventList(&qs->events);
+
+	/* Drop FDW tuplestore if any */
+	ts = qs->fdw_tuplestore;
+	qs->fdw_tuplestore = NULL;
+	if (ts)
+		tuplestore_end(ts);
+
+	/* Release per-table subsidiary storage */
+	tables = qs->tables;
+	foreach(lc, tables)
+	{
+		AfterTriggersTableData *table = (AfterTriggersTableData *) lfirst(lc);
+		Tuplestorestate **tstores[4];
+		int			i;
+
+		/*
+		 * Close each transition-table tuplestore, nulling the struct field
+		 * before calling tuplestore_end so a mid-cleanup error can't leave
+		 * a dangling pointer for a repeat invocation to free twice.
+		 */
+		tstores[0] = &table->old_upd_tuplestore;
+		tstores[1] = &table->new_upd_tuplestore;
+		tstores[2] = &table->old_del_tuplestore;
+		tstores[3] = &table->new_ins_tuplestore;
+		for (i = 0; i < 4; i++)
+		{
+			ts = *tstores[i];
+			*tstores[i] = NULL;
+			if (ts)
+				tuplestore_end(ts);
+		}
+
+		if (table->storeslot)
+		{
+			TupleTableSlot *slot = table->storeslot;
+
+			table->storeslot = NULL;
+			ExecDropSingleTupleTableSlot(slot);
+		}
+	}
+
+	/*
+	 * Now free the AfterTriggersTableData structs and list cells.  Reset the
+	 * list pointer first; if list_free_deep somehow gets an error, better to
+	 * leak that storage than to loop forever.
+	 */
+	qs->tables = NIL;
+	list_free_deep(tables);
+}
+
+
+/* ----------
+ * AfterTriggerFireDeferred()
+ *
+ *	Called just before the current transaction is committed.  At this
+ *	time we invoke all pending DEFERRED triggers.
+ *
+ *	It is possible for other modules to queue additional deferred triggers
+ *	during pre-commit processing; therefore xact.c may have to call this
+ *	multiple times.
+ * ----------
+ */
+void
+AfterTriggerFireDeferred(void)
+{
+	AfterTriggerEventList *events = &afterTriggers.events;
+	bool		pushed_snapshot = false;
+
+	/* Must not be inside a query */
+	Assert(afterTriggers.query_depth == -1);
+
+	/*
+	 * If any triggers remain, establish a snapshot for them to run under.
+	 * (PortalRunUtility doesn't set a snap for COMMIT, so we can't assume
+	 * ActiveSnapshot is valid on entry.)
+	 */
+	if (events->head != NULL)
+	{
+		PushActiveSnapshot(GetTransactionSnapshot());
+		pushed_snapshot = true;
+	}
+
+	/*
+	 * Fire everything that's left, repeating in case some trigger queues
+	 * more events for us to process.
+	 */
+	while (afterTriggerMarkEvents(events, NULL, false))
+	{
+		CommandId	firing_id = afterTriggers.firing_counter++;
+
+		if (afterTriggerInvokeEvents(events, firing_id, NULL, true))
+			break;				/* all fired */
+	}
+
+	/*
+	 * The event list itself isn't freed here: AfterTriggerEndXact will
+	 * discard it wholesale, which is cheaper than pfree'ing piecemeal.
+	 */
+
+	if (pushed_snapshot)
+		PopActiveSnapshot();
+}
+
+
+/* ----------
+ * AfterTriggerEndXact()
+ *
+ *	The current transaction is finishing.
+ *
+ *	Any unfired triggers are canceled so we simply throw
+ *	away anything we know.
+ *
+ *	Note: it is possible for this to be called repeatedly in case of
+ *	error during transaction abort; therefore, do not complain if
+ *	already closed down.
+ * ----------
+ */
+void
+AfterTriggerEndXact(bool isCommit)
+{
+	/*
+	 * Discard the pending-events list.  Everything lives in
+	 * TopTransactionContext or children thereof, so reclaiming memory here
+	 * isn't strictly required; but the list can be large, and if we are
+	 * aborting precisely because it exhausted memory, dropping its context
+	 * promptly is actively helpful.
+	 */
+	if (afterTriggers.event_cxt != NULL)
+	{
+		MemoryContextDelete(afterTriggers.event_cxt);
+		afterTriggers.event_cxt = NULL;
+		afterTriggers.events.head = NULL;
+		afterTriggers.events.tail = NULL;
+		afterTriggers.events.tailfree = NULL;
+	}
+
+	/*
+	 * The subtransaction stack, query stack, and constraint-related state
+	 * are all small, so just drop the pointers and let the eventual reset
+	 * of TopTransactionContext reclaim the memory.
+	 */
+	afterTriggers.trans_stack = NULL;
+	afterTriggers.maxtransdepth = 0;
+	afterTriggers.query_stack = NULL;
+	afterTriggers.maxquerydepth = 0;
+	afterTriggers.state = NULL;
+
+	/* No more afterTriggers manipulation until next transaction starts. */
+	afterTriggers.query_depth = -1;
+}
+
+/*
+ * AfterTriggerBeginSubXact()
+ *
+ *	Start a subtransaction.
+ */
+void
+AfterTriggerBeginSubXact(void)
+{
+	int			my_level = GetCurrentTransactionNestLevel();
+
+	/*
+	 * Grow trans_stack as needed.  (Subtransactions start at nest level 2,
+	 * so the first couple of entries are never used; it isn't worth the
+	 * notational effort to avoid wasting them.)
+	 */
+	while (my_level >= afterTriggers.maxtransdepth)
+	{
+		if (afterTriggers.maxtransdepth == 0)
+		{
+			/* First subxact of this xact: start with room for 8 levels */
+			afterTriggers.trans_stack = (AfterTriggersTransData *)
+				MemoryContextAlloc(TopTransactionContext,
+								   8 * sizeof(AfterTriggersTransData));
+			afterTriggers.maxtransdepth = 8;
+		}
+		else
+		{
+			/* Double the stack; repalloc keeps it in the same context */
+			int			newmax = afterTriggers.maxtransdepth * 2;
+
+			afterTriggers.trans_stack = (AfterTriggersTransData *)
+				repalloc(afterTriggers.trans_stack,
+						 newmax * sizeof(AfterTriggersTransData));
+			afterTriggers.maxtransdepth = newmax;
+		}
+	}
+
+	/*
+	 * Record the state to restore if this subxact aborts.  The SET
+	 * CONSTRAINTS state is saved lazily (only if it changes), and likewise
+	 * a per-subtransaction event context is created only on demand.
+	 */
+	afterTriggers.trans_stack[my_level].state = NULL;
+	afterTriggers.trans_stack[my_level].events = afterTriggers.events;
+	afterTriggers.trans_stack[my_level].query_depth = afterTriggers.query_depth;
+	afterTriggers.trans_stack[my_level].firing_counter = afterTriggers.firing_counter;
+}
+
+/*
+ * AfterTriggerEndSubXact()
+ *
+ *	The current subtransaction is ending.
+ *
+ * isCommit tells whether the subxact committed.  On commit we merely pop
+ * any saved SET CONSTRAINTS state; on abort we restore the trigger state
+ * captured by AfterTriggerBeginSubXact and un-mark deferred events that
+ * this subxact (or its children) fired or completed.
+ */
+void
+AfterTriggerEndSubXact(bool isCommit)
+{
+	int			my_level = GetCurrentTransactionNestLevel();
+	SetConstraintState state;
+	AfterTriggerEvent event;
+	AfterTriggerEventChunk *chunk;
+	CommandId	subxact_firing_id;
+
+	/*
+	 * Pop the prior state if needed.
+	 */
+	if (isCommit)
+	{
+		Assert(my_level < afterTriggers.maxtransdepth);
+		/* If we saved a prior state, we don't need it anymore */
+		state = afterTriggers.trans_stack[my_level].state;
+		if (state != NULL)
+			pfree(state);
+		/* this avoids double pfree if error later: */
+		afterTriggers.trans_stack[my_level].state = NULL;
+		Assert(afterTriggers.query_depth ==
+			   afterTriggers.trans_stack[my_level].query_depth);
+	}
+	else
+	{
+		/*
+		 * Aborting.  It is possible subxact start failed before calling
+		 * AfterTriggerBeginSubXact, in which case we mustn't risk touching
+		 * trans_stack levels that aren't there.
+		 */
+		if (my_level >= afterTriggers.maxtransdepth)
+			return;
+
+		/*
+		 * Release query-level storage for queries being aborted, and restore
+		 * query_depth to its pre-subxact value.  This assumes that a
+		 * subtransaction will not add events to query levels started in a
+		 * earlier transaction state.
+		 */
+		while (afterTriggers.query_depth > afterTriggers.trans_stack[my_level].query_depth)
+		{
+			/* only levels that actually got storage need freeing */
+			if (afterTriggers.query_depth < afterTriggers.maxquerydepth)
+				AfterTriggerFreeQuery(&afterTriggers.query_stack[afterTriggers.query_depth]);
+			afterTriggers.query_depth--;
+		}
+		Assert(afterTriggers.query_depth ==
+			   afterTriggers.trans_stack[my_level].query_depth);
+
+		/*
+		 * Restore the global deferred-event list to its former length,
+		 * discarding any events queued by the subxact.
+		 */
+		afterTriggerRestoreEventList(&afterTriggers.events,
+									 &afterTriggers.trans_stack[my_level].events);
+
+		/*
+		 * Restore the trigger state.  If the saved state is NULL, then this
+		 * subxact didn't save it, so it doesn't need restoring.
+		 */
+		state = afterTriggers.trans_stack[my_level].state;
+		if (state != NULL)
+		{
+			pfree(afterTriggers.state);
+			afterTriggers.state = state;
+		}
+		/* this avoids double pfree if error later: */
+		afterTriggers.trans_stack[my_level].state = NULL;
+
+		/*
+		 * Scan for any remaining deferred events that were marked DONE or IN
+		 * PROGRESS by this subxact or a child, and un-mark them.  We can
+		 * recognize such events because they have a firing ID greater than or
+		 * equal to the firing_counter value we saved at subtransaction start.
+		 * (This essentially assumes that the current subxact includes all
+		 * subxacts started after it.)
+		 */
+		subxact_firing_id = afterTriggers.trans_stack[my_level].firing_counter;
+		for_each_event_chunk(event, chunk, afterTriggers.events)
+		{
+			AfterTriggerShared evtshared = GetTriggerSharedData(event);
+
+			if (event->ate_flags &
+				(AFTER_TRIGGER_DONE | AFTER_TRIGGER_IN_PROGRESS))
+			{
+				if (evtshared->ats_firing_id >= subxact_firing_id)
+					event->ate_flags &=
+						~(AFTER_TRIGGER_DONE | AFTER_TRIGGER_IN_PROGRESS);
+			}
+		}
+	}
+}
+
+/*
+ * Choose the transition-table tuplestore (if any) into which the old or new
+ * tuple of the given event should be captured.  Returns NULL when no capture
+ * is requested for this combination of event type and available tuple.
+ */
+static Tuplestorestate *
+GetAfterTriggersTransitionTable(int event,
+								TupleTableSlot *oldslot,
+								TupleTableSlot *newslot,
+								TransitionCaptureState *transition_capture)
+{
+	AfterTriggersTableData *tcs_data = transition_capture->tcs_private;
+
+	/*
+	 * INSERT events should carry NEW and DELETE events should carry OLD,
+	 * whereas for UPDATE events normally both OLD and NEW are non-NULL.
+	 * The exception is UPDATE events fired for capturing transition tuples
+	 * during UPDATE partition-key row movement: OLD is NULL when the event
+	 * is for a row being inserted, and NEW is NULL for a row being deleted.
+	 */
+	Assert(!(event == TRIGGER_EVENT_DELETE &&
+			 transition_capture->tcs_delete_old_table &&
+			 TupIsNull(oldslot)));
+	Assert(!(event == TRIGGER_EVENT_INSERT &&
+			 transition_capture->tcs_insert_new_table &&
+			 TupIsNull(newslot)));
+
+	if (!TupIsNull(oldslot))
+	{
+		Assert(TupIsNull(newslot));
+		if (event == TRIGGER_EVENT_DELETE &&
+			transition_capture->tcs_delete_old_table)
+			return tcs_data->old_del_tuplestore;
+		if (event == TRIGGER_EVENT_UPDATE &&
+			transition_capture->tcs_update_old_table)
+			return tcs_data->old_upd_tuplestore;
+	}
+	else if (!TupIsNull(newslot))
+	{
+		Assert(TupIsNull(oldslot));
+		if (event == TRIGGER_EVENT_INSERT &&
+			transition_capture->tcs_insert_new_table)
+			return tcs_data->new_ins_tuplestore;
+		if (event == TRIGGER_EVENT_UPDATE &&
+			transition_capture->tcs_update_new_table)
+			return tcs_data->new_upd_tuplestore;
+	}
+
+	return NULL;
+}
+
+/*
+ * Append one tuple to the given transition-table tuplestore, converting it
+ * from child-relation format to the root table's format when required.
+ *
+ * If original_insert_tuple is supplied, it is already in root format and is
+ * stored without conversion.  A NULL tuplestore means no capture is wanted,
+ * making the call a no-op.
+ */
+static void
+TransitionTableAddTuple(EState *estate,
+						TransitionCaptureState *transition_capture,
+						ResultRelInfo *relinfo,
+						TupleTableSlot *slot,
+						TupleTableSlot *original_insert_tuple,
+						Tuplestorestate *tuplestore)
+{
+	TupleConversionMap *map;
+
+	/* Nothing to capture into. */
+	if (tuplestore == NULL)
+		return;
+
+	if (original_insert_tuple)
+	{
+		/* Caller already supplied the tuple in root format. */
+		tuplestore_puttupleslot(tuplestore, original_insert_tuple);
+		return;
+	}
+
+	map = ExecGetChildToRootMap(relinfo);
+	if (map == NULL)
+	{
+		/* Child layout matches the root; store directly. */
+		tuplestore_puttupleslot(tuplestore, slot);
+	}
+	else
+	{
+		/* Translate to root format through the conversion map. */
+		TupleTableSlot *storeslot =
+			GetAfterTriggersStoreSlot(transition_capture->tcs_private,
+									  map->outdesc);
+
+		execute_attr_map_slot(map->attrMap, slot, storeslot);
+		tuplestore_puttupleslot(tuplestore, storeslot);
+	}
+}
+
+/* ----------
+ * AfterTriggerEnlargeQueryState()
+ *
+ *	Prepare the necessary state so that we can record AFTER trigger events
+ *	queued by a query.  It is allowed to have nested queries within a
+ *	(sub)transaction, so we need to have separate state for each query
+ *	nesting level.
+ * ----------
+ */
+static void
+AfterTriggerEnlargeQueryState(void)
+{
+	int			old_depth = afterTriggers.maxquerydepth;
+	int			new_alloc;
+
+	Assert(afterTriggers.query_depth >= afterTriggers.maxquerydepth);
+
+	if (afterTriggers.maxquerydepth == 0)
+	{
+		/* Initial allocation, placed in TopTransactionContext */
+		new_alloc = Max(afterTriggers.query_depth + 1, 8);
+		afterTriggers.query_stack = (AfterTriggersQueryData *)
+			MemoryContextAlloc(TopTransactionContext,
+							   new_alloc * sizeof(AfterTriggersQueryData));
+	}
+	else
+	{
+		/* Grow by at least doubling; repalloc keeps the same context */
+		new_alloc = Max(afterTriggers.query_depth + 1,
+						afterTriggers.maxquerydepth * 2);
+		afterTriggers.query_stack = (AfterTriggersQueryData *)
+			repalloc(afterTriggers.query_stack,
+					 new_alloc * sizeof(AfterTriggersQueryData));
+	}
+	afterTriggers.maxquerydepth = new_alloc;
+
+	/* Initialize the newly-added entries to empty */
+	for (; old_depth < afterTriggers.maxquerydepth; old_depth++)
+	{
+		AfterTriggersQueryData *qs = &afterTriggers.query_stack[old_depth];
+
+		qs->events.head = NULL;
+		qs->events.tail = NULL;
+		qs->events.tailfree = NULL;
+		qs->fdw_tuplestore = NULL;
+		qs->tables = NIL;
+	}
+}
+
+/*
+ * Build an empty SetConstraintState with room for numalloc trigger entries,
+ * allocated in TopTransactionContext.
+ */
+static SetConstraintState
+SetConstraintStateCreate(int numalloc)
+{
+	SetConstraintState state;
+
+	/* Treat a nonpositive request as a request for minimal space */
+	if (numalloc <= 0)
+		numalloc = 1;
+
+	/* Zero-filling produces a valid empty state. */
+	state = (SetConstraintState)
+		MemoryContextAllocZero(TopTransactionContext,
+							   offsetof(SetConstraintStateData, trigstates) +
+							   numalloc * sizeof(SetConstraintTriggerData));
+	state->numalloc = numalloc;
+
+	return state;
+}
+
+/*
+ * Clone a SetConstraintState, sizing the copy to the source's live entries.
+ */
+static SetConstraintState
+SetConstraintStateCopy(SetConstraintState origstate)
+{
+	SetConstraintState newstate = SetConstraintStateCreate(origstate->numstates);
+
+	newstate->all_isset = origstate->all_isset;
+	newstate->all_isdeferred = origstate->all_isdeferred;
+	newstate->numstates = origstate->numstates;
+	memcpy(newstate->trigstates, origstate->trigstates,
+		   origstate->numstates * sizeof(SetConstraintTriggerData));
+
+	return newstate;
+}
+
+/*
+ * Append one per-trigger entry to a SetConstraintState, enlarging the
+ * array if necessary.  Returns the possibly-relocated state pointer (it
+ * changes whenever we have to repalloc).
+ */
+static SetConstraintState
+SetConstraintStateAddItem(SetConstraintState state,
+						  Oid tgoid, bool tgisdeferred)
+{
+	SetConstraintTriggerData *item;
+
+	if (state->numstates >= state->numalloc)
+	{
+		/* Double capacity, with a floor of 8 in case original had size 0 */
+		int			newalloc = Max(state->numalloc * 2, 8);
+
+		state = (SetConstraintState)
+			repalloc(state,
+					 offsetof(SetConstraintStateData, trigstates) +
+					 newalloc * sizeof(SetConstraintTriggerData));
+		state->numalloc = newalloc;
+		Assert(state->numstates < state->numalloc);
+	}
+
+	item = &state->trigstates[state->numstates++];
+	item->sct_tgoid = tgoid;
+	item->sct_tgisdeferred = tgisdeferred;
+
+	return state;
+}
+
+/* ----------
+ * AfterTriggerSetState()
+ *
+ * Execute the SET CONSTRAINTS ... utility command.
+ * ----------
+ */
+void
+AfterTriggerSetState(ConstraintsSetStmt *stmt)
+{
+ int my_level = GetCurrentTransactionNestLevel();
+
+ /* If we haven't already done so, initialize our state. */
+ if (afterTriggers.state == NULL)
+ afterTriggers.state = SetConstraintStateCreate(8);
+
+ /*
+ * If in a subtransaction, and we didn't save the current state already,
+ * save it so it can be restored if the subtransaction aborts.
+ */
+ if (my_level > 1 &&
+ afterTriggers.trans_stack[my_level].state == NULL)
+ {
+ afterTriggers.trans_stack[my_level].state =
+ SetConstraintStateCopy(afterTriggers.state);
+ }
+
+ /*
+ * Handle SET CONSTRAINTS ALL ...
+ */
+ if (stmt->constraints == NIL)
+ {
+ /*
+ * Forget any previous SET CONSTRAINTS commands in this transaction.
+ */
+ afterTriggers.state->numstates = 0;
+
+ /*
+ * Set the per-transaction ALL state to known.
+ */
+ afterTriggers.state->all_isset = true;
+ afterTriggers.state->all_isdeferred = stmt->deferred;
+ }
+ else
+ {
+ Relation conrel;
+ Relation tgrel;
+ List *conoidlist = NIL;
+ List *tgoidlist = NIL;
+ ListCell *lc;
+
+ /*
+ * Handle SET CONSTRAINTS constraint-name [, ...]
+ *
+ * First, identify all the named constraints and make a list of their
+ * OIDs. Since, unlike the SQL spec, we allow multiple constraints of
+ * the same name within a schema, the specifications are not
+ * necessarily unique. Our strategy is to target all matching
+ * constraints within the first search-path schema that has any
+ * matches, but disregard matches in schemas beyond the first match.
+ * (This is a bit odd but it's the historical behavior.)
+ *
+ * A constraint in a partitioned table may have corresponding
+ * constraints in the partitions. Grab those too.
+ */
+ conrel = table_open(ConstraintRelationId, AccessShareLock);
+
+ foreach(lc, stmt->constraints)
+ {
+ RangeVar *constraint = lfirst(lc);
+ bool found;
+ List *namespacelist;
+ ListCell *nslc;
+
+ if (constraint->catalogname)
+ {
+ if (strcmp(constraint->catalogname, get_database_name(MyDatabaseId)) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cross-database references are not implemented: \"%s.%s.%s\"",
+ constraint->catalogname, constraint->schemaname,
+ constraint->relname)));
+ }
+
+ /*
+ * If we're given the schema name with the constraint, look only
+ * in that schema. If given a bare constraint name, use the
+ * search path to find the first matching constraint.
+ */
+ if (constraint->schemaname)
+ {
+ Oid namespaceId = LookupExplicitNamespace(constraint->schemaname,
+ false);
+
+ namespacelist = list_make1_oid(namespaceId);
+ }
+ else
+ {
+ namespacelist = fetch_search_path(true);
+ }
+
+ found = false;
+ foreach(nslc, namespacelist)
+ {
+ Oid namespaceId = lfirst_oid(nslc);
+ SysScanDesc conscan;
+ ScanKeyData skey[2];
+ HeapTuple tup;
+
+ ScanKeyInit(&skey[0],
+ Anum_pg_constraint_conname,
+ BTEqualStrategyNumber, F_NAMEEQ,
+ CStringGetDatum(constraint->relname));
+ ScanKeyInit(&skey[1],
+ Anum_pg_constraint_connamespace,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(namespaceId));
+
+ conscan = systable_beginscan(conrel, ConstraintNameNspIndexId,
+ true, NULL, 2, skey);
+
+ while (HeapTupleIsValid(tup = systable_getnext(conscan)))
+ {
+ Form_pg_constraint con = (Form_pg_constraint) GETSTRUCT(tup);
+
+ if (con->condeferrable)
+ conoidlist = lappend_oid(conoidlist, con->oid);
+ else if (stmt->deferred)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("constraint \"%s\" is not deferrable",
+ constraint->relname)));
+ found = true;
+ }
+
+ systable_endscan(conscan);
+
+ /*
+ * Once we've found a matching constraint we do not search
+ * later parts of the search path.
+ */
+ if (found)
+ break;
+ }
+
+ list_free(namespacelist);
+
+ /*
+ * Not found ?
+ */
+ if (!found)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("constraint \"%s\" does not exist",
+ constraint->relname)));
+ }
+
+ /*
+ * Scan for any possible descendants of the constraints. We append
+ * whatever we find to the same list that we're scanning; this has the
+ * effect that we create new scans for those, too, so if there are
+	 * further descendants, we'll also catch them.
+ */
+ foreach(lc, conoidlist)
+ {
+ Oid parent = lfirst_oid(lc);
+ ScanKeyData key;
+ SysScanDesc scan;
+ HeapTuple tuple;
+
+ ScanKeyInit(&key,
+ Anum_pg_constraint_conparentid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(parent));
+
+ scan = systable_beginscan(conrel, ConstraintParentIndexId, true, NULL, 1, &key);
+
+ while (HeapTupleIsValid(tuple = systable_getnext(scan)))
+ {
+ Form_pg_constraint con = (Form_pg_constraint) GETSTRUCT(tuple);
+
+ conoidlist = lappend_oid(conoidlist, con->oid);
+ }
+
+ systable_endscan(scan);
+ }
+
+ table_close(conrel, AccessShareLock);
+
+ /*
+ * Now, locate the trigger(s) implementing each of these constraints,
+ * and make a list of their OIDs.
+ */
+ tgrel = table_open(TriggerRelationId, AccessShareLock);
+
+ foreach(lc, conoidlist)
+ {
+ Oid conoid = lfirst_oid(lc);
+ ScanKeyData skey;
+ SysScanDesc tgscan;
+ HeapTuple htup;
+
+ ScanKeyInit(&skey,
+ Anum_pg_trigger_tgconstraint,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(conoid));
+
+ tgscan = systable_beginscan(tgrel, TriggerConstraintIndexId, true,
+ NULL, 1, &skey);
+
+ while (HeapTupleIsValid(htup = systable_getnext(tgscan)))
+ {
+ Form_pg_trigger pg_trigger = (Form_pg_trigger) GETSTRUCT(htup);
+
+ /*
+ * Silently skip triggers that are marked as non-deferrable in
+ * pg_trigger. This is not an error condition, since a
+ * deferrable RI constraint may have some non-deferrable
+ * actions.
+ */
+ if (pg_trigger->tgdeferrable)
+ tgoidlist = lappend_oid(tgoidlist, pg_trigger->oid);
+ }
+
+ systable_endscan(tgscan);
+ }
+
+ table_close(tgrel, AccessShareLock);
+
+ /*
+ * Now we can set the trigger states of individual triggers for this
+ * xact.
+ */
+ foreach(lc, tgoidlist)
+ {
+ Oid tgoid = lfirst_oid(lc);
+ SetConstraintState state = afterTriggers.state;
+ bool found = false;
+ int i;
+
+ for (i = 0; i < state->numstates; i++)
+ {
+ if (state->trigstates[i].sct_tgoid == tgoid)
+ {
+ state->trigstates[i].sct_tgisdeferred = stmt->deferred;
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ {
+ afterTriggers.state =
+ SetConstraintStateAddItem(state, tgoid, stmt->deferred);
+ }
+ }
+ }
+
+ /*
+ * SQL99 requires that when a constraint is set to IMMEDIATE, any deferred
+ * checks against that constraint must be made when the SET CONSTRAINTS
+ * command is executed -- i.e. the effects of the SET CONSTRAINTS command
+ * apply retroactively. We've updated the constraints state, so scan the
+ * list of previously deferred events to fire any that have now become
+ * immediate.
+ *
+ * Obviously, if this was SET ... DEFERRED then it can't have converted
+ * any unfired events to immediate, so we need do nothing in that case.
+ */
+ if (!stmt->deferred)
+ {
+ AfterTriggerEventList *events = &afterTriggers.events;
+ bool snapshot_set = false;
+
+ while (afterTriggerMarkEvents(events, NULL, true))
+ {
+ CommandId firing_id = afterTriggers.firing_counter++;
+
+ /*
+ * Make sure a snapshot has been established in case trigger
+ * functions need one. Note that we avoid setting a snapshot if
+ * we don't find at least one trigger that has to be fired now.
+ * This is so that BEGIN; SET CONSTRAINTS ...; SET TRANSACTION
+ * ISOLATION LEVEL SERIALIZABLE; ... works properly. (If we are
+ * at the start of a transaction it's not possible for any trigger
+ * events to be queued yet.)
+ */
+ if (!snapshot_set)
+ {
+ PushActiveSnapshot(GetTransactionSnapshot());
+ snapshot_set = true;
+ }
+
+ /*
+ * We can delete fired events if we are at top transaction level,
+ * but we'd better not if inside a subtransaction, since the
+ * subtransaction could later get rolled back.
+ */
+ if (afterTriggerInvokeEvents(events, firing_id, NULL,
+ !IsSubTransaction()))
+ break; /* all fired */
+ }
+
+ if (snapshot_set)
+ PopActiveSnapshot();
+ }
+}
+
+/* ----------
+ * AfterTriggerPendingOnRel()
+ *	Test to see if there are any pending after-trigger events for rel.
+ *
+ * This is used by TRUNCATE, CLUSTER, ALTER TABLE, etc to detect whether
+ * it is unsafe to perform major surgery on a relation.  Note that only
+ * local pending events are examined.  We assume that having exclusive lock
+ * on a rel guarantees there are no unserviced events in other backends ---
+ * but having a lock does not prevent there being such events in our own.
+ *
+ * In some scenarios it'd be reasonable to remove pending events (more
+ * specifically, mark them DONE by the current subxact) but without a lot
+ * of knowledge of the trigger semantics we can't do this in general.
+ *
+ * Returns true as soon as any not-yet-completed event targeting relid is
+ * found, either in the main transaction-level queue or in any incomplete
+ * query's queue; returns false only after scanning all of them.
+ * ----------
+ */
+bool
+AfterTriggerPendingOnRel(Oid relid)
+{
+	AfterTriggerEvent event;
+	AfterTriggerEventChunk *chunk;
+	int			depth;
+
+	/* Scan queued events */
+	for_each_event_chunk(event, chunk, afterTriggers.events)
+	{
+		AfterTriggerShared evtshared = GetTriggerSharedData(event);
+
+		/*
+		 * We can ignore completed events.  (Even if a DONE flag is rolled
+		 * back by subxact abort, it's OK because the effects of the TRUNCATE
+		 * or whatever must get rolled back too.)
+		 */
+		if (event->ate_flags & AFTER_TRIGGER_DONE)
+			continue;
+
+		if (evtshared->ats_relid == relid)
+			return true;
+	}
+
+	/*
+	 * Also scan events queued by incomplete queries.  This could only matter
+	 * if TRUNCATE/etc is executed by a function or trigger within an updating
+	 * query on the same relation, which is pretty perverse, but let's check.
+	 */
+	for (depth = 0; depth <= afterTriggers.query_depth && depth < afterTriggers.maxquerydepth; depth++)
+	{
+		for_each_event_chunk(event, chunk, afterTriggers.query_stack[depth].events)
+		{
+			AfterTriggerShared evtshared = GetTriggerSharedData(event);
+
+			/* As above, completed events can be ignored. */
+			if (event->ate_flags & AFTER_TRIGGER_DONE)
+				continue;
+
+			if (evtshared->ats_relid == relid)
+				return true;
+		}
+	}
+
+	/* No undone events referencing rel were found at any level. */
+	return false;
+}
+
+/* ----------
+ * AfterTriggerSaveEvent()
+ *
+ *	Called by ExecA[RS]...Triggers() to queue up the triggers that should
+ *	be fired for an event.
+ *
+ *	NOTE: this is called whenever there are any triggers associated with
+ *	the event (even if they are disabled).  This function decides which
+ *	triggers actually need to be queued.  It is also called after each row,
+ *	even if there are no triggers for that event, if there are any AFTER
+ *	STATEMENT triggers for the statement which use transition tables, so that
+ *	the transition tuplestores can be built.  Furthermore, if the transition
+ *	capture is happening for UPDATEd rows being moved to another partition due
+ *	to the partition-key being changed, then this function is called once when
+ *	the row is deleted (to capture OLD row), and once when the row is inserted
+ *	into another partition (to capture NEW row).  This is done separately because
+ *	DELETE and INSERT happen on different tables.
+ *
+ *	Transition tuplestores are built now, rather than when events are pulled
+ *	off of the queue because AFTER ROW triggers are allowed to select from the
+ *	transition tables for the statement.
+ *
+ *	This contains special support to queue the update events for the case where
+ *	a partitioned table undergoing a cross-partition update may have foreign
+ *	keys pointing into it.  Normally, a partitioned table's row triggers are
+ *	not fired because the leaf partition(s) which are modified as a result of
+ *	the operation on the partitioned table contain the same triggers which are
+ *	fired instead.  But that general scheme can cause problematic behavior with
+ *	foreign key triggers during cross-partition updates, which are implemented
+ *	as DELETE on the source partition followed by INSERT into the destination
+ *	partition.  Specifically, firing DELETE triggers would lead to the wrong
+ *	foreign key action to be enforced considering that the original command is
+ *	UPDATE; in this case, this function is called with relinfo as the
+ *	partitioned table, and src_partinfo and dst_partinfo referring to the
+ *	source and target leaf partitions, respectively.
+ *
+ *	is_crosspart_update is true either when a DELETE event is fired on the
+ *	source partition (which is to be ignored) or an UPDATE event is fired on
+ *	the root partitioned table.
+ * ----------
+ */
+static void
+AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
+					  ResultRelInfo *src_partinfo,
+					  ResultRelInfo *dst_partinfo,
+					  int event, bool row_trigger,
+					  TupleTableSlot *oldslot, TupleTableSlot *newslot,
+					  List *recheckIndexes, Bitmapset *modifiedCols,
+					  TransitionCaptureState *transition_capture,
+					  bool is_crosspart_update)
+{
+	Relation	rel = relinfo->ri_RelationDesc;
+	TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
+	AfterTriggerEventData new_event;
+	AfterTriggerSharedData new_shared;
+	char		relkind = rel->rd_rel->relkind;
+	int			tgtype_event;
+	int			tgtype_level;
+	int			i;
+	/* tuplestore for spooling foreign-table tuples, created on demand */
+	Tuplestorestate *fdw_tuplestore = NULL;
+
+	/*
+	 * Check state. We use a normal test not Assert because it is possible to
+	 * reach here in the wrong state given misconfigured RI triggers, in
+	 * particular deferring a cascade action trigger.
+	 */
+	if (afterTriggers.query_depth < 0)
+		elog(ERROR, "AfterTriggerSaveEvent() called outside of query");
+
+	/* Be sure we have enough space to record events at this query depth. */
+	if (afterTriggers.query_depth >= afterTriggers.maxquerydepth)
+		AfterTriggerEnlargeQueryState();
+
+	/*
+	 * If the directly named relation has any triggers with transition tables,
+	 * then we need to capture transition tuples.
+	 */
+	if (row_trigger && transition_capture != NULL)
+	{
+		TupleTableSlot *original_insert_tuple = transition_capture->tcs_original_insert_tuple;
+
+		/*
+		 * Capture the old tuple in the appropriate transition table based on
+		 * the event.
+		 */
+		if (!TupIsNull(oldslot))
+		{
+			Tuplestorestate *old_tuplestore;
+
+			old_tuplestore = GetAfterTriggersTransitionTable(event,
+															 oldslot,
+															 NULL,
+															 transition_capture);
+			TransitionTableAddTuple(estate, transition_capture, relinfo,
+									oldslot, NULL, old_tuplestore);
+		}
+
+		/*
+		 * Capture the new tuple in the appropriate transition table based on
+		 * the event.
+		 */
+		if (!TupIsNull(newslot))
+		{
+			Tuplestorestate *new_tuplestore;
+
+			new_tuplestore = GetAfterTriggersTransitionTable(event,
+															 NULL,
+															 newslot,
+															 transition_capture);
+			TransitionTableAddTuple(estate, transition_capture, relinfo,
+									newslot, original_insert_tuple, new_tuplestore);
+		}
+
+		/*
+		 * If transition tables are the only reason we're here, return. As
+		 * mentioned above, we can also be here during update tuple routing in
+		 * presence of transition tables, in which case this function is
+		 * called separately for OLD and NEW, so we expect exactly one of them
+		 * to be NULL.
+		 */
+		if (trigdesc == NULL ||
+			(event == TRIGGER_EVENT_DELETE && !trigdesc->trig_delete_after_row) ||
+			(event == TRIGGER_EVENT_INSERT && !trigdesc->trig_insert_after_row) ||
+			(event == TRIGGER_EVENT_UPDATE && !trigdesc->trig_update_after_row) ||
+			(event == TRIGGER_EVENT_UPDATE && (TupIsNull(oldslot) ^ TupIsNull(newslot))))
+			return;
+	}
+
+	/*
+	 * We normally don't see partitioned tables here for row level triggers
+	 * except in the special case of a cross-partition update. In that case,
+	 * nodeModifyTable.c:ExecCrossPartitionUpdateForeignKey() calls here to
+	 * queue an update event on the root target partitioned table, also
+	 * passing the source and destination partitions and their tuples.
+	 */
+	Assert(!row_trigger ||
+		   rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE ||
+		   (is_crosspart_update &&
+			TRIGGER_FIRED_BY_UPDATE(event) &&
+			src_partinfo != NULL && dst_partinfo != NULL));
+
+	/*
+	 * Validate the event code and collect the associated tuple CTIDs.
+	 *
+	 * The event code will be used both as a bitmask and an array offset, so
+	 * validation is important to make sure we don't walk off the edge of our
+	 * arrays.
+	 *
+	 * Also, if we're considering statement-level triggers, check whether we
+	 * already queued a set of them for this event, and cancel the prior set
+	 * if so. This preserves the behavior that statement-level triggers fire
+	 * just once per statement and fire after row-level triggers.
+	 */
+	switch (event)
+	{
+		case TRIGGER_EVENT_INSERT:
+			tgtype_event = TRIGGER_TYPE_INSERT;
+			if (row_trigger)
+			{
+				Assert(oldslot == NULL);
+				Assert(newslot != NULL);
+				ItemPointerCopy(&(newslot->tts_tid), &(new_event.ate_ctid1));
+				ItemPointerSetInvalid(&(new_event.ate_ctid2));
+			}
+			else
+			{
+				Assert(oldslot == NULL);
+				Assert(newslot == NULL);
+				ItemPointerSetInvalid(&(new_event.ate_ctid1));
+				ItemPointerSetInvalid(&(new_event.ate_ctid2));
+				cancel_prior_stmt_triggers(RelationGetRelid(rel),
+										   CMD_INSERT, event);
+			}
+			break;
+		case TRIGGER_EVENT_DELETE:
+			tgtype_event = TRIGGER_TYPE_DELETE;
+			if (row_trigger)
+			{
+				Assert(oldslot != NULL);
+				Assert(newslot == NULL);
+				ItemPointerCopy(&(oldslot->tts_tid), &(new_event.ate_ctid1));
+				ItemPointerSetInvalid(&(new_event.ate_ctid2));
+			}
+			else
+			{
+				Assert(oldslot == NULL);
+				Assert(newslot == NULL);
+				ItemPointerSetInvalid(&(new_event.ate_ctid1));
+				ItemPointerSetInvalid(&(new_event.ate_ctid2));
+				cancel_prior_stmt_triggers(RelationGetRelid(rel),
+										   CMD_DELETE, event);
+			}
+			break;
+		case TRIGGER_EVENT_UPDATE:
+			tgtype_event = TRIGGER_TYPE_UPDATE;
+			if (row_trigger)
+			{
+				Assert(oldslot != NULL);
+				Assert(newslot != NULL);
+				ItemPointerCopy(&(oldslot->tts_tid), &(new_event.ate_ctid1));
+				ItemPointerCopy(&(newslot->tts_tid), &(new_event.ate_ctid2));
+
+				/*
+				 * Also remember the OIDs of partitions to fetch these tuples
+				 * out of later in AfterTriggerExecute().
+				 */
+				if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+				{
+					Assert(src_partinfo != NULL && dst_partinfo != NULL);
+					new_event.ate_src_part =
+						RelationGetRelid(src_partinfo->ri_RelationDesc);
+					new_event.ate_dst_part =
+						RelationGetRelid(dst_partinfo->ri_RelationDesc);
+				}
+			}
+			else
+			{
+				Assert(oldslot == NULL);
+				Assert(newslot == NULL);
+				ItemPointerSetInvalid(&(new_event.ate_ctid1));
+				ItemPointerSetInvalid(&(new_event.ate_ctid2));
+				cancel_prior_stmt_triggers(RelationGetRelid(rel),
+										   CMD_UPDATE, event);
+			}
+			break;
+		case TRIGGER_EVENT_TRUNCATE:
+			tgtype_event = TRIGGER_TYPE_TRUNCATE;
+			Assert(oldslot == NULL);
+			Assert(newslot == NULL);
+			ItemPointerSetInvalid(&(new_event.ate_ctid1));
+			ItemPointerSetInvalid(&(new_event.ate_ctid2));
+			break;
+		default:
+			elog(ERROR, "invalid after-trigger event code: %d", event);
+			tgtype_event = 0;	/* keep compiler quiet */
+			break;
+	}
+
+	/* Determine flags */
+	if (!(relkind == RELKIND_FOREIGN_TABLE && row_trigger))
+	{
+		if (row_trigger && event == TRIGGER_EVENT_UPDATE)
+		{
+			if (relkind == RELKIND_PARTITIONED_TABLE)
+				new_event.ate_flags = AFTER_TRIGGER_CP_UPDATE;
+			else
+				new_event.ate_flags = AFTER_TRIGGER_2CTID;
+		}
+		else
+			new_event.ate_flags = AFTER_TRIGGER_1CTID;
+	}
+
+	/* else, we'll initialize ate_flags for each trigger */
+
+	tgtype_level = (row_trigger ? TRIGGER_TYPE_ROW : TRIGGER_TYPE_STATEMENT);
+
+	/*
+	 * Must convert/copy the source and destination partition tuples into the
+	 * root partitioned table's format/slot, because the processing in the
+	 * loop below expects both oldslot and newslot tuples to be in that form.
+	 */
+	if (row_trigger && rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+	{
+		TupleTableSlot *rootslot;
+		TupleConversionMap *map;
+
+		rootslot = ExecGetTriggerOldSlot(estate, relinfo);
+		map = ExecGetChildToRootMap(src_partinfo);
+		if (map)
+			oldslot = execute_attr_map_slot(map->attrMap,
+											oldslot,
+											rootslot);
+		else
+			oldslot = ExecCopySlot(rootslot, oldslot);
+
+		rootslot = ExecGetTriggerNewSlot(estate, relinfo);
+		map = ExecGetChildToRootMap(dst_partinfo);
+		if (map)
+			newslot = execute_attr_map_slot(map->attrMap,
+											newslot,
+											rootslot);
+		else
+			newslot = ExecCopySlot(rootslot, newslot);
+	}
+
+	/* Scan the triggers, queueing an event for each one that should fire. */
+	for (i = 0; i < trigdesc->numtriggers; i++)
+	{
+		Trigger    *trigger = &trigdesc->triggers[i];
+
+		if (!TRIGGER_TYPE_MATCHES(trigger->tgtype,
+								  tgtype_level,
+								  TRIGGER_TYPE_AFTER,
+								  tgtype_event))
+			continue;
+		if (!TriggerEnabled(estate, relinfo, trigger, event,
+							modifiedCols, oldslot, newslot))
+			continue;
+
+		if (relkind == RELKIND_FOREIGN_TABLE && row_trigger)
+		{
+			if (fdw_tuplestore == NULL)
+			{
+				fdw_tuplestore = GetCurrentFDWTuplestore();
+				new_event.ate_flags = AFTER_TRIGGER_FDW_FETCH;
+			}
+			else
+				/* subsequent event for the same tuple */
+				new_event.ate_flags = AFTER_TRIGGER_FDW_REUSE;
+		}
+
+		/*
+		 * If the trigger is a foreign key enforcement trigger, there are
+		 * certain cases where we can skip queueing the event because we can
+		 * tell by inspection that the FK constraint will still pass. There
+		 * are also some cases during cross-partition updates of a partitioned
+		 * table where queuing the event can be skipped.
+		 */
+		if (TRIGGER_FIRED_BY_UPDATE(event) || TRIGGER_FIRED_BY_DELETE(event))
+		{
+			switch (RI_FKey_trigger_type(trigger->tgfoid))
+			{
+				case RI_TRIGGER_PK:
+
+					/*
+					 * For cross-partitioned updates of partitioned PK table,
+					 * skip the event fired by the component delete on the
+					 * source leaf partition unless the constraint originates
+					 * in the partition itself (!tgisclone), because the
+					 * update event that will be fired on the root
+					 * (partitioned) target table will be used to perform the
+					 * necessary foreign key enforcement action.
+					 */
+					if (is_crosspart_update &&
+						TRIGGER_FIRED_BY_DELETE(event) &&
+						trigger->tgisclone)
+						continue;
+
+					/* Update or delete on trigger's PK table */
+					if (!RI_FKey_pk_upd_check_required(trigger, rel,
+													   oldslot, newslot))
+					{
+						/* skip queuing this event */
+						continue;
+					}
+					break;
+
+				case RI_TRIGGER_FK:
+
+					/*
+					 * Update on trigger's FK table. We can skip the update
+					 * event fired on a partitioned table during a
+					 * cross-partition of that table, because the insert event
+					 * that is fired on the destination leaf partition would
+					 * suffice to perform the necessary foreign key check.
+					 * Moreover, RI_FKey_fk_upd_check_required() expects to be
+					 * passed a tuple that contains system attributes, most of
+					 * which are not present in the virtual slot belonging to
+					 * a partitioned table.
+					 */
+					if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE ||
+						!RI_FKey_fk_upd_check_required(trigger, rel,
+													   oldslot, newslot))
+					{
+						/* skip queuing this event */
+						continue;
+					}
+					break;
+
+				case RI_TRIGGER_NONE:
+
+					/*
+					 * Not an FK trigger. No need to queue the update event
+					 * fired during a cross-partitioned update of a
+					 * partitioned table, because the same row trigger must be
+					 * present in the leaf partition(s) that are affected as
+					 * part of this update and the events fired on them are
+					 * queued instead.
+					 */
+					if (row_trigger &&
+						rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+						continue;
+					break;
+			}
+		}
+
+		/*
+		 * If the trigger is a deferred unique constraint check trigger, only
+		 * queue it if the unique constraint was potentially violated, which
+		 * we know from index insertion time.
+		 */
+		if (trigger->tgfoid == F_UNIQUE_KEY_RECHECK)
+		{
+			if (!list_member_oid(recheckIndexes, trigger->tgconstrindid))
+				continue;		/* Uniqueness definitely not violated */
+		}
+
+		/*
+		 * Fill in event structure and add it to the current query's queue.
+		 * Note we set ats_table to NULL whenever this trigger doesn't use
+		 * transition tables, to improve sharability of the shared event data.
+		 */
+		new_shared.ats_event =
+			(event & TRIGGER_EVENT_OPMASK) |
+			(row_trigger ? TRIGGER_EVENT_ROW : 0) |
+			(trigger->tgdeferrable ? AFTER_TRIGGER_DEFERRABLE : 0) |
+			(trigger->tginitdeferred ? AFTER_TRIGGER_INITDEFERRED : 0);
+		new_shared.ats_tgoid = trigger->tgoid;
+		new_shared.ats_relid = RelationGetRelid(rel);
+		new_shared.ats_firing_id = 0;
+		if ((trigger->tgoldtable || trigger->tgnewtable) &&
+			transition_capture != NULL)
+			new_shared.ats_table = transition_capture->tcs_private;
+		else
+			new_shared.ats_table = NULL;
+		new_shared.ats_modifiedcols = afterTriggerCopyBitmap(modifiedCols);
+
+		afterTriggerAddEvent(&afterTriggers.query_stack[afterTriggers.query_depth].events,
+							 &new_event, &new_shared);
+	}
+
+	/*
+	 * Finally, spool any foreign tuple(s). The tuplestore squashes them to
+	 * minimal tuples, so this loses any system columns. The executor lost
+	 * those columns before us, for an unrelated reason, so this is fine.
+	 */
+	if (fdw_tuplestore)
+	{
+		if (oldslot != NULL)
+			tuplestore_puttupleslot(fdw_tuplestore, oldslot);
+		if (newslot != NULL)
+			tuplestore_puttupleslot(fdw_tuplestore, newslot);
+	}
+}
+
+/*
+ * Report whether BEFORE STATEMENT triggers have already been queued for the
+ * given relation + operation, and set the flag so that the next call for the
+ * same pair reports "true".
+ */
+static bool
+before_stmt_triggers_fired(Oid relid, CmdType cmdType)
+{
+	AfterTriggersTableData *tabledata;
+	bool		already_fired;
+
+	/* Sanity-check state, in the same way AfterTriggerSaveEvent does. */
+	if (afterTriggers.query_depth < 0)
+		elog(ERROR, "before_stmt_triggers_fired() called outside of query");
+
+	/* Make sure per-query state exists at this query depth. */
+	if (afterTriggers.query_depth >= afterTriggers.maxquerydepth)
+		AfterTriggerEnlargeQueryState();
+
+	/*
+	 * The flag lives in the AfterTriggersTableData that also carries the
+	 * transition tables for this relation + operation.  Thus, if more tuples
+	 * arrive after triggers have already fired and a fresh set of transition
+	 * tables has to be built, a fresh set of statement triggers is allowed
+	 * to be queued as well.
+	 */
+	tabledata = GetAfterTriggersTableData(relid, cmdType);
+	already_fired = tabledata->before_trig_done;
+	tabledata->before_trig_done = true;
+	return already_fired;
+}
+
+/*
+ * If we previously queued a set of AFTER STATEMENT triggers for the given
+ * relation + operation, and they've not been fired yet, cancel them. The
+ * caller will queue a fresh set that's after any row-level triggers that may
+ * have been queued by the current sub-statement, preserving (as much as
+ * possible) the property that AFTER ROW triggers fire before AFTER STATEMENT
+ * triggers, and that the latter only fire once. This deals with the
+ * situation where several FK enforcement triggers sequentially queue triggers
+ * for the same table into the same trigger query level. We can't fully
+ * prevent odd behavior though: if there are AFTER ROW triggers taking
+ * transition tables, we don't want to change the transition tables once the
+ * first such trigger has seen them. In such a case, any additional events
+ * will result in creating new transition tables and allowing new firings of
+ * statement triggers.
+ *
+ * This also saves the current event list location so that a later invocation
+ * of this function can cheaply find the triggers we're about to queue and
+ * cancel them.
+ */
+static void
+cancel_prior_stmt_triggers(Oid relid, CmdType cmdType, int tgevent)
+{
+	AfterTriggersTableData *table;
+	AfterTriggersQueryData *qs = &afterTriggers.query_stack[afterTriggers.query_depth];
+
+	/*
+	 * We keep this state in the AfterTriggersTableData that also holds
+	 * transition tables for the relation + operation. In this way, if we are
+	 * forced to make a new set of transition tables because more tuples get
+	 * entered after we've already fired triggers, we will allow a new set of
+	 * statement triggers to get queued without canceling the old ones.
+	 */
+	table = GetAfterTriggersTableData(relid, cmdType);
+
+	if (table->after_trig_done)
+	{
+		/*
+		 * We want to start scanning from the tail location that existed just
+		 * before we inserted any statement triggers. But the events list
+		 * might've been entirely empty then, in which case scan from the
+		 * current head.
+		 */
+		AfterTriggerEvent event;
+		AfterTriggerEventChunk *chunk;
+
+		if (table->after_trig_events.tail)
+		{
+			chunk = table->after_trig_events.tail;
+			event = (AfterTriggerEvent) table->after_trig_events.tailfree;
+		}
+		else
+		{
+			chunk = qs->events.head;
+			event = NULL;
+		}
+
+		/* Walk events from the saved position through the queue's tail. */
+		for_each_chunk_from(chunk)
+		{
+			/* NULL event means "start at the beginning of this chunk" */
+			if (event == NULL)
+				event = (AfterTriggerEvent) CHUNK_DATA_START(chunk);
+			for_each_event_from(event, chunk)
+			{
+				AfterTriggerShared evtshared = GetTriggerSharedData(event);
+
+				/*
+				 * Exit loop when we reach events that aren't AS triggers for
+				 * the target relation.
+				 */
+				if (evtshared->ats_relid != relid)
+					goto done;
+				if ((evtshared->ats_event & TRIGGER_EVENT_OPMASK) != tgevent)
+					goto done;
+				if (!TRIGGER_FIRED_FOR_STATEMENT(evtshared->ats_event))
+					goto done;
+				if (!TRIGGER_FIRED_AFTER(evtshared->ats_event))
+					goto done;
+				/* OK, mark it DONE */
+				event->ate_flags &= ~AFTER_TRIGGER_IN_PROGRESS;
+				event->ate_flags |= AFTER_TRIGGER_DONE;
+			}
+			/* signal we must reinitialize event ptr for next chunk */
+			event = NULL;
+		}
+	}
+done:
+
+	/* In any case, save current insertion point for next time */
+	table->after_trig_done = true;
+	table->after_trig_events = qs->events;
+}
+
+/*
+ * pg_trigger_depth
+ *		SQL-callable function exposing the current value of MyTriggerDepth,
+ *		i.e. the backend's trigger nesting level.
+ */
+Datum
+pg_trigger_depth(PG_FUNCTION_ARGS)
+{
+	int			depth = MyTriggerDepth;
+
+	PG_RETURN_INT32(depth);
+}
diff --git a/src/backend/commands/tsearchcmds.c b/src/backend/commands/tsearchcmds.c
new file mode 100644
index 0000000..4cc4e3c
--- /dev/null
+++ b/src/backend/commands/tsearchcmds.c
@@ -0,0 +1,1759 @@
+/*-------------------------------------------------------------------------
+ *
+ * tsearchcmds.c
+ *
+ * Routines for tsearch manipulation commands
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/commands/tsearchcmds.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <ctype.h>
+
+#include "access/genam.h"
+#include "access/htup_details.h"
+#include "access/table.h"
+#include "access/xact.h"
+#include "catalog/catalog.h"
+#include "catalog/dependency.h"
+#include "catalog/indexing.h"
+#include "catalog/objectaccess.h"
+#include "catalog/pg_namespace.h"
+#include "catalog/pg_proc.h"
+#include "catalog/pg_ts_config.h"
+#include "catalog/pg_ts_config_map.h"
+#include "catalog/pg_ts_dict.h"
+#include "catalog/pg_ts_parser.h"
+#include "catalog/pg_ts_template.h"
+#include "catalog/pg_type.h"
+#include "commands/alter.h"
+#include "commands/defrem.h"
+#include "commands/event_trigger.h"
+#include "common/string.h"
+#include "miscadmin.h"
+#include "nodes/makefuncs.h"
+#include "parser/parse_func.h"
+#include "tsearch/ts_cache.h"
+#include "tsearch/ts_utils.h"
+#include "utils/builtins.h"
+#include "utils/fmgroids.h"
+#include "utils/lsyscache.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
+
+
+static void MakeConfigurationMapping(AlterTSConfigurationStmt *stmt,
+ HeapTuple tup, Relation relMap);
+static void DropConfigurationMapping(AlterTSConfigurationStmt *stmt,
+ HeapTuple tup, Relation relMap);
+static DefElem *buildDefItem(const char *name, const char *val,
+ bool was_quoted);
+
+
+/* --------------------- TS Parser commands ------------------------ */
+
+/*
+ * lookup a parser support function and return its OID (as a Datum)
+ *
+ * attnum is the pg_ts_parser column the function will go into
+ *
+ * Each parser method takes "internal" as its first argument; the expected
+ * argument count and return type vary per method, as set up in the switch
+ * below.  Errors out if the function does not exist or has the wrong
+ * return type.
+ */
+static Datum
+get_ts_parser_func(DefElem *defel, int attnum)
+{
+	List	   *funcName = defGetQualifiedName(defel);
+	Oid			typeId[3];
+	Oid			retTypeId;
+	int			nargs;
+	Oid			procOid;
+
+	retTypeId = INTERNALOID;	/* correct for most */
+	typeId[0] = INTERNALOID;
+	switch (attnum)
+	{
+		case Anum_pg_ts_parser_prsstart:
+			nargs = 2;
+			typeId[1] = INT4OID;
+			break;
+		case Anum_pg_ts_parser_prstoken:
+			nargs = 3;
+			typeId[1] = INTERNALOID;
+			typeId[2] = INTERNALOID;
+			break;
+		case Anum_pg_ts_parser_prsend:
+			nargs = 1;
+			retTypeId = VOIDOID;
+			break;
+		case Anum_pg_ts_parser_prsheadline:
+			nargs = 3;
+			typeId[1] = INTERNALOID;
+			typeId[2] = TSQUERYOID;
+			break;
+		case Anum_pg_ts_parser_prslextype:
+			nargs = 1;
+
+			/*
+			 * Note: because the lextype method returns type internal, it must
+			 * have an internal-type argument for security reasons.  The
+			 * argument is not actually used, but is just passed as a zero.
+			 */
+			break;
+		default:
+			/* should not be here */
+			elog(ERROR, "unrecognized attribute for text search parser: %d",
+				 attnum);
+			nargs = 0;			/* keep compiler quiet */
+	}
+
+	/* LookupFuncName ereports if no such function (last arg = missing_ok) */
+	procOid = LookupFuncName(funcName, nargs, typeId, false);
+	if (get_func_rettype(procOid) != retTypeId)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+				 errmsg("function %s should return type %s",
+						func_signature_string(funcName, nargs, NIL, typeId),
+						format_type_be(retTypeId))));
+
+	return ObjectIdGetDatum(procOid);
+}
+
+/*
+ * make pg_depend entries for a new pg_ts_parser entry
+ *
+ * Return value is the address of said new entry.
+ */
+static ObjectAddress
+makeParserDependencies(HeapTuple tuple)
+{
+	Form_pg_ts_parser prs = (Form_pg_ts_parser) GETSTRUCT(tuple);
+	ObjectAddress myself,
+				referenced;
+	ObjectAddresses *addrs;
+
+	ObjectAddressSet(myself, TSParserRelationId, prs->oid);
+
+	/* dependency on extension */
+	recordDependencyOnCurrentExtension(&myself, false);
+
+	addrs = new_object_addresses();
+
+	/* dependency on namespace */
+	ObjectAddressSet(referenced, NamespaceRelationId, prs->prsnamespace);
+	add_exact_object_address(&referenced, addrs);
+
+	/* dependencies on functions */
+	ObjectAddressSet(referenced, ProcedureRelationId, prs->prsstart);
+	add_exact_object_address(&referenced, addrs);
+
+	/* classId stays ProcedureRelationId; just swap in each function's OID */
+	referenced.objectId = prs->prstoken;
+	add_exact_object_address(&referenced, addrs);
+
+	referenced.objectId = prs->prsend;
+	add_exact_object_address(&referenced, addrs);
+
+	referenced.objectId = prs->prslextype;
+	add_exact_object_address(&referenced, addrs);
+
+	/* headline support function is optional */
+	if (OidIsValid(prs->prsheadline))
+	{
+		referenced.objectId = prs->prsheadline;
+		add_exact_object_address(&referenced, addrs);
+	}
+
+	record_object_address_dependencies(&myself, addrs, DEPENDENCY_NORMAL);
+	free_object_addresses(addrs);
+
+	return myself;
+}
+
+/*
+ * CREATE TEXT SEARCH PARSER
+ *
+ * Superuser-only.  Converts the given qualified name and parameter list
+ * into a new pg_ts_parser row, records dependencies, and returns the
+ * address of the new catalog entry.
+ */
+ObjectAddress
+DefineTSParser(List *names, List *parameters)
+{
+	char	   *prsname;
+	ListCell   *pl;
+	Relation	prsRel;
+	HeapTuple	tup;
+	Datum		values[Natts_pg_ts_parser];
+	bool		nulls[Natts_pg_ts_parser];
+	NameData	pname;
+	Oid			prsOid;
+	Oid			namespaceoid;
+	ObjectAddress address;
+
+	if (!superuser())
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 errmsg("must be superuser to create text search parsers")));
+
+	prsRel = table_open(TSParserRelationId, RowExclusiveLock);
+
+	/* Convert list of names to a name and namespace */
+	namespaceoid = QualifiedNameGetCreationNamespace(names, &prsname);
+
+	/*
+	 * initialize tuple fields with name/namespace.  Note that zeroing
+	 * "values" leaves unspecified function columns as InvalidOid, which the
+	 * validation checks below rely on.
+	 */
+	memset(values, 0, sizeof(values));
+	memset(nulls, false, sizeof(nulls));
+
+	prsOid = GetNewOidWithIndex(prsRel, TSParserOidIndexId,
+								Anum_pg_ts_parser_oid);
+	values[Anum_pg_ts_parser_oid - 1] = ObjectIdGetDatum(prsOid);
+	namestrcpy(&pname, prsname);
+	values[Anum_pg_ts_parser_prsname - 1] = NameGetDatum(&pname);
+	values[Anum_pg_ts_parser_prsnamespace - 1] = ObjectIdGetDatum(namespaceoid);
+
+	/*
+	 * loop over the definition list and extract the information we need.
+	 */
+	foreach(pl, parameters)
+	{
+		DefElem    *defel = (DefElem *) lfirst(pl);
+
+		if (strcmp(defel->defname, "start") == 0)
+		{
+			values[Anum_pg_ts_parser_prsstart - 1] =
+				get_ts_parser_func(defel, Anum_pg_ts_parser_prsstart);
+		}
+		else if (strcmp(defel->defname, "gettoken") == 0)
+		{
+			values[Anum_pg_ts_parser_prstoken - 1] =
+				get_ts_parser_func(defel, Anum_pg_ts_parser_prstoken);
+		}
+		else if (strcmp(defel->defname, "end") == 0)
+		{
+			values[Anum_pg_ts_parser_prsend - 1] =
+				get_ts_parser_func(defel, Anum_pg_ts_parser_prsend);
+		}
+		else if (strcmp(defel->defname, "headline") == 0)
+		{
+			values[Anum_pg_ts_parser_prsheadline - 1] =
+				get_ts_parser_func(defel, Anum_pg_ts_parser_prsheadline);
+		}
+		else if (strcmp(defel->defname, "lextypes") == 0)
+		{
+			values[Anum_pg_ts_parser_prslextype - 1] =
+				get_ts_parser_func(defel, Anum_pg_ts_parser_prslextype);
+		}
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("text search parser parameter \"%s\" not recognized",
+							defel->defname)));
+	}
+
+	/*
+	 * Validation: all methods except headline are required.
+	 */
+	if (!OidIsValid(DatumGetObjectId(values[Anum_pg_ts_parser_prsstart - 1])))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+				 errmsg("text search parser start method is required")));
+
+	if (!OidIsValid(DatumGetObjectId(values[Anum_pg_ts_parser_prstoken - 1])))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+				 errmsg("text search parser gettoken method is required")));
+
+	if (!OidIsValid(DatumGetObjectId(values[Anum_pg_ts_parser_prsend - 1])))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+				 errmsg("text search parser end method is required")));
+
+	if (!OidIsValid(DatumGetObjectId(values[Anum_pg_ts_parser_prslextype - 1])))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+				 errmsg("text search parser lextypes method is required")));
+
+	/*
+	 * Looks good, insert
+	 */
+	tup = heap_form_tuple(prsRel->rd_att, values, nulls);
+
+	CatalogTupleInsert(prsRel, tup);
+
+	address = makeParserDependencies(tup);
+
+	/* Post creation hook for new text search parser */
+	InvokeObjectPostCreateHook(TSParserRelationId, prsOid, 0);
+
+	heap_freetuple(tup);
+
+	table_close(prsRel, RowExclusiveLock);
+
+	return address;
+}
+
+/* ---------------------- TS Dictionary commands -----------------------*/
+
+/*
+ * make pg_depend entries for a new pg_ts_dict entry
+ *
+ * Return value is address of the new entry
+ */
+static ObjectAddress
+makeDictionaryDependencies(HeapTuple tuple)
+{
+	Form_pg_ts_dict dict = (Form_pg_ts_dict) GETSTRUCT(tuple);
+	ObjectAddress myself,
+				referenced;
+	ObjectAddresses *addrs;
+
+	ObjectAddressSet(myself, TSDictionaryRelationId, dict->oid);
+
+	/* dependency on owner (goes to pg_shdepend, not the addrs list) */
+	recordDependencyOnOwner(myself.classId, myself.objectId, dict->dictowner);
+
+	/* dependency on extension */
+	recordDependencyOnCurrentExtension(&myself, false);
+
+	addrs = new_object_addresses();
+
+	/* dependency on namespace */
+	ObjectAddressSet(referenced, NamespaceRelationId, dict->dictnamespace);
+	add_exact_object_address(&referenced, addrs);
+
+	/* dependency on template */
+	ObjectAddressSet(referenced, TSTemplateRelationId, dict->dicttemplate);
+	add_exact_object_address(&referenced, addrs);
+
+	record_object_address_dependencies(&myself, addrs, DEPENDENCY_NORMAL);
+	free_object_addresses(addrs);
+
+	return myself;
+}
+
+/*
+ * verify that a template's init method accepts a proposed option list
+ *
+ * Errors out (via the init method itself, or directly) if the options are
+ * unacceptable; returns silently if they look OK.
+ */
+static void
+verify_dictoptions(Oid tmplId, List *dictoptions)
+{
+	HeapTuple	tup;
+	Form_pg_ts_template tform;
+	Oid			initmethod;
+
+	/*
+	 * Suppress this test when running in a standalone backend.  This is a
+	 * hack to allow initdb to create prefab dictionaries that might not
+	 * actually be usable in template1's encoding (due to using external files
+	 * that can't be translated into template1's encoding).  We want to create
+	 * them anyway, since they might be usable later in other databases.
+	 */
+	if (!IsUnderPostmaster)
+		return;
+
+	tup = SearchSysCache1(TSTEMPLATEOID, ObjectIdGetDatum(tmplId));
+	if (!HeapTupleIsValid(tup)) /* should not happen */
+		elog(ERROR, "cache lookup failed for text search template %u",
+			 tmplId);
+	tform = (Form_pg_ts_template) GETSTRUCT(tup);
+
+	initmethod = tform->tmplinit;
+
+	if (!OidIsValid(initmethod))
+	{
+		/* If there is no init method, disallow any options */
+		if (dictoptions)
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("text search template \"%s\" does not accept options",
+							NameStr(tform->tmplname))));
+	}
+	else
+	{
+		/*
+		 * Copy the options just in case init method thinks it can scribble on
+		 * them ...
+		 */
+		dictoptions = copyObject(dictoptions);
+
+		/*
+		 * Call the init method and see if it complains.  We don't worry about
+		 * it leaking memory, since our command will soon be over anyway.
+		 */
+		(void) OidFunctionCall1(initmethod, PointerGetDatum(dictoptions));
+	}
+
+	ReleaseSysCache(tup);
+}
+
+/*
+ * CREATE TEXT SEARCH DICTIONARY
+ *
+ * Requires CREATE privilege on the target namespace (not superuser).
+ * Any parameter other than "template" is treated as a dictionary option
+ * and validated against the template's init method.
+ */
+ObjectAddress
+DefineTSDictionary(List *names, List *parameters)
+{
+	ListCell   *pl;
+	Relation	dictRel;
+	HeapTuple	tup;
+	Datum		values[Natts_pg_ts_dict];
+	bool		nulls[Natts_pg_ts_dict];
+	NameData	dname;
+	Oid			templId = InvalidOid;
+	List	   *dictoptions = NIL;
+	Oid			dictOid;
+	Oid			namespaceoid;
+	AclResult	aclresult;
+	char	   *dictname;
+	ObjectAddress address;
+
+	/* Convert list of names to a name and namespace */
+	namespaceoid = QualifiedNameGetCreationNamespace(names, &dictname);
+
+	/* Check we have creation rights in target namespace */
+	aclresult = pg_namespace_aclcheck(namespaceoid, GetUserId(), ACL_CREATE);
+	if (aclresult != ACLCHECK_OK)
+		aclcheck_error(aclresult, OBJECT_SCHEMA,
+					   get_namespace_name(namespaceoid));
+
+	/*
+	 * loop over the definition list and extract the information we need.
+	 */
+	foreach(pl, parameters)
+	{
+		DefElem    *defel = (DefElem *) lfirst(pl);
+
+		if (strcmp(defel->defname, "template") == 0)
+		{
+			templId = get_ts_template_oid(defGetQualifiedName(defel), false);
+		}
+		else
+		{
+			/* Assume it's an option for the dictionary itself */
+			dictoptions = lappend(dictoptions, defel);
+		}
+	}
+
+	/*
+	 * Validation
+	 */
+	if (!OidIsValid(templId))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+				 errmsg("text search template is required")));
+
+	/* let the template's init method reject bad options before we insert */
+	verify_dictoptions(templId, dictoptions);
+
+
+	dictRel = table_open(TSDictionaryRelationId, RowExclusiveLock);
+
+	/*
+	 * Looks good, insert
+	 */
+	memset(values, 0, sizeof(values));
+	memset(nulls, false, sizeof(nulls));
+
+	dictOid = GetNewOidWithIndex(dictRel, TSDictionaryOidIndexId,
+								 Anum_pg_ts_dict_oid);
+	values[Anum_pg_ts_dict_oid - 1] = ObjectIdGetDatum(dictOid);
+	namestrcpy(&dname, dictname);
+	values[Anum_pg_ts_dict_dictname - 1] = NameGetDatum(&dname);
+	values[Anum_pg_ts_dict_dictnamespace - 1] = ObjectIdGetDatum(namespaceoid);
+	values[Anum_pg_ts_dict_dictowner - 1] = ObjectIdGetDatum(GetUserId());
+	values[Anum_pg_ts_dict_dicttemplate - 1] = ObjectIdGetDatum(templId);
+	/* options are stored as a serialized text representation, or NULL */
+	if (dictoptions)
+		values[Anum_pg_ts_dict_dictinitoption - 1] =
+			PointerGetDatum(serialize_deflist(dictoptions));
+	else
+		nulls[Anum_pg_ts_dict_dictinitoption - 1] = true;
+
+	tup = heap_form_tuple(dictRel->rd_att, values, nulls);
+
+	CatalogTupleInsert(dictRel, tup);
+
+	address = makeDictionaryDependencies(tup);
+
+	/* Post creation hook for new text search dictionary */
+	InvokeObjectPostCreateHook(TSDictionaryRelationId, dictOid, 0);
+
+	heap_freetuple(tup);
+
+	table_close(dictRel, RowExclusiveLock);
+
+	return address;
+}
+
+/*
+ * ALTER TEXT SEARCH DICTIONARY
+ *
+ * Only the dictionary's options can be altered (not its template).  Each
+ * option in the statement replaces any existing option of the same name;
+ * an option given without a value just removes the existing setting.
+ */
+ObjectAddress
+AlterTSDictionary(AlterTSDictionaryStmt *stmt)
+{
+	HeapTuple	tup,
+				newtup;
+	Relation	rel;
+	Oid			dictId;
+	ListCell   *pl;
+	List	   *dictoptions;
+	Datum		opt;
+	bool		isnull;
+	Datum		repl_val[Natts_pg_ts_dict];
+	bool		repl_null[Natts_pg_ts_dict];
+	bool		repl_repl[Natts_pg_ts_dict];
+	ObjectAddress address;
+
+	dictId = get_ts_dict_oid(stmt->dictname, false);
+
+	rel = table_open(TSDictionaryRelationId, RowExclusiveLock);
+
+	tup = SearchSysCache1(TSDICTOID, ObjectIdGetDatum(dictId));
+
+	if (!HeapTupleIsValid(tup))
+		elog(ERROR, "cache lookup failed for text search dictionary %u",
+			 dictId);
+
+	/* must be owner */
+	if (!pg_ts_dict_ownercheck(dictId, GetUserId()))
+		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_TSDICTIONARY,
+					   NameListToString(stmt->dictname));
+
+	/* deserialize the existing set of options */
+	opt = SysCacheGetAttr(TSDICTOID, tup,
+						  Anum_pg_ts_dict_dictinitoption,
+						  &isnull);
+	if (isnull)
+		dictoptions = NIL;
+	else
+		dictoptions = deserialize_deflist(opt);
+
+	/*
+	 * Modify the options list as per specified changes
+	 */
+	foreach(pl, stmt->options)
+	{
+		DefElem    *defel = (DefElem *) lfirst(pl);
+		ListCell   *cell;
+
+		/*
+		 * Remove any matches ...
+		 */
+		foreach(cell, dictoptions)
+		{
+			DefElem    *oldel = (DefElem *) lfirst(cell);
+
+			if (strcmp(oldel->defname, defel->defname) == 0)
+				dictoptions = foreach_delete_current(dictoptions, cell);
+		}
+
+		/*
+		 * and add new value if it's got one
+		 */
+		if (defel->arg)
+			dictoptions = lappend(dictoptions, defel);
+	}
+
+	/*
+	 * Validate the merged option list against the template's init method
+	 */
+	verify_dictoptions(((Form_pg_ts_dict) GETSTRUCT(tup))->dicttemplate,
+					   dictoptions);
+
+	/*
+	 * Looks good, update
+	 */
+	memset(repl_val, 0, sizeof(repl_val));
+	memset(repl_null, false, sizeof(repl_null));
+	memset(repl_repl, false, sizeof(repl_repl));
+
+	if (dictoptions)
+		repl_val[Anum_pg_ts_dict_dictinitoption - 1] =
+			PointerGetDatum(serialize_deflist(dictoptions));
+	else
+		repl_null[Anum_pg_ts_dict_dictinitoption - 1] = true;
+	repl_repl[Anum_pg_ts_dict_dictinitoption - 1] = true;
+
+	newtup = heap_modify_tuple(tup, RelationGetDescr(rel),
+							   repl_val, repl_null, repl_repl);
+
+	CatalogTupleUpdate(rel, &newtup->t_self, newtup);
+
+	InvokeObjectPostAlterHook(TSDictionaryRelationId, dictId, 0);
+
+	ObjectAddressSet(address, TSDictionaryRelationId, dictId);
+
+	/*
+	 * NOTE: because we only support altering the options, not the template,
+	 * there is no need to update dependencies.  This might have to change if
+	 * the options ever reference inside-the-database objects.
+	 */
+
+	heap_freetuple(newtup);
+	ReleaseSysCache(tup);
+
+	table_close(rel, RowExclusiveLock);
+
+	return address;
+}
+
+/* ---------------------- TS Template commands -----------------------*/
+
+/*
+ * lookup a template support function and return its OID (as a Datum)
+ *
+ * attnum is the pg_ts_template column the function will go into
+ *
+ * Both template methods take only "internal" arguments and return
+ * "internal"; they differ only in argument count (init: 1, lexize: 4).
+ */
+static Datum
+get_ts_template_func(DefElem *defel, int attnum)
+{
+	List	   *funcName = defGetQualifiedName(defel);
+	Oid			typeId[4];
+	Oid			retTypeId;
+	int			nargs;
+	Oid			procOid;
+
+	retTypeId = INTERNALOID;
+	typeId[0] = INTERNALOID;
+	typeId[1] = INTERNALOID;
+	typeId[2] = INTERNALOID;
+	typeId[3] = INTERNALOID;
+	switch (attnum)
+	{
+		case Anum_pg_ts_template_tmplinit:
+			nargs = 1;
+			break;
+		case Anum_pg_ts_template_tmpllexize:
+			nargs = 4;
+			break;
+		default:
+			/* should not be here */
+			elog(ERROR, "unrecognized attribute for text search template: %d",
+				 attnum);
+			nargs = 0;			/* keep compiler quiet */
+	}
+
+	procOid = LookupFuncName(funcName, nargs, typeId, false);
+	if (get_func_rettype(procOid) != retTypeId)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+				 errmsg("function %s should return type %s",
+						func_signature_string(funcName, nargs, NIL, typeId),
+						format_type_be(retTypeId))));
+
+	return ObjectIdGetDatum(procOid);
+}
+
+/*
+ * make pg_depend entries for a new pg_ts_template entry
+ *
+ * Return value is the address of the new entry.
+ */
+static ObjectAddress
+makeTSTemplateDependencies(HeapTuple tuple)
+{
+	Form_pg_ts_template tmpl = (Form_pg_ts_template) GETSTRUCT(tuple);
+	ObjectAddress myself,
+				referenced;
+	ObjectAddresses *addrs;
+
+	ObjectAddressSet(myself, TSTemplateRelationId, tmpl->oid);
+
+	/* dependency on extension */
+	recordDependencyOnCurrentExtension(&myself, false);
+
+	addrs = new_object_addresses();
+
+	/* dependency on namespace */
+	ObjectAddressSet(referenced, NamespaceRelationId, tmpl->tmplnamespace);
+	add_exact_object_address(&referenced, addrs);
+
+	/* dependencies on functions */
+	ObjectAddressSet(referenced, ProcedureRelationId, tmpl->tmpllexize);
+	add_exact_object_address(&referenced, addrs);
+
+	/* init method is optional */
+	if (OidIsValid(tmpl->tmplinit))
+	{
+		referenced.objectId = tmpl->tmplinit;
+		add_exact_object_address(&referenced, addrs);
+	}
+
+	record_object_address_dependencies(&myself, addrs, DEPENDENCY_NORMAL);
+	free_object_addresses(addrs);
+
+	return myself;
+}
+
+/*
+ * CREATE TEXT SEARCH TEMPLATE
+ *
+ * Superuser-only.  Builds a new pg_ts_template row from the parameter
+ * list, records dependencies, and returns the new entry's address.
+ * The "lexize" method is required; "init" is optional.
+ */
+ObjectAddress
+DefineTSTemplate(List *names, List *parameters)
+{
+	ListCell   *pl;
+	Relation	tmplRel;
+	HeapTuple	tup;
+	Datum		values[Natts_pg_ts_template];
+	bool		nulls[Natts_pg_ts_template];
+	NameData	dname;
+	int			i;
+	Oid			tmplOid;
+	Oid			namespaceoid;
+	char	   *tmplname;
+	ObjectAddress address;
+
+	if (!superuser())
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 errmsg("must be superuser to create text search templates")));
+
+	/* Convert list of names to a name and namespace */
+	namespaceoid = QualifiedNameGetCreationNamespace(names, &tmplname);
+
+	tmplRel = table_open(TSTemplateRelationId, RowExclusiveLock);
+
+	/*
+	 * Pre-fill all columns as non-null InvalidOid, so that a function column
+	 * left unspecified reads as InvalidOid in the validation below.
+	 */
+	for (i = 0; i < Natts_pg_ts_template; i++)
+	{
+		nulls[i] = false;
+		values[i] = ObjectIdGetDatum(InvalidOid);
+	}
+
+	/*
+	 * Use pg_ts_template's own OID column here.  (The original code passed
+	 * Anum_pg_ts_dict_oid, which only worked because both constants happen
+	 * to be 1.)
+	 */
+	tmplOid = GetNewOidWithIndex(tmplRel, TSTemplateOidIndexId,
+								 Anum_pg_ts_template_oid);
+	values[Anum_pg_ts_template_oid - 1] = ObjectIdGetDatum(tmplOid);
+	namestrcpy(&dname, tmplname);
+	values[Anum_pg_ts_template_tmplname - 1] = NameGetDatum(&dname);
+	values[Anum_pg_ts_template_tmplnamespace - 1] = ObjectIdGetDatum(namespaceoid);
+
+	/*
+	 * loop over the definition list and extract the information we need.
+	 */
+	foreach(pl, parameters)
+	{
+		DefElem    *defel = (DefElem *) lfirst(pl);
+
+		if (strcmp(defel->defname, "init") == 0)
+		{
+			values[Anum_pg_ts_template_tmplinit - 1] =
+				get_ts_template_func(defel, Anum_pg_ts_template_tmplinit);
+			nulls[Anum_pg_ts_template_tmplinit - 1] = false;
+		}
+		else if (strcmp(defel->defname, "lexize") == 0)
+		{
+			values[Anum_pg_ts_template_tmpllexize - 1] =
+				get_ts_template_func(defel, Anum_pg_ts_template_tmpllexize);
+			nulls[Anum_pg_ts_template_tmpllexize - 1] = false;
+		}
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("text search template parameter \"%s\" not recognized",
+							defel->defname)));
+	}
+
+	/*
+	 * Validation
+	 */
+	if (!OidIsValid(DatumGetObjectId(values[Anum_pg_ts_template_tmpllexize - 1])))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+				 errmsg("text search template lexize method is required")));
+
+	/*
+	 * Looks good, insert
+	 */
+	tup = heap_form_tuple(tmplRel->rd_att, values, nulls);
+
+	CatalogTupleInsert(tmplRel, tup);
+
+	address = makeTSTemplateDependencies(tup);
+
+	/* Post creation hook for new text search template */
+	InvokeObjectPostCreateHook(TSTemplateRelationId, tmplOid, 0);
+
+	heap_freetuple(tup);
+
+	table_close(tmplRel, RowExclusiveLock);
+
+	return address;
+}
+
+/* ---------------------- TS Configuration commands -----------------------*/
+
+/*
+ * Look up the syscache tuple for a text search configuration by name.
+ *
+ * Returns NULL if no such configuration exists; otherwise the caller is
+ * responsible for ReleaseSysCache() on the result.
+ */
+static HeapTuple
+GetTSConfigTuple(List *names)
+{
+	Oid			cfgoid = get_ts_config_oid(names, true);
+	HeapTuple	cfgtup;
+
+	if (!OidIsValid(cfgoid))
+		return NULL;
+
+	cfgtup = SearchSysCache1(TSCONFIGOID, ObjectIdGetDatum(cfgoid));
+	if (!HeapTupleIsValid(cfgtup))	/* should not happen */
+		elog(ERROR, "cache lookup failed for text search configuration %u",
+			 cfgoid);
+
+	return cfgtup;
+}
+
+/*
+ * make pg_depend entries for a new or updated pg_ts_config entry
+ *
+ * Pass opened pg_ts_config_map relation if there might be any config map
+ * entries for the config.
+ */
+static ObjectAddress
+makeConfigurationDependencies(HeapTuple tuple, bool removeOld,
+							  Relation mapRel)
+{
+	Form_pg_ts_config cfg = (Form_pg_ts_config) GETSTRUCT(tuple);
+	ObjectAddresses *addrs;
+	ObjectAddress myself,
+				referenced;
+
+	myself.classId = TSConfigRelationId;
+	myself.objectId = cfg->oid;
+	myself.objectSubId = 0;
+
+	/* for ALTER case, first flush old dependencies, except extension deps */
+	if (removeOld)
+	{
+		deleteDependencyRecordsFor(myself.classId, myself.objectId, true);
+		deleteSharedDependencyRecordsFor(myself.classId, myself.objectId, 0);
+	}
+
+	/*
+	 * We use an ObjectAddresses list to remove possible duplicate
+	 * dependencies from the config map info.  The pg_ts_config items
+	 * shouldn't be duplicates, but might as well fold them all into one call.
+	 */
+	addrs = new_object_addresses();
+
+	/* dependency on namespace */
+	referenced.classId = NamespaceRelationId;
+	referenced.objectId = cfg->cfgnamespace;
+	referenced.objectSubId = 0;
+	add_exact_object_address(&referenced, addrs);
+
+	/* dependency on owner (shared dependency, not part of addrs) */
+	recordDependencyOnOwner(myself.classId, myself.objectId, cfg->cfgowner);
+
+	/* dependency on extension */
+	recordDependencyOnCurrentExtension(&myself, removeOld);
+
+	/* dependency on parser */
+	referenced.classId = TSParserRelationId;
+	referenced.objectId = cfg->cfgparser;
+	referenced.objectSubId = 0;
+	add_exact_object_address(&referenced, addrs);
+
+	/* dependencies on dictionaries listed in config map */
+	if (mapRel)
+	{
+		ScanKeyData skey;
+		SysScanDesc scan;
+		HeapTuple	maptup;
+
+		/* CCI to ensure we can see effects of caller's changes */
+		CommandCounterIncrement();
+
+		ScanKeyInit(&skey,
+					Anum_pg_ts_config_map_mapcfg,
+					BTEqualStrategyNumber, F_OIDEQ,
+					ObjectIdGetDatum(myself.objectId));
+
+		scan = systable_beginscan(mapRel, TSConfigMapIndexId, true,
+								  NULL, 1, &skey);
+
+		while (HeapTupleIsValid((maptup = systable_getnext(scan))))
+		{
+			Form_pg_ts_config_map cfgmap = (Form_pg_ts_config_map) GETSTRUCT(maptup);
+
+			referenced.classId = TSDictionaryRelationId;
+			referenced.objectId = cfgmap->mapdict;
+			referenced.objectSubId = 0;
+			add_exact_object_address(&referenced, addrs);
+		}
+
+		systable_endscan(scan);
+	}
+
+	/* Record 'em (this includes duplicate elimination) */
+	record_object_address_dependencies(&myself, addrs, DEPENDENCY_NORMAL);
+
+	free_object_addresses(addrs);
+
+	return myself;
+}
+
+/*
+ * CREATE TEXT SEARCH CONFIGURATION
+ *
+ * A new configuration is built either from an explicit PARSER option or by
+ * copying an existing configuration (COPY option), in which case the source
+ * config's parser and token-to-dictionary mappings are duplicated.  If
+ * "copied" is non-NULL it receives the address of the copied source config
+ * (for event-trigger support), when COPY was used.
+ */
+ObjectAddress
+DefineTSConfiguration(List *names, List *parameters, ObjectAddress *copied)
+{
+	Relation	cfgRel;
+	Relation	mapRel = NULL;
+	HeapTuple	tup;
+	Datum		values[Natts_pg_ts_config];
+	bool		nulls[Natts_pg_ts_config];
+	AclResult	aclresult;
+	Oid			namespaceoid;
+	char	   *cfgname;
+	NameData	cname;
+	Oid			sourceOid = InvalidOid;
+	Oid			prsOid = InvalidOid;
+	Oid			cfgOid;
+	ListCell   *pl;
+	ObjectAddress address;
+
+	/* Convert list of names to a name and namespace */
+	namespaceoid = QualifiedNameGetCreationNamespace(names, &cfgname);
+
+	/* Check we have creation rights in target namespace */
+	aclresult = pg_namespace_aclcheck(namespaceoid, GetUserId(), ACL_CREATE);
+	if (aclresult != ACLCHECK_OK)
+		aclcheck_error(aclresult, OBJECT_SCHEMA,
+					   get_namespace_name(namespaceoid));
+
+	/*
+	 * loop over the definition list and extract the information we need.
+	 */
+	foreach(pl, parameters)
+	{
+		DefElem    *defel = (DefElem *) lfirst(pl);
+
+		if (strcmp(defel->defname, "parser") == 0)
+			prsOid = get_ts_parser_oid(defGetQualifiedName(defel), false);
+		else if (strcmp(defel->defname, "copy") == 0)
+			sourceOid = get_ts_config_oid(defGetQualifiedName(defel), false);
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("text search configuration parameter \"%s\" not recognized",
+							defel->defname)));
+	}
+
+	/* PARSER and COPY are mutually exclusive */
+	if (OidIsValid(sourceOid) && OidIsValid(prsOid))
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("cannot specify both PARSER and COPY options")));
+
+	/* make copied tsconfig available to callers */
+	if (copied && OidIsValid(sourceOid))
+	{
+		ObjectAddressSet(*copied,
+						 TSConfigRelationId,
+						 sourceOid);
+	}
+
+	/*
+	 * Look up source config if given.
+	 */
+	if (OidIsValid(sourceOid))
+	{
+		Form_pg_ts_config cfg;
+
+		tup = SearchSysCache1(TSCONFIGOID, ObjectIdGetDatum(sourceOid));
+		if (!HeapTupleIsValid(tup))
+			elog(ERROR, "cache lookup failed for text search configuration %u",
+				 sourceOid);
+
+		cfg = (Form_pg_ts_config) GETSTRUCT(tup);
+
+		/* use source's parser */
+		prsOid = cfg->cfgparser;
+
+		ReleaseSysCache(tup);
+	}
+
+	/*
+	 * Validation
+	 */
+	if (!OidIsValid(prsOid))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+				 errmsg("text search parser is required")));
+
+	cfgRel = table_open(TSConfigRelationId, RowExclusiveLock);
+
+	/*
+	 * Looks good, build tuple and insert
+	 */
+	memset(values, 0, sizeof(values));
+	memset(nulls, false, sizeof(nulls));
+
+	cfgOid = GetNewOidWithIndex(cfgRel, TSConfigOidIndexId,
+								Anum_pg_ts_config_oid);
+	values[Anum_pg_ts_config_oid - 1] = ObjectIdGetDatum(cfgOid);
+	namestrcpy(&cname, cfgname);
+	values[Anum_pg_ts_config_cfgname - 1] = NameGetDatum(&cname);
+	values[Anum_pg_ts_config_cfgnamespace - 1] = ObjectIdGetDatum(namespaceoid);
+	values[Anum_pg_ts_config_cfgowner - 1] = ObjectIdGetDatum(GetUserId());
+	values[Anum_pg_ts_config_cfgparser - 1] = ObjectIdGetDatum(prsOid);
+
+	tup = heap_form_tuple(cfgRel->rd_att, values, nulls);
+
+	CatalogTupleInsert(cfgRel, tup);
+
+	if (OidIsValid(sourceOid))
+	{
+		/*
+		 * Copy token-dicts map from source config
+		 */
+		ScanKeyData skey;
+		SysScanDesc scan;
+		HeapTuple	maptup;
+
+		mapRel = table_open(TSConfigMapRelationId, RowExclusiveLock);
+
+		ScanKeyInit(&skey,
+					Anum_pg_ts_config_map_mapcfg,
+					BTEqualStrategyNumber, F_OIDEQ,
+					ObjectIdGetDatum(sourceOid));
+
+		scan = systable_beginscan(mapRel, TSConfigMapIndexId, true,
+								  NULL, 1, &skey);
+
+		while (HeapTupleIsValid((maptup = systable_getnext(scan))))
+		{
+			Form_pg_ts_config_map cfgmap = (Form_pg_ts_config_map) GETSTRUCT(maptup);
+			HeapTuple	newmaptup;
+			Datum		mapvalues[Natts_pg_ts_config_map];
+			bool		mapnulls[Natts_pg_ts_config_map];
+
+			memset(mapvalues, 0, sizeof(mapvalues));
+			memset(mapnulls, false, sizeof(mapnulls));
+
+			/* same mapping as the source row, but owned by the new config */
+			mapvalues[Anum_pg_ts_config_map_mapcfg - 1] = cfgOid;
+			mapvalues[Anum_pg_ts_config_map_maptokentype - 1] = cfgmap->maptokentype;
+			mapvalues[Anum_pg_ts_config_map_mapseqno - 1] = cfgmap->mapseqno;
+			mapvalues[Anum_pg_ts_config_map_mapdict - 1] = cfgmap->mapdict;
+
+			newmaptup = heap_form_tuple(mapRel->rd_att, mapvalues, mapnulls);
+
+			CatalogTupleInsert(mapRel, newmaptup);
+
+			heap_freetuple(newmaptup);
+		}
+
+		systable_endscan(scan);
+	}
+
+	/* mapRel may still be NULL here; that's OK for the dependency pass */
+	address = makeConfigurationDependencies(tup, false, mapRel);
+
+	/* Post creation hook for new text search configuration */
+	InvokeObjectPostCreateHook(TSConfigRelationId, cfgOid, 0);
+
+	heap_freetuple(tup);
+
+	if (mapRel)
+		table_close(mapRel, RowExclusiveLock);
+	table_close(cfgRel, RowExclusiveLock);
+
+	return address;
+}
+
+/*
+ * Guts of TS configuration deletion.
+ *
+ * Removes the pg_ts_config row for cfgId, then any pg_ts_config_map rows
+ * that reference it.  Dependency cleanup is assumed to be handled by the
+ * generic object-drop machinery that calls us.
+ */
+void
+RemoveTSConfigurationById(Oid cfgId)
+{
+	Relation	relCfg,
+				relMap;
+	HeapTuple	tup;
+	ScanKeyData skey;
+	SysScanDesc scan;
+
+	/* Remove the pg_ts_config entry */
+	relCfg = table_open(TSConfigRelationId, RowExclusiveLock);
+
+	tup = SearchSysCache1(TSCONFIGOID, ObjectIdGetDatum(cfgId));
+
+	if (!HeapTupleIsValid(tup))
+		elog(ERROR, "cache lookup failed for text search configuration %u",
+			 cfgId);
+
+	CatalogTupleDelete(relCfg, &tup->t_self);
+
+	ReleaseSysCache(tup);
+
+	table_close(relCfg, RowExclusiveLock);
+
+	/* Remove any pg_ts_config_map entries */
+	relMap = table_open(TSConfigMapRelationId, RowExclusiveLock);
+
+	ScanKeyInit(&skey,
+				Anum_pg_ts_config_map_mapcfg,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(cfgId));
+
+	scan = systable_beginscan(relMap, TSConfigMapIndexId, true,
+							  NULL, 1, &skey);
+
+	while (HeapTupleIsValid((tup = systable_getnext(scan))))
+	{
+		CatalogTupleDelete(relMap, &tup->t_self);
+	}
+
+	systable_endscan(scan);
+
+	table_close(relMap, RowExclusiveLock);
+}
+
+/*
+ * ALTER TEXT SEARCH CONFIGURATION - main entry point
+ *
+ * Dispatches to MakeConfigurationMapping (ADD/ALTER MAPPING, when the
+ * statement carries a dictionary list) or DropConfigurationMapping
+ * (DROP MAPPING, token types only), then rebuilds dependency records.
+ */
+ObjectAddress
+AlterTSConfiguration(AlterTSConfigurationStmt *stmt)
+{
+	HeapTuple	tup;
+	Oid			cfgId;
+	Relation	relMap;
+	ObjectAddress address;
+
+	/* Find the configuration */
+	tup = GetTSConfigTuple(stmt->cfgname);
+	if (!HeapTupleIsValid(tup))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("text search configuration \"%s\" does not exist",
+						NameListToString(stmt->cfgname))));
+
+	cfgId = ((Form_pg_ts_config) GETSTRUCT(tup))->oid;
+
+	/* must be owner */
+	if (!pg_ts_config_ownercheck(cfgId, GetUserId()))
+		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_TSCONFIGURATION,
+					   NameListToString(stmt->cfgname));
+
+	relMap = table_open(TSConfigMapRelationId, RowExclusiveLock);
+
+	/* Add or drop mappings */
+	if (stmt->dicts)
+		MakeConfigurationMapping(stmt, tup, relMap);
+	else if (stmt->tokentype)
+		DropConfigurationMapping(stmt, tup, relMap);
+
+	/* Update dependencies (removeOld = true flushes the old records) */
+	makeConfigurationDependencies(tup, true, relMap);
+
+	InvokeObjectPostAlterHook(TSConfigRelationId, cfgId, 0);
+
+	ObjectAddressSet(address, TSConfigRelationId, cfgId);
+
+	table_close(relMap, RowExclusiveLock);
+
+	ReleaseSysCache(tup);
+
+	return address;
+}
+
+/*
+ * Translate a list of token type names to an array of token type numbers
+ *
+ * Returns a palloc'd array with one entry per name (in list order), or
+ * NULL if the list is empty.  Errors out on any unrecognized name.
+ */
+static int *
+getTokenTypes(Oid prsId, List *tokennames)
+{
+	TSParserCacheEntry *prs = lookup_ts_parser_cache(prsId);
+	LexDescr   *list;
+	int		   *res,
+				i,
+				ntoken;
+	ListCell   *tn;
+
+	ntoken = list_length(tokennames);
+	if (ntoken == 0)
+		return NULL;
+	res = (int *) palloc(sizeof(int) * ntoken);
+
+	if (!OidIsValid(prs->lextypeOid))
+		elog(ERROR, "method lextype isn't defined for text search parser %u",
+			 prsId);
+
+	/* lextype takes one dummy argument */
+	list = (LexDescr *) DatumGetPointer(OidFunctionCall1(prs->lextypeOid,
+														 (Datum) 0));
+
+	i = 0;
+	foreach(tn, tokennames)
+	{
+		String	   *val = lfirst_node(String, tn);
+		bool		found = false;
+		int			j;
+
+		/* linear search of the lexid-terminated descriptor array */
+		j = 0;
+		while (list && list[j].lexid)
+		{
+			if (strcmp(strVal(val), list[j].alias) == 0)
+			{
+				res[i] = list[j].lexid;
+				found = true;
+				break;
+			}
+			j++;
+		}
+		if (!found)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("token type \"%s\" does not exist",
+							strVal(val))));
+		i++;
+	}
+
+	return res;
+}
+
+/*
+ * ALTER TEXT SEARCH CONFIGURATION ADD/ALTER MAPPING
+ *
+ * Inserts, replaces, or overrides pg_ts_config_map entries for the
+ * configuration described by 'tup'.  Behavior depends on stmt flags:
+ *   - stmt->override: existing mappings for the named token types are
+ *     deleted first (this is ALTER MAPPING rather than ADD MAPPING).
+ *   - stmt->replace: dictIds[0] is replaced by dictIds[1] in existing
+ *     entries (optionally restricted to the named token types); no new
+ *     rows are inserted.
+ *   - otherwise: one row is inserted per (token type, dictionary) pair,
+ *     with mapseqno reflecting the dictionary's position in the list.
+ * 'relMap' must already be open with RowExclusiveLock.
+ */
+static void
+MakeConfigurationMapping(AlterTSConfigurationStmt *stmt,
+						 HeapTuple tup, Relation relMap)
+{
+	Form_pg_ts_config tsform;
+	Oid			cfgId;
+	ScanKeyData skey[2];
+	SysScanDesc scan;
+	HeapTuple	maptup;
+	int			i;
+	int			j;
+	Oid			prsId;
+	int		   *tokens,
+				ntoken;
+	Oid		   *dictIds;
+	int			ndict;
+	ListCell   *c;
+
+	tsform = (Form_pg_ts_config) GETSTRUCT(tup);
+	cfgId = tsform->oid;
+	prsId = tsform->cfgparser;
+
+	/* Resolve token type names against the configuration's parser */
+	tokens = getTokenTypes(prsId, stmt->tokentype);
+	ntoken = list_length(stmt->tokentype);
+
+	if (stmt->override)
+	{
+		/*
+		 * delete maps for tokens if they exist and command was ALTER
+		 */
+		for (i = 0; i < ntoken; i++)
+		{
+			/* scan by (mapcfg, maptokentype) so we hit only this token type */
+			ScanKeyInit(&skey[0],
+						Anum_pg_ts_config_map_mapcfg,
+						BTEqualStrategyNumber, F_OIDEQ,
+						ObjectIdGetDatum(cfgId));
+			ScanKeyInit(&skey[1],
+						Anum_pg_ts_config_map_maptokentype,
+						BTEqualStrategyNumber, F_INT4EQ,
+						Int32GetDatum(tokens[i]));
+
+			scan = systable_beginscan(relMap, TSConfigMapIndexId, true,
+									  NULL, 2, skey);
+
+			while (HeapTupleIsValid((maptup = systable_getnext(scan))))
+			{
+				CatalogTupleDelete(relMap, &maptup->t_self);
+			}
+
+			systable_endscan(scan);
+		}
+	}
+
+	/*
+	 * Convert list of dictionary names to array of dict OIDs
+	 */
+	ndict = list_length(stmt->dicts);
+	dictIds = (Oid *) palloc(sizeof(Oid) * ndict);
+	i = 0;
+	foreach(c, stmt->dicts)
+	{
+		List	   *names = (List *) lfirst(c);
+
+		/* missing_ok = false: nonexistent dictionary is an error */
+		dictIds[i] = get_ts_dict_oid(names, false);
+		i++;
+	}
+
+	if (stmt->replace)
+	{
+		/*
+		 * Replace a specific dictionary in existing entries
+		 *
+		 * The grammar guarantees exactly two dictionaries here: the one to
+		 * replace and its replacement.
+		 */
+		Oid			dictOld = dictIds[0],
+					dictNew = dictIds[1];
+
+		/* scan all map entries of this configuration */
+		ScanKeyInit(&skey[0],
+					Anum_pg_ts_config_map_mapcfg,
+					BTEqualStrategyNumber, F_OIDEQ,
+					ObjectIdGetDatum(cfgId));
+
+		scan = systable_beginscan(relMap, TSConfigMapIndexId, true,
+								  NULL, 1, skey);
+
+		while (HeapTupleIsValid((maptup = systable_getnext(scan))))
+		{
+			Form_pg_ts_config_map cfgmap = (Form_pg_ts_config_map) GETSTRUCT(maptup);
+
+			/*
+			 * check if it's one of target token types
+			 *
+			 * If no FOR clause was given, tokens is NULL and every entry is
+			 * eligible for replacement.
+			 */
+			if (tokens)
+			{
+				bool		tokmatch = false;
+
+				for (j = 0; j < ntoken; j++)
+				{
+					if (cfgmap->maptokentype == tokens[j])
+					{
+						tokmatch = true;
+						break;
+					}
+				}
+				if (!tokmatch)
+					continue;
+			}
+
+			/*
+			 * replace dictionary if match
+			 */
+			if (cfgmap->mapdict == dictOld)
+			{
+				Datum		repl_val[Natts_pg_ts_config_map];
+				bool		repl_null[Natts_pg_ts_config_map];
+				bool		repl_repl[Natts_pg_ts_config_map];
+				HeapTuple	newtup;
+
+				memset(repl_val, 0, sizeof(repl_val));
+				memset(repl_null, false, sizeof(repl_null));
+				memset(repl_repl, false, sizeof(repl_repl));
+
+				/* only the mapdict column changes */
+				repl_val[Anum_pg_ts_config_map_mapdict - 1] = ObjectIdGetDatum(dictNew);
+				repl_repl[Anum_pg_ts_config_map_mapdict - 1] = true;
+
+				newtup = heap_modify_tuple(maptup,
+										   RelationGetDescr(relMap),
+										   repl_val, repl_null, repl_repl);
+				CatalogTupleUpdate(relMap, &newtup->t_self, newtup);
+			}
+		}
+
+		systable_endscan(scan);
+	}
+	else
+	{
+		/*
+		 * Insertion of new entries
+		 *
+		 * mapseqno is 1-based and records dictionary priority order.
+		 */
+		for (i = 0; i < ntoken; i++)
+		{
+			for (j = 0; j < ndict; j++)
+			{
+				Datum		values[Natts_pg_ts_config_map];
+				bool		nulls[Natts_pg_ts_config_map];
+
+				memset(nulls, false, sizeof(nulls));
+				values[Anum_pg_ts_config_map_mapcfg - 1] = ObjectIdGetDatum(cfgId);
+				values[Anum_pg_ts_config_map_maptokentype - 1] = Int32GetDatum(tokens[i]);
+				values[Anum_pg_ts_config_map_mapseqno - 1] = Int32GetDatum(j + 1);
+				values[Anum_pg_ts_config_map_mapdict - 1] = ObjectIdGetDatum(dictIds[j]);
+
+				tup = heap_form_tuple(relMap->rd_att, values, nulls);
+				CatalogTupleInsert(relMap, tup);
+
+				heap_freetuple(tup);
+			}
+		}
+	}
+
+	/* Let event triggers record what happened */
+	EventTriggerCollectAlterTSConfig(stmt, cfgId, dictIds, ndict);
+}
+
+/*
+ * ALTER TEXT SEARCH CONFIGURATION DROP MAPPING
+ *
+ * Removes the pg_ts_config_map rows for each token type named in the
+ * statement.  With IF EXISTS (stmt->missing_ok), a token type that has no
+ * mapping produces a NOTICE; otherwise it is an error.  'relMap' must be
+ * open with RowExclusiveLock.
+ */
+static void
+DropConfigurationMapping(AlterTSConfigurationStmt *stmt,
+						 HeapTuple tup, Relation relMap)
+{
+	Form_pg_ts_config tsform = (Form_pg_ts_config) GETSTRUCT(tup);
+	Oid			cfgId = tsform->oid;
+	Oid			prsId = tsform->cfgparser;
+	int		   *tokens;
+	int			idx;
+	ListCell   *lc;
+
+	/* Resolve the token type names via the configuration's parser */
+	tokens = getTokenTypes(prsId, stmt->tokentype);
+
+	idx = 0;
+	foreach(lc, stmt->tokentype)
+	{
+		String	   *tokname = lfirst_node(String, lc);
+		ScanKeyData skey[2];
+		SysScanDesc scan;
+		HeapTuple	maptup;
+		bool		removed = false;
+
+		/* Delete every map entry for this (config, token type) pair */
+		ScanKeyInit(&skey[0],
+					Anum_pg_ts_config_map_mapcfg,
+					BTEqualStrategyNumber, F_OIDEQ,
+					ObjectIdGetDatum(cfgId));
+		ScanKeyInit(&skey[1],
+					Anum_pg_ts_config_map_maptokentype,
+					BTEqualStrategyNumber, F_INT4EQ,
+					Int32GetDatum(tokens[idx]));
+
+		scan = systable_beginscan(relMap, TSConfigMapIndexId, true,
+								  NULL, 2, skey);
+
+		while (HeapTupleIsValid((maptup = systable_getnext(scan))))
+		{
+			CatalogTupleDelete(relMap, &maptup->t_self);
+			removed = true;
+		}
+
+		systable_endscan(scan);
+
+		if (!removed)
+		{
+			if (stmt->missing_ok)
+				ereport(NOTICE,
+						(errmsg("mapping for token type \"%s\" does not exist, skipping",
+								strVal(tokname))));
+			else
+				ereport(ERROR,
+						(errcode(ERRCODE_UNDEFINED_OBJECT),
+						 errmsg("mapping for token type \"%s\" does not exist",
+								strVal(tokname))));
+		}
+
+		idx++;
+	}
+
+	/* Let event triggers record what happened */
+	EventTriggerCollectAlterTSConfig(stmt, cfgId, NULL, 0);
+}
+
+
+/*
+ * Serialize dictionary options, producing a TEXT datum from a List of DefElem
+ *
+ * This is used to form the value stored in pg_ts_dict.dictinitoption.
+ * For the convenience of pg_dump, the output is formatted exactly as it
+ * would need to appear in CREATE TEXT SEARCH DICTIONARY to reproduce the
+ * same options: integer and float values are emitted bare, everything else
+ * as a single-quoted literal (with E'' syntax when backslashes occur).
+ */
+text *
+serialize_deflist(List *deflist)
+{
+	StringInfoData buf;
+	ListCell   *lc;
+	text	   *result;
+
+	initStringInfo(&buf);
+
+	foreach(lc, deflist)
+	{
+		DefElem    *defel = (DefElem *) lfirst(lc);
+		char	   *val = defGetString(defel);
+
+		appendStringInfo(&buf, "%s = ",
+						 quote_identifier(defel->defname));
+
+		/*
+		 * If the value is a T_Integer or T_Float, emit it without quotes,
+		 * otherwise with quotes.  This is essential to allow correct
+		 * reconstruction of the node type as well as the value.
+		 */
+		if (IsA(defel->arg, Integer) || IsA(defel->arg, Float))
+			appendStringInfoString(&buf, val);
+		else
+		{
+			const char *cp;
+
+			/* If backslashes appear, force E syntax to quote them safely */
+			if (strchr(val, '\\') != NULL)
+				appendStringInfoChar(&buf, ESCAPE_STRING_SYNTAX);
+			appendStringInfoChar(&buf, '\'');
+			for (cp = val; *cp != '\0'; cp++)
+			{
+				/* double quotes and backslashes inside the literal */
+				if (SQL_STR_DOUBLE(*cp, true))
+					appendStringInfoChar(&buf, *cp);
+				appendStringInfoChar(&buf, *cp);
+			}
+			appendStringInfoChar(&buf, '\'');
+		}
+		if (lnext(deflist, lc) != NULL)
+			appendStringInfoString(&buf, ", ");
+	}
+
+	result = cstring_to_text_with_len(buf.data, buf.len);
+	pfree(buf.data);
+	return result;
+}
+
+/*
+ * Deserialize dictionary options, reconstructing a List of DefElem from TEXT
+ *
+ * This is also used for prsheadline options, so for backward compatibility
+ * we need to accept a few things serialize_deflist() will never emit:
+ * in particular, unquoted and double-quoted strings.
+ *
+ * Implemented as a character-at-a-time state machine over the input text.
+ * Keys and values are accumulated into a single workspace buffer: the key
+ * starts at 'workspace' and the value at 'startvalue', each NUL-terminated
+ * before buildDefItem() is called.  Doubled quote characters ('' or "")
+ * and doubled backslashes are collapsed to a single character.
+ */
+List *
+deserialize_deflist(Datum txt)
+{
+	text	   *in = DatumGetTextPP(txt);	/* in case it's toasted */
+	List	   *result = NIL;
+	int			len = VARSIZE_ANY_EXHDR(in);
+	char	   *ptr,
+			   *endptr,
+			   *workspace,
+			   *wsptr = NULL,
+			   *startvalue = NULL;
+	/* parser states: waiting for / inside a key, '=', or a value */
+	typedef enum
+	{
+		CS_WAITKEY,				/* between items, looking for a key */
+		CS_INKEY,				/* inside an unquoted key */
+		CS_INQKEY,				/* inside a double-quoted key */
+		CS_WAITEQ,				/* key done, expecting '=' */
+		CS_WAITVALUE,			/* '=' seen, expecting a value */
+		CS_INSQVALUE,			/* inside a single-quoted value */
+		CS_INDQVALUE,			/* inside a double-quoted value */
+		CS_INWVALUE				/* inside an unquoted (word) value */
+	} ds_state;
+	ds_state	state = CS_WAITKEY;
+
+	workspace = (char *) palloc(len + 1);	/* certainly enough room */
+	ptr = VARDATA_ANY(in);
+	endptr = ptr + len;
+	for (; ptr < endptr; ptr++)
+	{
+		switch (state)
+		{
+			case CS_WAITKEY:
+				if (isspace((unsigned char) *ptr) || *ptr == ',')
+					continue;
+				if (*ptr == '"')
+				{
+					/* start of a double-quoted key */
+					wsptr = workspace;
+					state = CS_INQKEY;
+				}
+				else
+				{
+					/* start of an unquoted key; keep first character */
+					wsptr = workspace;
+					*wsptr++ = *ptr;
+					state = CS_INKEY;
+				}
+				break;
+			case CS_INKEY:
+				if (isspace((unsigned char) *ptr))
+				{
+					/* key ended by whitespace; '=' still expected */
+					*wsptr++ = '\0';
+					state = CS_WAITEQ;
+				}
+				else if (*ptr == '=')
+				{
+					/* key ended directly by '=' */
+					*wsptr++ = '\0';
+					state = CS_WAITVALUE;
+				}
+				else
+				{
+					*wsptr++ = *ptr;
+				}
+				break;
+			case CS_INQKEY:
+				if (*ptr == '"')
+				{
+					if (ptr + 1 < endptr && ptr[1] == '"')
+					{
+						/* copy only one of the two quotes */
+						*wsptr++ = *ptr++;
+					}
+					else
+					{
+						/* closing quote ends the key */
+						*wsptr++ = '\0';
+						state = CS_WAITEQ;
+					}
+				}
+				else
+				{
+					*wsptr++ = *ptr;
+				}
+				break;
+			case CS_WAITEQ:
+				if (*ptr == '=')
+					state = CS_WAITVALUE;
+				else if (!isspace((unsigned char) *ptr))
+					ereport(ERROR,
+							(errcode(ERRCODE_SYNTAX_ERROR),
+							 errmsg("invalid parameter list format: \"%s\"",
+									text_to_cstring(in))));
+				break;
+			case CS_WAITVALUE:
+				if (*ptr == '\'')
+				{
+					startvalue = wsptr;
+					state = CS_INSQVALUE;
+				}
+				else if (*ptr == 'E' && ptr + 1 < endptr && ptr[1] == '\'')
+				{
+					/* E'...' escape-string syntax; skip the E */
+					ptr++;
+					startvalue = wsptr;
+					state = CS_INSQVALUE;
+				}
+				else if (*ptr == '"')
+				{
+					startvalue = wsptr;
+					state = CS_INDQVALUE;
+				}
+				else if (!isspace((unsigned char) *ptr))
+				{
+					/* unquoted word value begins with this character */
+					startvalue = wsptr;
+					*wsptr++ = *ptr;
+					state = CS_INWVALUE;
+				}
+				break;
+			case CS_INSQVALUE:
+				if (*ptr == '\'')
+				{
+					if (ptr + 1 < endptr && ptr[1] == '\'')
+					{
+						/* copy only one of the two quotes */
+						*wsptr++ = *ptr++;
+					}
+					else
+					{
+						/* closing quote completes one key=value item */
+						*wsptr++ = '\0';
+						result = lappend(result,
+										 buildDefItem(workspace,
+													  startvalue,
+													  true));
+						state = CS_WAITKEY;
+					}
+				}
+				else if (*ptr == '\\')
+				{
+					if (ptr + 1 < endptr && ptr[1] == '\\')
+					{
+						/* copy only one of the two backslashes */
+						*wsptr++ = *ptr++;
+					}
+					else
+						*wsptr++ = *ptr;
+				}
+				else
+				{
+					*wsptr++ = *ptr;
+				}
+				break;
+			case CS_INDQVALUE:
+				if (*ptr == '"')
+				{
+					if (ptr + 1 < endptr && ptr[1] == '"')
+					{
+						/* copy only one of the two quotes */
+						*wsptr++ = *ptr++;
+					}
+					else
+					{
+						/* closing quote completes one key=value item */
+						*wsptr++ = '\0';
+						result = lappend(result,
+										 buildDefItem(workspace,
+													  startvalue,
+													  true));
+						state = CS_WAITKEY;
+					}
+				}
+				else
+				{
+					*wsptr++ = *ptr;
+				}
+				break;
+			case CS_INWVALUE:
+				if (*ptr == ',' || isspace((unsigned char) *ptr))
+				{
+					/* word value ends at comma or whitespace */
+					*wsptr++ = '\0';
+					result = lappend(result,
+									 buildDefItem(workspace,
+												  startvalue,
+												  false));
+					state = CS_WAITKEY;
+				}
+				else
+				{
+					*wsptr++ = *ptr;
+				}
+				break;
+			default:
+				elog(ERROR, "unrecognized deserialize_deflist state: %d",
+					 state);
+		}
+	}
+
+	/* An unquoted value may be terminated by end-of-input */
+	if (state == CS_INWVALUE)
+	{
+		*wsptr++ = '\0';
+		result = lappend(result,
+						 buildDefItem(workspace,
+									  startvalue,
+									  false));
+	}
+	else if (state != CS_WAITKEY)
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("invalid parameter list format: \"%s\"",
+						text_to_cstring(in))));
+
+	pfree(workspace);
+
+	return result;
+}
+
+/*
+ * Build one DefElem for deserialize_deflist
+ *
+ * Quoted and empty values are always kept as String nodes.  An unquoted
+ * value is re-typed as an Integer, Float, or Boolean node when it parses
+ * as one; otherwise it too becomes a String.
+ */
+static DefElem *
+buildDefItem(const char *name, const char *val, bool was_quoted)
+{
+	/* Quoted input (or an empty value) is always emitted as a string */
+	if (was_quoted || val[0] == '\0')
+		return makeDefElem(pstrdup(name),
+						   (Node *) makeString(pstrdup(val)),
+						   -1);
+
+	{
+		char	   *endptr;
+		int			intval;
+
+		/* Try to parse as an integer */
+		errno = 0;
+		intval = strtoint(val, &endptr, 10);
+		if (errno == 0 && *endptr == '\0')
+			return makeDefElem(pstrdup(name),
+							   (Node *) makeInteger(intval),
+							   -1);
+
+		/* Nope, how about as a float? */
+		errno = 0;
+		(void) strtod(val, &endptr);
+		if (errno == 0 && *endptr == '\0')
+			return makeDefElem(pstrdup(name),
+							   (Node *) makeFloat(pstrdup(val)),
+							   -1);
+	}
+
+	/* Or a boolean keyword? */
+	if (strcmp(val, "true") == 0)
+		return makeDefElem(pstrdup(name),
+						   (Node *) makeBoolean(true),
+						   -1);
+	if (strcmp(val, "false") == 0)
+		return makeDefElem(pstrdup(name),
+						   (Node *) makeBoolean(false),
+						   -1);
+
+	/* Just make it a string */
+	return makeDefElem(pstrdup(name),
+					   (Node *) makeString(pstrdup(val)),
+					   -1);
+}
diff --git a/src/backend/commands/typecmds.c b/src/backend/commands/typecmds.c
new file mode 100644
index 0000000..9b92b04
--- /dev/null
+++ b/src/backend/commands/typecmds.c
@@ -0,0 +1,4495 @@
+/*-------------------------------------------------------------------------
+ *
+ * typecmds.c
+ * Routines for SQL commands that manipulate types (and domains).
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/commands/typecmds.c
+ *
+ * DESCRIPTION
+ * The "DefineFoo" routines take the parse tree and pick out the
+ * appropriate arguments/flags, passing the results to the
+ * corresponding "FooDefine" routines (in src/catalog) that do
+ * the actual catalog-munging. These routines also verify permission
+ * of the user to execute the command.
+ *
+ * NOTES
+ * These things must be defined and committed in the following order:
+ * "create function":
+ * input/output, recv/send functions
+ * "create type":
+ * type
+ * "create operator":
+ * operators
+ *
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/heapam.h"
+#include "access/htup_details.h"
+#include "access/tableam.h"
+#include "access/xact.h"
+#include "catalog/binary_upgrade.h"
+#include "catalog/catalog.h"
+#include "catalog/heap.h"
+#include "catalog/objectaccess.h"
+#include "catalog/pg_am.h"
+#include "catalog/pg_authid.h"
+#include "catalog/pg_cast.h"
+#include "catalog/pg_collation.h"
+#include "catalog/pg_constraint.h"
+#include "catalog/pg_depend.h"
+#include "catalog/pg_enum.h"
+#include "catalog/pg_language.h"
+#include "catalog/pg_namespace.h"
+#include "catalog/pg_proc.h"
+#include "catalog/pg_range.h"
+#include "catalog/pg_type.h"
+#include "commands/defrem.h"
+#include "commands/tablecmds.h"
+#include "commands/typecmds.h"
+#include "executor/executor.h"
+#include "miscadmin.h"
+#include "nodes/makefuncs.h"
+#include "optimizer/optimizer.h"
+#include "parser/parse_coerce.h"
+#include "parser/parse_collate.h"
+#include "parser/parse_expr.h"
+#include "parser/parse_func.h"
+#include "parser/parse_type.h"
+#include "utils/builtins.h"
+#include "utils/fmgroids.h"
+#include "utils/inval.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+#include "utils/ruleutils.h"
+#include "utils/snapmgr.h"
+#include "utils/syscache.h"
+
+
+/* result structure for get_rels_with_domain() */
+typedef struct
+{
+	Relation	rel;			/* opened and locked relation */
+	int			natts;			/* number of attributes of interest */
+	int		   *atts;			/* attribute numbers */
+	/* atts[] is of allocated length RelationGetNumberOfAttributes(rel) */
+} RelToCheck;
+
+/*
+ * parameter structure for AlterTypeRecurse()
+ *
+ * Each update* flag says whether the corresponding value field below should
+ * be applied; unset flags leave that pg_type attribute untouched.
+ */
+typedef struct
+{
+	/* Flags indicating which type attributes to update */
+	bool		updateStorage;
+	bool		updateReceive;
+	bool		updateSend;
+	bool		updateTypmodin;
+	bool		updateTypmodout;
+	bool		updateAnalyze;
+	bool		updateSubscript;
+	/* New values for relevant attributes */
+	char		storage;
+	Oid			receiveOid;
+	Oid			sendOid;
+	Oid			typmodinOid;
+	Oid			typmodoutOid;
+	Oid			analyzeOid;
+	Oid			subscriptOid;
+} AlterTypeRecurseParams;
+
+/* Potentially set by pg_upgrade_support functions */
+Oid			binary_upgrade_next_array_pg_type_oid = InvalidOid;
+Oid			binary_upgrade_next_mrng_pg_type_oid = InvalidOid;
+Oid			binary_upgrade_next_mrng_array_pg_type_oid = InvalidOid;
+
+/* forward declarations for local helper routines */
+static void makeRangeConstructors(const char *name, Oid namespace,
+								  Oid rangeOid, Oid subtype);
+static void makeMultirangeConstructors(const char *name, Oid namespace,
+									   Oid multirangeOid, Oid rangeOid,
+									   Oid rangeArrayOid, Oid *castFuncOid);
+static Oid	findTypeInputFunction(List *procname, Oid typeOid);
+static Oid	findTypeOutputFunction(List *procname, Oid typeOid);
+static Oid	findTypeReceiveFunction(List *procname, Oid typeOid);
+static Oid	findTypeSendFunction(List *procname, Oid typeOid);
+static Oid	findTypeTypmodinFunction(List *procname);
+static Oid	findTypeTypmodoutFunction(List *procname);
+static Oid	findTypeAnalyzeFunction(List *procname, Oid typeOid);
+static Oid	findTypeSubscriptingFunction(List *procname, Oid typeOid);
+static Oid	findRangeSubOpclass(List *opcname, Oid subtype);
+static Oid	findRangeCanonicalFunction(List *procname, Oid typeOid);
+static Oid	findRangeSubtypeDiffFunction(List *procname, Oid subtype);
+static void validateDomainConstraint(Oid domainoid, char *ccbin);
+static List *get_rels_with_domain(Oid domainOid, LOCKMODE lockmode);
+static void checkEnumOwner(HeapTuple tup);
+static char *domainAddConstraint(Oid domainOid, Oid domainNamespace,
+								 Oid baseTypeOid,
+								 int typMod, Constraint *constr,
+								 const char *domainName, ObjectAddress *constrAddr);
+static Node *replace_domain_constraint_value(ParseState *pstate,
+											 ColumnRef *cref);
+static void AlterTypeRecurse(Oid typeOid, bool isImplicitArray,
+							 HeapTuple tup, Relation catalog,
+							 AlterTypeRecurseParams *atparams);
+
+
+/*
+ * DefineType
+ *		Registers a new base type.
+ *
+ * Implements CREATE TYPE for base types.  'names' is the (possibly
+ * qualified) type name and 'parameters' the list of DefElem options.  A
+ * parameterless invocation creates only a shell type; a full invocation
+ * requires a pre-existing shell plus at least INPUT and OUTPUT functions.
+ * The matching array type is created as well.  Returns the address of the
+ * new base type.  Requires superuser privilege.
+ */
+ObjectAddress
+DefineType(ParseState *pstate, List *names, List *parameters)
+{
+	char	   *typeName;
+	Oid			typeNamespace;
+	int16		internalLength = -1;	/* default: variable-length */
+	List	   *inputName = NIL;
+	List	   *outputName = NIL;
+	List	   *receiveName = NIL;
+	List	   *sendName = NIL;
+	List	   *typmodinName = NIL;
+	List	   *typmodoutName = NIL;
+	List	   *analyzeName = NIL;
+	List	   *subscriptName = NIL;
+	char		category = TYPCATEGORY_USER;
+	bool		preferred = false;
+	char		delimiter = DEFAULT_TYPDELIM;
+	Oid			elemType = InvalidOid;
+	char	   *defaultValue = NULL;
+	bool		byValue = false;
+	char		alignment = TYPALIGN_INT;	/* default alignment */
+	char		storage = TYPSTORAGE_PLAIN; /* default TOAST storage method */
+	Oid			collation = InvalidOid;
+	DefElem    *likeTypeEl = NULL;
+	DefElem    *internalLengthEl = NULL;
+	DefElem    *inputNameEl = NULL;
+	DefElem    *outputNameEl = NULL;
+	DefElem    *receiveNameEl = NULL;
+	DefElem    *sendNameEl = NULL;
+	DefElem    *typmodinNameEl = NULL;
+	DefElem    *typmodoutNameEl = NULL;
+	DefElem    *analyzeNameEl = NULL;
+	DefElem    *subscriptNameEl = NULL;
+	DefElem    *categoryEl = NULL;
+	DefElem    *preferredEl = NULL;
+	DefElem    *delimiterEl = NULL;
+	DefElem    *elemTypeEl = NULL;
+	DefElem    *defaultValueEl = NULL;
+	DefElem    *byValueEl = NULL;
+	DefElem    *alignmentEl = NULL;
+	DefElem    *storageEl = NULL;
+	DefElem    *collatableEl = NULL;
+	Oid			inputOid;
+	Oid			outputOid;
+	Oid			receiveOid = InvalidOid;
+	Oid			sendOid = InvalidOid;
+	Oid			typmodinOid = InvalidOid;
+	Oid			typmodoutOid = InvalidOid;
+	Oid			analyzeOid = InvalidOid;
+	Oid			subscriptOid = InvalidOid;
+	char	   *array_type;
+	Oid			array_oid;
+	Oid			typoid;
+	ListCell   *pl;
+	ObjectAddress address;
+
+	/*
+	 * As of Postgres 8.4, we require superuser privilege to create a base
+	 * type.  This is simple paranoia: there are too many ways to mess up the
+	 * system with an incorrect type definition (for instance, representation
+	 * parameters that don't match what the C code expects).  In practice it
+	 * takes superuser privilege to create the I/O functions, and so the
+	 * former requirement that you own the I/O functions pretty much forced
+	 * superuserness anyway.  We're just making doubly sure here.
+	 *
+	 * XXX re-enable NOT_USED code sections below if you remove this test.
+	 */
+	if (!superuser())
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 errmsg("must be superuser to create a base type")));
+
+	/* Convert list of names to a name and namespace */
+	typeNamespace = QualifiedNameGetCreationNamespace(names, &typeName);
+
+#ifdef NOT_USED
+	/* XXX this is unnecessary given the superuser check above */
+	/* Check we have creation rights in target namespace */
+	aclresult = pg_namespace_aclcheck(typeNamespace, GetUserId(), ACL_CREATE);
+	if (aclresult != ACLCHECK_OK)
+		aclcheck_error(aclresult, OBJECT_SCHEMA,
+					   get_namespace_name(typeNamespace));
+#endif
+
+	/*
+	 * Look to see if type already exists.
+	 */
+	typoid = GetSysCacheOid2(TYPENAMENSP, Anum_pg_type_oid,
+							 CStringGetDatum(typeName),
+							 ObjectIdGetDatum(typeNamespace));
+
+	/*
+	 * If it's not a shell, see if it's an autogenerated array type, and if so
+	 * rename it out of the way.
+	 */
+	if (OidIsValid(typoid) && get_typisdefined(typoid))
+	{
+		if (moveArrayTypeName(typoid, typeName, typeNamespace))
+			typoid = InvalidOid;
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_DUPLICATE_OBJECT),
+					 errmsg("type \"%s\" already exists", typeName)));
+	}
+
+	/*
+	 * If this command is a parameterless CREATE TYPE, then we're just here to
+	 * make a shell type, so do that (or fail if there already is a shell).
+	 */
+	if (parameters == NIL)
+	{
+		if (OidIsValid(typoid))
+			ereport(ERROR,
+					(errcode(ERRCODE_DUPLICATE_OBJECT),
+					 errmsg("type \"%s\" already exists", typeName)));
+
+		address = TypeShellMake(typeName, typeNamespace, GetUserId());
+		return address;
+	}
+
+	/*
+	 * Otherwise, we must already have a shell type, since there is no other
+	 * way that the I/O functions could have been created.
+	 */
+	if (!OidIsValid(typoid))
+		ereport(ERROR,
+				(errcode(ERRCODE_DUPLICATE_OBJECT),
+				 errmsg("type \"%s\" does not exist", typeName),
+				 errhint("Create the type as a shell type, then create its I/O functions, then do a full CREATE TYPE.")));
+
+	/*
+	 * Extract the parameters from the parameter list.  Each recognized option
+	 * is stashed in its own DefElem pointer; duplicates are rejected.
+	 */
+	foreach(pl, parameters)
+	{
+		DefElem    *defel = (DefElem *) lfirst(pl);
+		DefElem   **defelp;
+
+		if (strcmp(defel->defname, "like") == 0)
+			defelp = &likeTypeEl;
+		else if (strcmp(defel->defname, "internallength") == 0)
+			defelp = &internalLengthEl;
+		else if (strcmp(defel->defname, "input") == 0)
+			defelp = &inputNameEl;
+		else if (strcmp(defel->defname, "output") == 0)
+			defelp = &outputNameEl;
+		else if (strcmp(defel->defname, "receive") == 0)
+			defelp = &receiveNameEl;
+		else if (strcmp(defel->defname, "send") == 0)
+			defelp = &sendNameEl;
+		else if (strcmp(defel->defname, "typmod_in") == 0)
+			defelp = &typmodinNameEl;
+		else if (strcmp(defel->defname, "typmod_out") == 0)
+			defelp = &typmodoutNameEl;
+		else if (strcmp(defel->defname, "analyze") == 0 ||
+				 strcmp(defel->defname, "analyse") == 0)
+			defelp = &analyzeNameEl;
+		else if (strcmp(defel->defname, "subscript") == 0)
+			defelp = &subscriptNameEl;
+		else if (strcmp(defel->defname, "category") == 0)
+			defelp = &categoryEl;
+		else if (strcmp(defel->defname, "preferred") == 0)
+			defelp = &preferredEl;
+		else if (strcmp(defel->defname, "delimiter") == 0)
+			defelp = &delimiterEl;
+		else if (strcmp(defel->defname, "element") == 0)
+			defelp = &elemTypeEl;
+		else if (strcmp(defel->defname, "default") == 0)
+			defelp = &defaultValueEl;
+		else if (strcmp(defel->defname, "passedbyvalue") == 0)
+			defelp = &byValueEl;
+		else if (strcmp(defel->defname, "alignment") == 0)
+			defelp = &alignmentEl;
+		else if (strcmp(defel->defname, "storage") == 0)
+			defelp = &storageEl;
+		else if (strcmp(defel->defname, "collatable") == 0)
+			defelp = &collatableEl;
+		else
+		{
+			/* WARNING, not ERROR, for historical backwards-compatibility */
+			ereport(WARNING,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("type attribute \"%s\" not recognized",
+							defel->defname),
+					 parser_errposition(pstate, defel->location)));
+			continue;
+		}
+		if (*defelp != NULL)
+			errorConflictingDefElem(defel, pstate);
+		*defelp = defel;
+	}
+
+	/*
+	 * Now interpret the options; we do this separately so that LIKE can be
+	 * overridden by other options regardless of the ordering in the parameter
+	 * list.
+	 */
+	if (likeTypeEl)
+	{
+		Type		likeType;
+		Form_pg_type likeForm;
+
+		/* copy representation properties from the referenced type */
+		likeType = typenameType(NULL, defGetTypeName(likeTypeEl), NULL);
+		likeForm = (Form_pg_type) GETSTRUCT(likeType);
+		internalLength = likeForm->typlen;
+		byValue = likeForm->typbyval;
+		alignment = likeForm->typalign;
+		storage = likeForm->typstorage;
+		ReleaseSysCache(likeType);
+	}
+	if (internalLengthEl)
+		internalLength = defGetTypeLength(internalLengthEl);
+	if (inputNameEl)
+		inputName = defGetQualifiedName(inputNameEl);
+	if (outputNameEl)
+		outputName = defGetQualifiedName(outputNameEl);
+	if (receiveNameEl)
+		receiveName = defGetQualifiedName(receiveNameEl);
+	if (sendNameEl)
+		sendName = defGetQualifiedName(sendNameEl);
+	if (typmodinNameEl)
+		typmodinName = defGetQualifiedName(typmodinNameEl);
+	if (typmodoutNameEl)
+		typmodoutName = defGetQualifiedName(typmodoutNameEl);
+	if (analyzeNameEl)
+		analyzeName = defGetQualifiedName(analyzeNameEl);
+	if (subscriptNameEl)
+		subscriptName = defGetQualifiedName(subscriptNameEl);
+	if (categoryEl)
+	{
+		char	   *p = defGetString(categoryEl);
+
+		category = p[0];
+		/* restrict to non-control ASCII */
+		if (category < 32 || category > 126)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("invalid type category \"%s\": must be simple ASCII",
+							p)));
+	}
+	if (preferredEl)
+		preferred = defGetBoolean(preferredEl);
+	if (delimiterEl)
+	{
+		char	   *p = defGetString(delimiterEl);
+
+		delimiter = p[0];
+		/* XXX shouldn't we restrict the delimiter? */
+	}
+	if (elemTypeEl)
+	{
+		elemType = typenameTypeId(NULL, defGetTypeName(elemTypeEl));
+		/* disallow arrays of pseudotypes */
+		if (get_typtype(elemType) == TYPTYPE_PSEUDO)
+			ereport(ERROR,
+					(errcode(ERRCODE_DATATYPE_MISMATCH),
+					 errmsg("array element type cannot be %s",
+							format_type_be(elemType))));
+	}
+	if (defaultValueEl)
+		defaultValue = defGetString(defaultValueEl);
+	if (byValueEl)
+		byValue = defGetBoolean(byValueEl);
+	if (alignmentEl)
+	{
+		char	   *a = defGetString(alignmentEl);
+
+		/*
+		 * Note: if argument was an unquoted identifier, parser will have
+		 * applied translations to it, so be prepared to recognize translated
+		 * type names as well as the nominal form.
+		 */
+		if (pg_strcasecmp(a, "double") == 0 ||
+			pg_strcasecmp(a, "float8") == 0 ||
+			pg_strcasecmp(a, "pg_catalog.float8") == 0)
+			alignment = TYPALIGN_DOUBLE;
+		else if (pg_strcasecmp(a, "int4") == 0 ||
+				 pg_strcasecmp(a, "pg_catalog.int4") == 0)
+			alignment = TYPALIGN_INT;
+		else if (pg_strcasecmp(a, "int2") == 0 ||
+				 pg_strcasecmp(a, "pg_catalog.int2") == 0)
+			alignment = TYPALIGN_SHORT;
+		else if (pg_strcasecmp(a, "char") == 0 ||
+				 pg_strcasecmp(a, "pg_catalog.bpchar") == 0)
+			alignment = TYPALIGN_CHAR;
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("alignment \"%s\" not recognized", a)));
+	}
+	if (storageEl)
+	{
+		char	   *a = defGetString(storageEl);
+
+		if (pg_strcasecmp(a, "plain") == 0)
+			storage = TYPSTORAGE_PLAIN;
+		else if (pg_strcasecmp(a, "external") == 0)
+			storage = TYPSTORAGE_EXTERNAL;
+		else if (pg_strcasecmp(a, "extended") == 0)
+			storage = TYPSTORAGE_EXTENDED;
+		else if (pg_strcasecmp(a, "main") == 0)
+			storage = TYPSTORAGE_MAIN;
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("storage \"%s\" not recognized", a)));
+	}
+	if (collatableEl)
+		collation = defGetBoolean(collatableEl) ? DEFAULT_COLLATION_OID : InvalidOid;
+
+	/*
+	 * make sure we have our required definitions
+	 */
+	if (inputName == NIL)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+				 errmsg("type input function must be specified")));
+	if (outputName == NIL)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+				 errmsg("type output function must be specified")));
+
+	if (typmodinName == NIL && typmodoutName != NIL)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+				 errmsg("type modifier output function is useless without a type modifier input function")));
+
+	/*
+	 * Convert I/O proc names to OIDs
+	 */
+	inputOid = findTypeInputFunction(inputName, typoid);
+	outputOid = findTypeOutputFunction(outputName, typoid);
+	if (receiveName)
+		receiveOid = findTypeReceiveFunction(receiveName, typoid);
+	if (sendName)
+		sendOid = findTypeSendFunction(sendName, typoid);
+
+	/*
+	 * Convert typmodin/out function proc names to OIDs.
+	 */
+	if (typmodinName)
+		typmodinOid = findTypeTypmodinFunction(typmodinName);
+	if (typmodoutName)
+		typmodoutOid = findTypeTypmodoutFunction(typmodoutName);
+
+	/*
+	 * Convert analysis function proc name to an OID.  If no analysis function
+	 * is specified, we'll use zero to select the built-in default algorithm.
+	 */
+	if (analyzeName)
+		analyzeOid = findTypeAnalyzeFunction(analyzeName, typoid);
+
+	/*
+	 * Likewise look up the subscripting function if any.  If it is not
+	 * specified, but a typelem is specified, allow that if
+	 * raw_array_subscript_handler can be used.  (This is for backwards
+	 * compatibility; maybe someday we should throw an error instead.)
+	 */
+	if (subscriptName)
+		subscriptOid = findTypeSubscriptingFunction(subscriptName, typoid);
+	else if (OidIsValid(elemType))
+	{
+		if (internalLength > 0 && !byValue && get_typlen(elemType) > 0)
+			subscriptOid = F_RAW_ARRAY_SUBSCRIPT_HANDLER;
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("element type cannot be specified without a subscripting function")));
+	}
+
+	/*
+	 * Check permissions on functions.  We choose to require the creator/owner
+	 * of a type to also own the underlying functions.  Since creating a type
+	 * is tantamount to granting public execute access on the functions, the
+	 * minimum sane check would be for execute-with-grant-option.  But we
+	 * don't have a way to make the type go away if the grant option is
+	 * revoked, so ownership seems better.
+	 *
+	 * XXX For now, this is all unnecessary given the superuser check above.
+	 * If we ever relax that, these calls likely should be moved into
+	 * findTypeInputFunction et al, where they could be shared by AlterType.
+	 */
+#ifdef NOT_USED
+	if (inputOid && !pg_proc_ownercheck(inputOid, GetUserId()))
+		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FUNCTION,
+					   NameListToString(inputName));
+	if (outputOid && !pg_proc_ownercheck(outputOid, GetUserId()))
+		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FUNCTION,
+					   NameListToString(outputName));
+	if (receiveOid && !pg_proc_ownercheck(receiveOid, GetUserId()))
+		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FUNCTION,
+					   NameListToString(receiveName));
+	if (sendOid && !pg_proc_ownercheck(sendOid, GetUserId()))
+		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FUNCTION,
+					   NameListToString(sendName));
+	if (typmodinOid && !pg_proc_ownercheck(typmodinOid, GetUserId()))
+		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FUNCTION,
+					   NameListToString(typmodinName));
+	if (typmodoutOid && !pg_proc_ownercheck(typmodoutOid, GetUserId()))
+		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FUNCTION,
+					   NameListToString(typmodoutName));
+	if (analyzeOid && !pg_proc_ownercheck(analyzeOid, GetUserId()))
+		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FUNCTION,
+					   NameListToString(analyzeName));
+	if (subscriptOid && !pg_proc_ownercheck(subscriptOid, GetUserId()))
+		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FUNCTION,
+					   NameListToString(subscriptName));
+#endif
+
+	/*
+	 * OK, we're done checking, time to make the type.  We must assign the
+	 * array type OID ahead of calling TypeCreate, since the base type and
+	 * array type each refer to the other.
+	 */
+	array_oid = AssignTypeArrayOid();
+
+	/*
+	 * now have TypeCreate do all the real work.
+	 *
+	 * Note: the pg_type.oid is stored in user tables as array elements (base
+	 * types) in ArrayType and in composite types in DatumTupleFields.  This
+	 * oid must be preserved by binary upgrades.
+	 */
+	address =
+		TypeCreate(InvalidOid,	/* no predetermined type OID */
+				   typeName,	/* type name */
+				   typeNamespace,	/* namespace */
+				   InvalidOid,	/* relation oid (n/a here) */
+				   0,			/* relation kind (ditto) */
+				   GetUserId(), /* owner's ID */
+				   internalLength,	/* internal size */
+				   TYPTYPE_BASE,	/* type-type (base type) */
+				   category,	/* type-category */
+				   preferred,	/* is it a preferred type? */
+				   delimiter,	/* array element delimiter */
+				   inputOid,	/* input procedure */
+				   outputOid,	/* output procedure */
+				   receiveOid,	/* receive procedure */
+				   sendOid,		/* send procedure */
+				   typmodinOid, /* typmodin procedure */
+				   typmodoutOid,	/* typmodout procedure */
+				   analyzeOid,	/* analyze procedure */
+				   subscriptOid,	/* subscript procedure */
+				   elemType,	/* element type ID */
+				   false,		/* this is not an implicit array type */
+				   array_oid,	/* array type we are about to create */
+				   InvalidOid,	/* base type ID (only for domains) */
+				   defaultValue,	/* default type value */
+				   NULL,		/* no binary form available */
+				   byValue,		/* passed by value */
+				   alignment,	/* required alignment */
+				   storage,		/* TOAST strategy */
+				   -1,			/* typMod (Domains only) */
+				   0,			/* Array Dimensions of typbasetype */
+				   false,		/* Type NOT NULL */
+				   collation);	/* type's collation */
+	Assert(typoid == address.objectId);
+
+	/*
+	 * Create the array type that goes with it.
+	 */
+	array_type = makeArrayTypeName(typeName, typeNamespace);
+
+	/* alignment must be TYPALIGN_INT or TYPALIGN_DOUBLE for arrays */
+	alignment = (alignment == TYPALIGN_DOUBLE) ? TYPALIGN_DOUBLE : TYPALIGN_INT;
+
+	TypeCreate(array_oid,		/* force assignment of this type OID */
+			   array_type,		/* type name */
+			   typeNamespace,	/* namespace */
+			   InvalidOid,		/* relation oid (n/a here) */
+			   0,				/* relation kind (ditto) */
+			   GetUserId(),		/* owner's ID */
+			   -1,				/* internal size (always varlena) */
+			   TYPTYPE_BASE,	/* type-type (base type) */
+			   TYPCATEGORY_ARRAY,	/* type-category (array) */
+			   false,			/* array types are never preferred */
+			   delimiter,		/* array element delimiter */
+			   F_ARRAY_IN,		/* input procedure */
+			   F_ARRAY_OUT,		/* output procedure */
+			   F_ARRAY_RECV,	/* receive procedure */
+			   F_ARRAY_SEND,	/* send procedure */
+			   typmodinOid,		/* typmodin procedure */
+			   typmodoutOid,	/* typmodout procedure */
+			   F_ARRAY_TYPANALYZE,	/* analyze procedure */
+			   F_ARRAY_SUBSCRIPT_HANDLER,	/* array subscript procedure */
+			   typoid,			/* element type ID */
+			   true,			/* yes this is an array type */
+			   InvalidOid,		/* no further array type */
+			   InvalidOid,		/* base type ID */
+			   NULL,			/* never a default type value */
+			   NULL,			/* binary default isn't sent either */
+			   false,			/* never passed by value */
+			   alignment,		/* see above */
+			   TYPSTORAGE_EXTENDED, /* ARRAY is always toastable */
+			   -1,				/* typMod (Domains only) */
+			   0,				/* Array dimensions of typbasetype */
+			   false,			/* Type NOT NULL */
+			   collation);		/* type's collation */
+
+	pfree(array_type);
+
+	return address;
+}
+
+/*
+ * Guts of type deletion.
+ *
+ * Deletes the pg_type row for typeOid. Enum and range types keep auxiliary
+ * rows in pg_enum / pg_range that carry no dependency entries of their own,
+ * so those are cleaned up explicitly here as well.
+ */
+void
+RemoveTypeById(Oid typeOid)
+{
+ Relation typeRel;
+ HeapTuple typeTuple;
+ char typtype;
+
+ typeRel = table_open(TypeRelationId, RowExclusiveLock);
+
+ typeTuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typeOid));
+ if (!HeapTupleIsValid(typeTuple))
+ elog(ERROR, "cache lookup failed for type %u", typeOid);
+
+ /* Remove the pg_type row itself */
+ CatalogTupleDelete(typeRel, &typeTuple->t_self);
+
+ /*
+ * Delete the "by hand" side data: pg_enum rows for an enum type, the
+ * pg_range row for a range type. (These cases are mutually exclusive,
+ * since a type has exactly one typtype.)
+ */
+ typtype = ((Form_pg_type) GETSTRUCT(typeTuple))->typtype;
+ if (typtype == TYPTYPE_ENUM)
+ EnumValuesDelete(typeOid);
+ else if (typtype == TYPTYPE_RANGE)
+ RangeDelete(typeOid);
+
+ ReleaseSysCache(typeTuple);
+
+ table_close(typeRel, RowExclusiveLock);
+}
+
+
+/*
+ * DefineDomain
+ * Registers a new domain.
+ *
+ * Builds the pg_type entry for the domain (plus its auto-generated array
+ * type), inheriting storage, alignment, and most other physical properties
+ * from the base type, then installs any CHECK constraints. Returns the
+ * ObjectAddress of the new domain type.
+ */
+ObjectAddress
+DefineDomain(CreateDomainStmt *stmt)
+{
+ char *domainName;
+ char *domainArrayName;
+ Oid domainNamespace;
+ AclResult aclresult;
+ int16 internalLength;
+ Oid inputProcedure;
+ Oid outputProcedure;
+ Oid receiveProcedure;
+ Oid sendProcedure;
+ Oid analyzeProcedure;
+ bool byValue;
+ char category;
+ char delimiter;
+ char alignment;
+ char storage;
+ char typtype;
+ Datum datum;
+ bool isnull;
+ char *defaultValue = NULL;
+ char *defaultValueBin = NULL;
+ bool saw_default = false;
+ bool typNotNull = false;
+ bool nullDefined = false;
+ int32 typNDims = list_length(stmt->typeName->arrayBounds);
+ HeapTuple typeTup;
+ List *schema = stmt->constraints;
+ ListCell *listptr;
+ Oid basetypeoid;
+ Oid old_type_oid;
+ Oid domaincoll;
+ Oid domainArrayOid;
+ Form_pg_type baseType;
+ int32 basetypeMod;
+ Oid baseColl;
+ ObjectAddress address;
+
+ /* Convert list of names to a name and namespace */
+ domainNamespace = QualifiedNameGetCreationNamespace(stmt->domainname,
+ &domainName);
+
+ /* Check we have creation rights in target namespace */
+ aclresult = pg_namespace_aclcheck(domainNamespace, GetUserId(),
+ ACL_CREATE);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error(aclresult, OBJECT_SCHEMA,
+ get_namespace_name(domainNamespace));
+
+ /*
+ * Check for collision with an existing type name. If there is one and
+ * it's an autogenerated array, we can rename it out of the way.
+ */
+ old_type_oid = GetSysCacheOid2(TYPENAMENSP, Anum_pg_type_oid,
+ CStringGetDatum(domainName),
+ ObjectIdGetDatum(domainNamespace));
+ if (OidIsValid(old_type_oid))
+ {
+ if (!moveArrayTypeName(old_type_oid, domainName, domainNamespace))
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg("type \"%s\" already exists", domainName)));
+ }
+
+ /*
+ * Look up the base type.
+ */
+ typeTup = typenameType(NULL, stmt->typeName, &basetypeMod);
+ baseType = (Form_pg_type) GETSTRUCT(typeTup);
+ basetypeoid = baseType->oid;
+
+ /*
+ * Base type must be a plain base type, a composite type, another domain,
+ * an enum or a range type. Domains over pseudotypes would create a
+ * security hole. (It would be shorter to code this to just check for
+ * pseudotypes; but it seems safer to call out the specific typtypes that
+ * are supported, rather than assume that all future typtypes would be
+ * automatically supported.)
+ */
+ typtype = baseType->typtype;
+ if (typtype != TYPTYPE_BASE &&
+ typtype != TYPTYPE_COMPOSITE &&
+ typtype != TYPTYPE_DOMAIN &&
+ typtype != TYPTYPE_ENUM &&
+ typtype != TYPTYPE_RANGE &&
+ typtype != TYPTYPE_MULTIRANGE)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("\"%s\" is not a valid base type for a domain",
+ TypeNameToString(stmt->typeName))));
+
+ aclresult = pg_type_aclcheck(basetypeoid, GetUserId(), ACL_USAGE);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error_type(aclresult, basetypeoid);
+
+ /*
+ * Collect the properties of the new domain. Some are inherited from the
+ * base type, some are not. If you change any of this inheritance
+ * behavior, be sure to update AlterTypeRecurse() to match!
+ */
+
+ /*
+ * Identify the collation if any
+ */
+ baseColl = baseType->typcollation;
+ if (stmt->collClause)
+ domaincoll = get_collation_oid(stmt->collClause->collname, false);
+ else
+ domaincoll = baseColl;
+
+ /* Complain if COLLATE is applied to an uncollatable type */
+ if (OidIsValid(domaincoll) && !OidIsValid(baseColl))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("collations are not supported by type %s",
+ format_type_be(basetypeoid))));
+
+ /* passed by value */
+ byValue = baseType->typbyval;
+
+ /* Required Alignment */
+ alignment = baseType->typalign;
+
+ /* TOAST Strategy */
+ storage = baseType->typstorage;
+
+ /* Storage Length */
+ internalLength = baseType->typlen;
+
+ /* Type Category */
+ category = baseType->typcategory;
+
+ /* Array element Delimiter */
+ delimiter = baseType->typdelim;
+
+ /* I/O Functions */
+ inputProcedure = F_DOMAIN_IN;
+ outputProcedure = baseType->typoutput;
+ receiveProcedure = F_DOMAIN_RECV;
+ sendProcedure = baseType->typsend;
+
+ /*
+ * Note: input and receive go through the domain_in/domain_recv wrappers,
+ * while output and send are taken directly from the base type.
+ */
+
+ /* Domains never accept typmods, so no typmodin/typmodout needed */
+
+ /* Analysis function */
+ analyzeProcedure = baseType->typanalyze;
+
+ /*
+ * Domains don't need a subscript function, since they are not
+ * subscriptable on their own. If the base type is subscriptable, the
+ * parser will reduce the type to the base type before subscripting.
+ */
+
+ /* Inherited default value */
+ datum = SysCacheGetAttr(TYPEOID, typeTup,
+ Anum_pg_type_typdefault, &isnull);
+ if (!isnull)
+ defaultValue = TextDatumGetCString(datum);
+
+ /* Inherited default binary value */
+ datum = SysCacheGetAttr(TYPEOID, typeTup,
+ Anum_pg_type_typdefaultbin, &isnull);
+ if (!isnull)
+ defaultValueBin = TextDatumGetCString(datum);
+
+ /*
+ * Run through constraints manually to avoid the additional processing
+ * conducted by DefineRelation() and friends.
+ */
+ foreach(listptr, schema)
+ {
+ Constraint *constr = lfirst(listptr);
+
+ if (!IsA(constr, Constraint))
+ elog(ERROR, "unrecognized node type: %d",
+ (int) nodeTag(constr));
+ switch (constr->contype)
+ {
+ case CONSTR_DEFAULT:
+
+ /*
+ * The inherited default value may be overridden by the user
+ * with the DEFAULT <expr> clause ... but only once.
+ */
+ if (saw_default)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("multiple default expressions")));
+ saw_default = true;
+
+ if (constr->raw_expr)
+ {
+ ParseState *pstate;
+ Node *defaultExpr;
+
+ /* Create a dummy ParseState for transformExpr */
+ pstate = make_parsestate(NULL);
+
+ /*
+ * Cook the constr->raw_expr into an expression. Note:
+ * name is strictly for error message
+ */
+ defaultExpr = cookDefault(pstate, constr->raw_expr,
+ basetypeoid,
+ basetypeMod,
+ domainName,
+ 0);
+
+ /*
+ * If the expression is just a NULL constant, we treat it
+ * like not having a default.
+ *
+ * Note that if the basetype is another domain, we'll see
+ * a CoerceToDomain expr here and not discard the default.
+ * This is critical because the domain default needs to be
+ * retained to override any default that the base domain
+ * might have.
+ */
+ if (defaultExpr == NULL ||
+ (IsA(defaultExpr, Const) &&
+ ((Const *) defaultExpr)->constisnull))
+ {
+ defaultValue = NULL;
+ defaultValueBin = NULL;
+ }
+ else
+ {
+ /*
+ * Expression must be stored as a nodeToString result,
+ * but we also require a valid textual representation
+ * (mainly to make life easier for pg_dump).
+ */
+ defaultValue =
+ deparse_expression(defaultExpr,
+ NIL, false, false);
+ defaultValueBin = nodeToString(defaultExpr);
+ }
+ }
+ else
+ {
+ /* No default (can this still happen?) */
+ defaultValue = NULL;
+ defaultValueBin = NULL;
+ }
+ break;
+
+ case CONSTR_NOTNULL:
+ if (nullDefined && !typNotNull)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("conflicting NULL/NOT NULL constraints")));
+ typNotNull = true;
+ nullDefined = true;
+ break;
+
+ case CONSTR_NULL:
+ if (nullDefined && typNotNull)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("conflicting NULL/NOT NULL constraints")));
+ typNotNull = false;
+ nullDefined = true;
+ break;
+
+ case CONSTR_CHECK:
+
+ /*
+ * Check constraints are handled after domain creation, as
+ * they require the Oid of the domain; at this point we can
+ * only check that they're not marked NO INHERIT, because that
+ * would be bogus.
+ */
+ if (constr->is_no_inherit)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("check constraints for domains cannot be marked NO INHERIT")));
+ break;
+
+ /*
+ * All else are error cases
+ */
+ case CONSTR_UNIQUE:
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("unique constraints not possible for domains")));
+ break;
+
+ case CONSTR_PRIMARY:
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("primary key constraints not possible for domains")));
+ break;
+
+ case CONSTR_EXCLUSION:
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("exclusion constraints not possible for domains")));
+ break;
+
+ case CONSTR_FOREIGN:
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("foreign key constraints not possible for domains")));
+ break;
+
+ case CONSTR_ATTR_DEFERRABLE:
+ case CONSTR_ATTR_NOT_DEFERRABLE:
+ case CONSTR_ATTR_DEFERRED:
+ case CONSTR_ATTR_IMMEDIATE:
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("specifying constraint deferrability not supported for domains")));
+ break;
+
+ default:
+ elog(ERROR, "unrecognized constraint subtype: %d",
+ (int) constr->contype);
+ break;
+ }
+ }
+
+ /* Allocate OID for array type */
+ domainArrayOid = AssignTypeArrayOid();
+
+ /*
+ * Have TypeCreate do all the real work.
+ */
+ address =
+ TypeCreate(InvalidOid, /* no predetermined type OID */
+ domainName, /* type name */
+ domainNamespace, /* namespace */
+ InvalidOid, /* relation oid (n/a here) */
+ 0, /* relation kind (ditto) */
+ GetUserId(), /* owner's ID */
+ internalLength, /* internal size */
+ TYPTYPE_DOMAIN, /* type-type (domain type) */
+ category, /* type-category */
+ false, /* domain types are never preferred */
+ delimiter, /* array element delimiter */
+ inputProcedure, /* input procedure */
+ outputProcedure, /* output procedure */
+ receiveProcedure, /* receive procedure */
+ sendProcedure, /* send procedure */
+ InvalidOid, /* typmodin procedure - none */
+ InvalidOid, /* typmodout procedure - none */
+ analyzeProcedure, /* analyze procedure */
+ InvalidOid, /* subscript procedure - none */
+ InvalidOid, /* no array element type */
+ false, /* this isn't an array */
+ domainArrayOid, /* array type we are about to create */
+ basetypeoid, /* base type ID */
+ defaultValue, /* default type value (text) */
+ defaultValueBin, /* default type value (binary) */
+ byValue, /* passed by value */
+ alignment, /* required alignment */
+ storage, /* TOAST strategy */
+ basetypeMod, /* typeMod value */
+ typNDims, /* Array dimensions for base type */
+ typNotNull, /* Type NOT NULL */
+ domaincoll); /* type's collation */
+
+ /*
+ * Create the array type that goes with it.
+ */
+ domainArrayName = makeArrayTypeName(domainName, domainNamespace);
+
+ /* alignment must be TYPALIGN_INT or TYPALIGN_DOUBLE for arrays */
+ alignment = (alignment == TYPALIGN_DOUBLE) ? TYPALIGN_DOUBLE : TYPALIGN_INT;
+
+ TypeCreate(domainArrayOid, /* force assignment of this type OID */
+ domainArrayName, /* type name */
+ domainNamespace, /* namespace */
+ InvalidOid, /* relation oid (n/a here) */
+ 0, /* relation kind (ditto) */
+ GetUserId(), /* owner's ID */
+ -1, /* internal size (always varlena) */
+ TYPTYPE_BASE, /* type-type (base type) */
+ TYPCATEGORY_ARRAY, /* type-category (array) */
+ false, /* array types are never preferred */
+ delimiter, /* array element delimiter */
+ F_ARRAY_IN, /* input procedure */
+ F_ARRAY_OUT, /* output procedure */
+ F_ARRAY_RECV, /* receive procedure */
+ F_ARRAY_SEND, /* send procedure */
+ InvalidOid, /* typmodin procedure - none */
+ InvalidOid, /* typmodout procedure - none */
+ F_ARRAY_TYPANALYZE, /* analyze procedure */
+ F_ARRAY_SUBSCRIPT_HANDLER, /* array subscript procedure */
+ address.objectId, /* element type ID */
+ true, /* yes this is an array type */
+ InvalidOid, /* no further array type */
+ InvalidOid, /* base type ID */
+ NULL, /* never a default type value */
+ NULL, /* binary default isn't sent either */
+ false, /* never passed by value */
+ alignment, /* see above */
+ TYPSTORAGE_EXTENDED, /* ARRAY is always toastable */
+ -1, /* typMod (Domains only) */
+ 0, /* Array dimensions of typbasetype */
+ false, /* Type NOT NULL */
+ domaincoll); /* type's collation */
+
+ pfree(domainArrayName);
+
+ /*
+ * Process constraints which refer to the domain ID returned by TypeCreate
+ */
+ foreach(listptr, schema)
+ {
+ Constraint *constr = lfirst(listptr);
+
+ /* it must be a Constraint, per check above */
+
+ switch (constr->contype)
+ {
+ case CONSTR_CHECK:
+ domainAddConstraint(address.objectId, domainNamespace,
+ basetypeoid, basetypeMod,
+ constr, domainName, NULL);
+ break;
+
+ /* Other constraint types were fully processed above */
+
+ default:
+ break;
+ }
+
+ /* CCI so we can detect duplicate constraint names */
+ CommandCounterIncrement();
+ }
+
+ /*
+ * Now we can clean up.
+ */
+ ReleaseSysCache(typeTup);
+
+ return address;
+}
+
+
+/*
+ * DefineEnum
+ * Registers a new enum.
+ *
+ * Creates the pg_type entry, the pg_enum rows for the listed labels, and
+ * the enum's auto-generated array type. Returns the ObjectAddress of the
+ * new enum type.
+ */
+ObjectAddress
+DefineEnum(CreateEnumStmt *stmt)
+{
+ char *enumName;
+ char *enumArrayName;
+ Oid enumNamespace;
+ AclResult aclresult;
+ Oid old_type_oid;
+ Oid enumArrayOid;
+ ObjectAddress enumTypeAddr;
+
+ /* Convert list of names to a name and namespace */
+ enumNamespace = QualifiedNameGetCreationNamespace(stmt->typeName,
+ &enumName);
+
+ /* Check we have creation rights in target namespace */
+ aclresult = pg_namespace_aclcheck(enumNamespace, GetUserId(), ACL_CREATE);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error(aclresult, OBJECT_SCHEMA,
+ get_namespace_name(enumNamespace));
+
+ /*
+ * Check for collision with an existing type name. If there is one and
+ * it's an autogenerated array, we can rename it out of the way.
+ */
+ old_type_oid = GetSysCacheOid2(TYPENAMENSP, Anum_pg_type_oid,
+ CStringGetDatum(enumName),
+ ObjectIdGetDatum(enumNamespace));
+ if (OidIsValid(old_type_oid))
+ {
+ if (!moveArrayTypeName(old_type_oid, enumName, enumNamespace))
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg("type \"%s\" already exists", enumName)));
+ }
+
+ /* Allocate OID for array type */
+ enumArrayOid = AssignTypeArrayOid();
+
+ /*
+ * Create the pg_type entry. Enum values are stored on disk as Oids, so
+ * the type is fixed-size and pass-by-value.
+ */
+ enumTypeAddr =
+ TypeCreate(InvalidOid, /* no predetermined type OID */
+ enumName, /* type name */
+ enumNamespace, /* namespace */
+ InvalidOid, /* relation oid (n/a here) */
+ 0, /* relation kind (ditto) */
+ GetUserId(), /* owner's ID */
+ sizeof(Oid), /* internal size */
+ TYPTYPE_ENUM, /* type-type (enum type) */
+ TYPCATEGORY_ENUM, /* type-category (enum type) */
+ false, /* enum types are never preferred */
+ DEFAULT_TYPDELIM, /* array element delimiter */
+ F_ENUM_IN, /* input procedure */
+ F_ENUM_OUT, /* output procedure */
+ F_ENUM_RECV, /* receive procedure */
+ F_ENUM_SEND, /* send procedure */
+ InvalidOid, /* typmodin procedure - none */
+ InvalidOid, /* typmodout procedure - none */
+ InvalidOid, /* analyze procedure - default */
+ InvalidOid, /* subscript procedure - none */
+ InvalidOid, /* element type ID */
+ false, /* this is not an array type */
+ enumArrayOid, /* array type we are about to create */
+ InvalidOid, /* base type ID (only for domains) */
+ NULL, /* never a default type value */
+ NULL, /* binary default isn't sent either */
+ true, /* always passed by value */
+ TYPALIGN_INT, /* int alignment */
+ TYPSTORAGE_PLAIN, /* TOAST strategy always plain */
+ -1, /* typMod (Domains only) */
+ 0, /* Array dimensions of typbasetype */
+ false, /* Type NOT NULL */
+ InvalidOid); /* type's collation */
+
+ /* Enter the enum's values into pg_enum */
+ EnumValuesCreate(enumTypeAddr.objectId, stmt->vals);
+
+ /*
+ * Create the array type that goes with it.
+ */
+ enumArrayName = makeArrayTypeName(enumName, enumNamespace);
+
+ TypeCreate(enumArrayOid, /* force assignment of this type OID */
+ enumArrayName, /* type name */
+ enumNamespace, /* namespace */
+ InvalidOid, /* relation oid (n/a here) */
+ 0, /* relation kind (ditto) */
+ GetUserId(), /* owner's ID */
+ -1, /* internal size (always varlena) */
+ TYPTYPE_BASE, /* type-type (base type) */
+ TYPCATEGORY_ARRAY, /* type-category (array) */
+ false, /* array types are never preferred */
+ DEFAULT_TYPDELIM, /* array element delimiter */
+ F_ARRAY_IN, /* input procedure */
+ F_ARRAY_OUT, /* output procedure */
+ F_ARRAY_RECV, /* receive procedure */
+ F_ARRAY_SEND, /* send procedure */
+ InvalidOid, /* typmodin procedure - none */
+ InvalidOid, /* typmodout procedure - none */
+ F_ARRAY_TYPANALYZE, /* analyze procedure */
+ F_ARRAY_SUBSCRIPT_HANDLER, /* array subscript procedure */
+ enumTypeAddr.objectId, /* element type ID */
+ true, /* yes this is an array type */
+ InvalidOid, /* no further array type */
+ InvalidOid, /* base type ID */
+ NULL, /* never a default type value */
+ NULL, /* binary default isn't sent either */
+ false, /* never passed by value */
+ TYPALIGN_INT, /* enums have int align, so do their arrays */
+ TYPSTORAGE_EXTENDED, /* ARRAY is always toastable */
+ -1, /* typMod (Domains only) */
+ 0, /* Array dimensions of typbasetype */
+ false, /* Type NOT NULL */
+ InvalidOid); /* type's collation */
+
+ pfree(enumArrayName);
+
+ return enumTypeAddr;
+}
+
+/*
+ * AlterEnum
+ * Adds a new label to an existing enum, or renames an existing label.
+ */
+ObjectAddress
+AlterEnum(AlterEnumStmt *stmt)
+{
+ TypeName *name;
+ Oid enumOid;
+ HeapTuple typeTup;
+ ObjectAddress addr;
+
+ /* Resolve the enum's OID via the standard type-name lookup machinery */
+ name = makeTypeNameFromNameList(stmt->typeName);
+ enumOid = typenameTypeId(NULL, name);
+
+ typeTup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(enumOid));
+ if (!HeapTupleIsValid(typeTup))
+ elog(ERROR, "cache lookup failed for type %u", enumOid);
+
+ /* Verify it really is an enum and that we are allowed to ALTER it */
+ checkEnumOwner(typeTup);
+
+ ReleaseSysCache(typeTup);
+
+ if (stmt->oldVal == NULL)
+ {
+ /* No old label given: this is ADD VALUE */
+ AddEnumLabel(enumOid, stmt->newVal,
+ stmt->newValNeighbor, stmt->newValIsAfter,
+ stmt->skipIfNewValExists);
+ }
+ else
+ {
+ /* Old label given: this is RENAME VALUE */
+ RenameEnumLabel(enumOid, stmt->oldVal, stmt->newVal);
+ }
+
+ InvokeObjectPostAlterHook(TypeRelationId, enumOid, 0);
+
+ ObjectAddressSet(addr, TypeRelationId, enumOid);
+
+ return addr;
+}
+
+
+/*
+ * checkEnumOwner
+ *
+ * Verify that the given pg_type tuple describes an enum type and that the
+ * current user owns it (and hence may run ALTER TYPE on it); ereport if
+ * either check fails.
+ */
+static void
+checkEnumOwner(HeapTuple tup)
+{
+ Form_pg_type typForm = (Form_pg_type) GETSTRUCT(tup);
+ Oid typId = typForm->oid;
+
+ /* Reject anything that isn't an enum type */
+ if (typForm->typtype != TYPTYPE_ENUM)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("%s is not an enum",
+ format_type_be(typId))));
+
+ /* Only the type's owner may alter it */
+ if (!pg_type_ownercheck(typId, GetUserId()))
+ aclcheck_error_type(ACLCHECK_NOT_OWNER, typId);
+}
+
+
+/*
+ * DefineRange
+ * Registers a new range type.
+ *
+ * Perhaps it might be worthwhile to set pg_type.typelem to the base type,
+ * and likewise on multiranges to set it to the range type. But having a
+ * non-zero typelem is treated elsewhere as a synonym for being an array,
+ * and users might have queries with that same assumption.
+ */
+ObjectAddress
+DefineRange(ParseState *pstate, CreateRangeStmt *stmt)
+{
+ char *typeName;
+ Oid typeNamespace;
+ Oid typoid;
+ char *rangeArrayName;
+ char *multirangeTypeName = NULL;
+ char *multirangeArrayName;
+ Oid multirangeNamespace = InvalidOid;
+ Oid rangeArrayOid;
+ Oid multirangeOid;
+ Oid multirangeArrayOid;
+ Oid rangeSubtype = InvalidOid;
+ List *rangeSubOpclassName = NIL;
+ List *rangeCollationName = NIL;
+ List *rangeCanonicalName = NIL;
+ List *rangeSubtypeDiffName = NIL;
+ Oid rangeSubOpclass;
+ Oid rangeCollation;
+ regproc rangeCanonical;
+ regproc rangeSubtypeDiff;
+ int16 subtyplen;
+ bool subtypbyval;
+ char subtypalign;
+ char alignment;
+ AclResult aclresult;
+ ListCell *lc;
+ ObjectAddress address;
+ ObjectAddress mltrngaddress PG_USED_FOR_ASSERTS_ONLY;
+ Oid castFuncOid;
+
+ /* Convert list of names to a name and namespace */
+ typeNamespace = QualifiedNameGetCreationNamespace(stmt->typeName,
+ &typeName);
+
+ /* Check we have creation rights in target namespace */
+ aclresult = pg_namespace_aclcheck(typeNamespace, GetUserId(), ACL_CREATE);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error(aclresult, OBJECT_SCHEMA,
+ get_namespace_name(typeNamespace));
+
+ /*
+ * Look to see if type already exists.
+ */
+ typoid = GetSysCacheOid2(TYPENAMENSP, Anum_pg_type_oid,
+ CStringGetDatum(typeName),
+ ObjectIdGetDatum(typeNamespace));
+
+ /*
+ * If it's not a shell, see if it's an autogenerated array type, and if so
+ * rename it out of the way.
+ */
+ if (OidIsValid(typoid) && get_typisdefined(typoid))
+ {
+ if (moveArrayTypeName(typoid, typeName, typeNamespace))
+ typoid = InvalidOid;
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg("type \"%s\" already exists", typeName)));
+ }
+
+ /*
+ * Unlike DefineType(), we don't insist on a shell type existing first, as
+ * it's only needed if the user wants to specify a canonical function.
+ */
+
+ /* Extract the parameters from the parameter list */
+ foreach(lc, stmt->params)
+ {
+ DefElem *defel = (DefElem *) lfirst(lc);
+
+ if (strcmp(defel->defname, "subtype") == 0)
+ {
+ if (OidIsValid(rangeSubtype))
+ errorConflictingDefElem(defel, pstate);
+ /* we can look up the subtype name immediately */
+ rangeSubtype = typenameTypeId(NULL, defGetTypeName(defel));
+ }
+ else if (strcmp(defel->defname, "subtype_opclass") == 0)
+ {
+ if (rangeSubOpclassName != NIL)
+ errorConflictingDefElem(defel, pstate);
+ rangeSubOpclassName = defGetQualifiedName(defel);
+ }
+ else if (strcmp(defel->defname, "collation") == 0)
+ {
+ if (rangeCollationName != NIL)
+ errorConflictingDefElem(defel, pstate);
+ rangeCollationName = defGetQualifiedName(defel);
+ }
+ else if (strcmp(defel->defname, "canonical") == 0)
+ {
+ if (rangeCanonicalName != NIL)
+ errorConflictingDefElem(defel, pstate);
+ rangeCanonicalName = defGetQualifiedName(defel);
+ }
+ else if (strcmp(defel->defname, "subtype_diff") == 0)
+ {
+ if (rangeSubtypeDiffName != NIL)
+ errorConflictingDefElem(defel, pstate);
+ rangeSubtypeDiffName = defGetQualifiedName(defel);
+ }
+ else if (strcmp(defel->defname, "multirange_type_name") == 0)
+ {
+ if (multirangeTypeName != NULL)
+ errorConflictingDefElem(defel, pstate);
+ /* we can look up the subtype name immediately */
+ multirangeNamespace = QualifiedNameGetCreationNamespace(defGetQualifiedName(defel),
+ &multirangeTypeName);
+ }
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("type attribute \"%s\" not recognized",
+ defel->defname)));
+ }
+
+ /* Must have a subtype */
+ if (!OidIsValid(rangeSubtype))
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("type attribute \"subtype\" is required")));
+ /* disallow ranges of pseudotypes */
+ if (get_typtype(rangeSubtype) == TYPTYPE_PSEUDO)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("range subtype cannot be %s",
+ format_type_be(rangeSubtype))));
+
+ /* Identify subopclass */
+ rangeSubOpclass = findRangeSubOpclass(rangeSubOpclassName, rangeSubtype);
+
+ /* Identify collation to use, if any */
+ if (type_is_collatable(rangeSubtype))
+ {
+ if (rangeCollationName != NIL)
+ rangeCollation = get_collation_oid(rangeCollationName, false);
+ else
+ rangeCollation = get_typcollation(rangeSubtype);
+ }
+ else
+ {
+ if (rangeCollationName != NIL)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("range collation specified but subtype does not support collation")));
+ rangeCollation = InvalidOid;
+ }
+
+ /* Identify support functions, if provided */
+ if (rangeCanonicalName != NIL)
+ {
+ if (!OidIsValid(typoid))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("cannot specify a canonical function without a pre-created shell type"),
+ errhint("Create the type as a shell type, then create its canonicalization function, then do a full CREATE TYPE.")));
+ rangeCanonical = findRangeCanonicalFunction(rangeCanonicalName,
+ typoid);
+ }
+ else
+ rangeCanonical = InvalidOid;
+
+ if (rangeSubtypeDiffName != NIL)
+ rangeSubtypeDiff = findRangeSubtypeDiffFunction(rangeSubtypeDiffName,
+ rangeSubtype);
+ else
+ rangeSubtypeDiff = InvalidOid;
+
+ get_typlenbyvalalign(rangeSubtype,
+ &subtyplen, &subtypbyval, &subtypalign);
+
+ /* alignment must be TYPALIGN_INT or TYPALIGN_DOUBLE for ranges */
+ alignment = (subtypalign == TYPALIGN_DOUBLE) ? TYPALIGN_DOUBLE : TYPALIGN_INT;
+
+ /* Allocate OID for array type, its multirange, and its multirange array */
+ rangeArrayOid = AssignTypeArrayOid();
+ multirangeOid = AssignTypeMultirangeOid();
+ multirangeArrayOid = AssignTypeMultirangeArrayOid();
+
+ /* Create the pg_type entry */
+ address =
+ TypeCreate(InvalidOid, /* no predetermined type OID */
+ typeName, /* type name */
+ typeNamespace, /* namespace */
+ InvalidOid, /* relation oid (n/a here) */
+ 0, /* relation kind (ditto) */
+ GetUserId(), /* owner's ID */
+ -1, /* internal size (always varlena) */
+ TYPTYPE_RANGE, /* type-type (range type) */
+ TYPCATEGORY_RANGE, /* type-category (range type) */
+ false, /* range types are never preferred */
+ DEFAULT_TYPDELIM, /* array element delimiter */
+ F_RANGE_IN, /* input procedure */
+ F_RANGE_OUT, /* output procedure */
+ F_RANGE_RECV, /* receive procedure */
+ F_RANGE_SEND, /* send procedure */
+ InvalidOid, /* typmodin procedure - none */
+ InvalidOid, /* typmodout procedure - none */
+ F_RANGE_TYPANALYZE, /* analyze procedure */
+ InvalidOid, /* subscript procedure - none */
+ InvalidOid, /* element type ID - none */
+ false, /* this is not an array type */
+ rangeArrayOid, /* array type we are about to create */
+ InvalidOid, /* base type ID (only for domains) */
+ NULL, /* never a default type value */
+ NULL, /* no binary form available either */
+ false, /* never passed by value */
+ alignment, /* alignment */
+ TYPSTORAGE_EXTENDED, /* TOAST strategy (always extended) */
+ -1, /* typMod (Domains only) */
+ 0, /* Array dimensions of typbasetype */
+ false, /* Type NOT NULL */
+ InvalidOid); /* type's collation (ranges never have one) */
+ Assert(typoid == InvalidOid || typoid == address.objectId);
+ typoid = address.objectId;
+
+ /* Create the multirange that goes with it */
+ if (multirangeTypeName)
+ {
+ Oid old_typoid;
+
+ /*
+ * Look to see if multirange type already exists.
+ */
+ old_typoid = GetSysCacheOid2(TYPENAMENSP, Anum_pg_type_oid,
+ CStringGetDatum(multirangeTypeName),
+ ObjectIdGetDatum(multirangeNamespace));
+
+ /*
+ * If it's not a shell, see if it's an autogenerated array type, and
+ * if so rename it out of the way.
+ */
+ if (OidIsValid(old_typoid) && get_typisdefined(old_typoid))
+ {
+ if (!moveArrayTypeName(old_typoid, multirangeTypeName, multirangeNamespace))
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg("type \"%s\" already exists", multirangeTypeName)));
+ }
+ }
+ else
+ {
+ /* Generate multirange name automatically */
+ multirangeNamespace = typeNamespace;
+ multirangeTypeName = makeMultirangeTypeName(typeName, multirangeNamespace);
+ }
+
+ mltrngaddress =
+ TypeCreate(multirangeOid, /* force assignment of this type OID */
+ multirangeTypeName, /* type name */
+ multirangeNamespace, /* namespace */
+ InvalidOid, /* relation oid (n/a here) */
+ 0, /* relation kind (ditto) */
+ GetUserId(), /* owner's ID */
+ -1, /* internal size (always varlena) */
+ TYPTYPE_MULTIRANGE, /* type-type (multirange type) */
+ TYPCATEGORY_RANGE, /* type-category (range type) */
+ false, /* multirange types are never preferred */
+ DEFAULT_TYPDELIM, /* array element delimiter */
+ F_MULTIRANGE_IN, /* input procedure */
+ F_MULTIRANGE_OUT, /* output procedure */
+ F_MULTIRANGE_RECV, /* receive procedure */
+ F_MULTIRANGE_SEND, /* send procedure */
+ InvalidOid, /* typmodin procedure - none */
+ InvalidOid, /* typmodout procedure - none */
+ F_MULTIRANGE_TYPANALYZE, /* analyze procedure */
+ InvalidOid, /* subscript procedure - none */
+ InvalidOid, /* element type ID - none */
+ false, /* this is not an array type */
+ multirangeArrayOid, /* array type we are about to create */
+ InvalidOid, /* base type ID (only for domains) */
+ NULL, /* never a default type value */
+ NULL, /* no binary form available either */
+ false, /* never passed by value */
+ alignment, /* alignment */
+ 'x', /* TOAST strategy (always extended) */
+ -1, /* typMod (Domains only) */
+ 0, /* Array dimensions of typbasetype */
+ false, /* Type NOT NULL */
+ InvalidOid); /* type's collation (ranges never have one) */
+ Assert(multirangeOid == mltrngaddress.objectId);
+
+ /* Create the entry in pg_range */
+ RangeCreate(typoid, rangeSubtype, rangeCollation, rangeSubOpclass,
+ rangeCanonical, rangeSubtypeDiff, multirangeOid);
+
+ /*
+ * Create the array type that goes with it.
+ */
+ rangeArrayName = makeArrayTypeName(typeName, typeNamespace);
+
+ TypeCreate(rangeArrayOid, /* force assignment of this type OID */
+ rangeArrayName, /* type name */
+ typeNamespace, /* namespace */
+ InvalidOid, /* relation oid (n/a here) */
+ 0, /* relation kind (ditto) */
+ GetUserId(), /* owner's ID */
+ -1, /* internal size (always varlena) */
+ TYPTYPE_BASE, /* type-type (base type) */
+ TYPCATEGORY_ARRAY, /* type-category (array) */
+ false, /* array types are never preferred */
+ DEFAULT_TYPDELIM, /* array element delimiter */
+ F_ARRAY_IN, /* input procedure */
+ F_ARRAY_OUT, /* output procedure */
+ F_ARRAY_RECV, /* receive procedure */
+ F_ARRAY_SEND, /* send procedure */
+ InvalidOid, /* typmodin procedure - none */
+ InvalidOid, /* typmodout procedure - none */
+ F_ARRAY_TYPANALYZE, /* analyze procedure */
+ F_ARRAY_SUBSCRIPT_HANDLER, /* array subscript procedure */
+ typoid, /* element type ID */
+ true, /* yes this is an array type */
+ InvalidOid, /* no further array type */
+ InvalidOid, /* base type ID */
+ NULL, /* never a default type value */
+ NULL, /* binary default isn't sent either */
+ false, /* never passed by value */
+ alignment, /* alignment - same as range's */
+ TYPSTORAGE_EXTENDED, /* ARRAY is always toastable */
+ -1, /* typMod (Domains only) */
+ 0, /* Array dimensions of typbasetype */
+ false, /* Type NOT NULL */
+ InvalidOid); /* typcollation */
+
+ pfree(rangeArrayName);
+
+ /* Create the multirange's array type */
+
+ multirangeArrayName = makeArrayTypeName(multirangeTypeName, typeNamespace);
+
+ TypeCreate(multirangeArrayOid, /* force assignment of this type OID */
+ multirangeArrayName, /* type name */
+ multirangeNamespace, /* namespace */
+ InvalidOid, /* relation oid (n/a here) */
+ 0, /* relation kind (ditto) */
+ GetUserId(), /* owner's ID */
+ -1, /* internal size (always varlena) */
+ TYPTYPE_BASE, /* type-type (base type) */
+ TYPCATEGORY_ARRAY, /* type-category (array) */
+ false, /* array types are never preferred */
+ DEFAULT_TYPDELIM, /* array element delimiter */
+ F_ARRAY_IN, /* input procedure */
+ F_ARRAY_OUT, /* output procedure */
+ F_ARRAY_RECV, /* receive procedure */
+ F_ARRAY_SEND, /* send procedure */
+ InvalidOid, /* typmodin procedure - none */
+ InvalidOid, /* typmodout procedure - none */
+ F_ARRAY_TYPANALYZE, /* analyze procedure */
+ F_ARRAY_SUBSCRIPT_HANDLER, /* array subscript procedure */
+ multirangeOid, /* element type ID */
+ true, /* yes this is an array type */
+ InvalidOid, /* no further array type */
+ InvalidOid, /* base type ID */
+ NULL, /* never a default type value */
+ NULL, /* binary default isn't sent either */
+ false, /* never passed by value */
+ alignment, /* alignment - same as range's */
+ 'x', /* ARRAY is always toastable */
+ -1, /* typMod (Domains only) */
+ 0, /* Array dimensions of typbasetype */
+ false, /* Type NOT NULL */
+ InvalidOid); /* typcollation */
+
+ /* And create the constructor functions for this range type */
+ makeRangeConstructors(typeName, typeNamespace, typoid, rangeSubtype);
+ makeMultirangeConstructors(multirangeTypeName, typeNamespace,
+ multirangeOid, typoid, rangeArrayOid,
+ &castFuncOid);
+
+ /* Create cast from the range type to its multirange type */
+ CastCreate(typoid, multirangeOid, castFuncOid, 'e', 'f', DEPENDENCY_INTERNAL);
+
+ pfree(multirangeArrayName);
+
+ return address;
+}
+
/*
 * Because there may exist several range types over the same subtype, the
 * range type can't be uniquely determined from the subtype.  So it's
 * impossible to define a polymorphic constructor; we have to generate new
 * constructor functions explicitly for each range type.
 *
 * We define two constructor functions here, taking 2 arguments (lower,
 * upper) and 3 arguments (lower, upper, bounds text) respectively; see
 * prosrc/pronargs below.  This is just to offer more convenience for the
 * user.
 */
static void
makeRangeConstructors(const char *name, Oid namespace,
					  Oid rangeOid, Oid subtype)
{
	/* Internal C functions backing the 2-arg and 3-arg constructors */
	static const char *const prosrc[2] = {"range_constructor2",
										  "range_constructor3"};
	static const int pronargs[2] = {2, 3};

	Oid			constructorArgTypes[3];
	ObjectAddress myself,
				referenced;
	int			i;

	/* Argument list prefix shared by both constructors: (subtype, subtype[, text]) */
	constructorArgTypes[0] = subtype;
	constructorArgTypes[1] = subtype;
	constructorArgTypes[2] = TEXTOID;

	/* Dependency target: the range type itself */
	referenced.classId = TypeRelationId;
	referenced.objectId = rangeOid;
	referenced.objectSubId = 0;

	for (i = 0; i < lengthof(prosrc); i++)
	{
		oidvector  *constructorArgTypesVector;

		/* Use only the first pronargs[i] entries of the arg-type array */
		constructorArgTypesVector = buildoidvector(constructorArgTypes,
												   pronargs[i]);

		myself = ProcedureCreate(name,	/* name: same as range type */
								 namespace, /* namespace */
								 false, /* replace */
								 false, /* returns set */
								 rangeOid,	/* return type */
								 BOOTSTRAP_SUPERUSERID, /* proowner */
								 INTERNALlanguageId,	/* language */
								 F_FMGR_INTERNAL_VALIDATOR, /* language validator */
								 prosrc[i], /* prosrc */
								 NULL,	/* probin */
								 NULL,	/* prosqlbody */
								 PROKIND_FUNCTION,
								 false, /* security_definer */
								 false, /* leakproof */
								 false, /* isStrict */
								 PROVOLATILE_IMMUTABLE, /* volatility */
								 PROPARALLEL_SAFE,	/* parallel safety */
								 constructorArgTypesVector, /* parameterTypes */
								 PointerGetDatum(NULL), /* allParameterTypes */
								 PointerGetDatum(NULL), /* parameterModes */
								 PointerGetDatum(NULL), /* parameterNames */
								 NIL,	/* parameterDefaults */
								 PointerGetDatum(NULL), /* trftypes */
								 PointerGetDatum(NULL), /* proconfig */
								 InvalidOid,	/* prosupport */
								 1.0,	/* procost */
								 0.0);	/* prorows */

		/*
		 * Make the constructors internally-dependent on the range type so
		 * that they go away silently when the type is dropped.  Note that
		 * pg_dump depends on this choice to avoid dumping the constructors.
		 */
		recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL);
	}
}
+
/*
 * We make a separate multirange constructor for each range type
 * so its name can include the base type, like range constructors do.
 * If we had an anyrangearray polymorphic type we could use it here,
 * but since each type has its own constructor name there's no need.
 *
 * Three constructors are created: a 0-argument form (empty multirange),
 * a 1-argument form taking a single range (also used as the range ->
 * multirange cast function), and a variadic form taking any number of
 * ranges.
 *
 * Sets castFuncOid to the oid of the new constructor that can be used
 * to cast from a range to a multirange.
 */
static void
makeMultirangeConstructors(const char *name, Oid namespace,
						   Oid multirangeOid, Oid rangeOid, Oid rangeArrayOid,
						   Oid *castFuncOid)
{
	ObjectAddress myself,
				referenced;
	oidvector  *argtypes;
	Datum		allParamTypes;
	ArrayType  *allParameterTypes;
	Datum		paramModes;
	ArrayType  *parameterModes;

	/* Dependency target: the multirange type itself */
	referenced.classId = TypeRelationId;
	referenced.objectId = multirangeOid;
	referenced.objectSubId = 0;

	/* 0-arg constructor - for empty multiranges */
	argtypes = buildoidvector(NULL, 0);
	myself = ProcedureCreate(name,	/* name: same as multirange type */
							 namespace,
							 false, /* replace */
							 false, /* returns set */
							 multirangeOid, /* return type */
							 BOOTSTRAP_SUPERUSERID, /* proowner */
							 INTERNALlanguageId,	/* language */
							 F_FMGR_INTERNAL_VALIDATOR,
							 "multirange_constructor0", /* prosrc */
							 NULL,	/* probin */
							 NULL,	/* prosqlbody */
							 PROKIND_FUNCTION,
							 false, /* security_definer */
							 false, /* leakproof */
							 true,	/* isStrict */
							 PROVOLATILE_IMMUTABLE, /* volatility */
							 PROPARALLEL_SAFE,	/* parallel safety */
							 argtypes,	/* parameterTypes */
							 PointerGetDatum(NULL), /* allParameterTypes */
							 PointerGetDatum(NULL), /* parameterModes */
							 PointerGetDatum(NULL), /* parameterNames */
							 NIL,	/* parameterDefaults */
							 PointerGetDatum(NULL), /* trftypes */
							 PointerGetDatum(NULL), /* proconfig */
							 InvalidOid,	/* prosupport */
							 1.0,	/* procost */
							 0.0);	/* prorows */

	/*
	 * Make the constructor internally-dependent on the multirange type so
	 * that they go away silently when the type is dropped.  Note that pg_dump
	 * depends on this choice to avoid dumping the constructors.
	 */
	recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL);
	pfree(argtypes);

	/*
	 * 1-arg constructor - for casts
	 *
	 * In theory we shouldn't need both this and the vararg (n-arg)
	 * constructor, but having a separate 1-arg function lets us define casts
	 * against it.
	 */
	argtypes = buildoidvector(&rangeOid, 1);
	myself = ProcedureCreate(name,	/* name: same as multirange type */
							 namespace,
							 false, /* replace */
							 false, /* returns set */
							 multirangeOid, /* return type */
							 BOOTSTRAP_SUPERUSERID, /* proowner */
							 INTERNALlanguageId,	/* language */
							 F_FMGR_INTERNAL_VALIDATOR,
							 "multirange_constructor1", /* prosrc */
							 NULL,	/* probin */
							 NULL,	/* prosqlbody */
							 PROKIND_FUNCTION,
							 false, /* security_definer */
							 false, /* leakproof */
							 true,	/* isStrict */
							 PROVOLATILE_IMMUTABLE, /* volatility */
							 PROPARALLEL_SAFE,	/* parallel safety */
							 argtypes,	/* parameterTypes */
							 PointerGetDatum(NULL), /* allParameterTypes */
							 PointerGetDatum(NULL), /* parameterModes */
							 PointerGetDatum(NULL), /* parameterNames */
							 NIL,	/* parameterDefaults */
							 PointerGetDatum(NULL), /* trftypes */
							 PointerGetDatum(NULL), /* proconfig */
							 InvalidOid,	/* prosupport */
							 1.0,	/* procost */
							 0.0);	/* prorows */
	/* ditto */
	recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL);
	pfree(argtypes);
	/* Report the 1-arg constructor as the range->multirange cast function */
	*castFuncOid = myself.objectId;

	/* n-arg constructor - vararg */
	argtypes = buildoidvector(&rangeArrayOid, 1);
	allParamTypes = ObjectIdGetDatum(rangeArrayOid);
	allParameterTypes = construct_array(&allParamTypes,
										1, OIDOID,
										sizeof(Oid), true, TYPALIGN_INT);
	/* Mark the single parameter VARIADIC so any number of ranges works */
	paramModes = CharGetDatum(FUNC_PARAM_VARIADIC);
	parameterModes = construct_array(&paramModes, 1, CHAROID,
									 1, true, TYPALIGN_CHAR);
	myself = ProcedureCreate(name,	/* name: same as multirange type */
							 namespace,
							 false, /* replace */
							 false, /* returns set */
							 multirangeOid, /* return type */
							 BOOTSTRAP_SUPERUSERID, /* proowner */
							 INTERNALlanguageId,	/* language */
							 F_FMGR_INTERNAL_VALIDATOR,
							 "multirange_constructor2", /* prosrc */
							 NULL,	/* probin */
							 NULL,	/* prosqlbody */
							 PROKIND_FUNCTION,
							 false, /* security_definer */
							 false, /* leakproof */
							 true,	/* isStrict */
							 PROVOLATILE_IMMUTABLE, /* volatility */
							 PROPARALLEL_SAFE,	/* parallel safety */
							 argtypes,	/* parameterTypes */
							 PointerGetDatum(allParameterTypes),	/* allParameterTypes */
							 PointerGetDatum(parameterModes),	/* parameterModes */
							 PointerGetDatum(NULL), /* parameterNames */
							 NIL,	/* parameterDefaults */
							 PointerGetDatum(NULL), /* trftypes */
							 PointerGetDatum(NULL), /* proconfig */
							 InvalidOid,	/* prosupport */
							 1.0,	/* procost */
							 0.0);	/* prorows */
	/* ditto */
	recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL);
	pfree(argtypes);
	pfree(allParameterTypes);
	pfree(parameterModes);
}
+
+/*
+ * Find suitable I/O and other support functions for a type.
+ *
+ * typeOid is the type's OID (which will already exist, if only as a shell
+ * type).
+ */
+
+static Oid
+findTypeInputFunction(List *procname, Oid typeOid)
+{
+ Oid argList[3];
+ Oid procOid;
+ Oid procOid2;
+
+ /*
+ * Input functions can take a single argument of type CSTRING, or three
+ * arguments (string, typioparam OID, typmod). Whine about ambiguity if
+ * both forms exist.
+ */
+ argList[0] = CSTRINGOID;
+ argList[1] = OIDOID;
+ argList[2] = INT4OID;
+
+ procOid = LookupFuncName(procname, 1, argList, true);
+ procOid2 = LookupFuncName(procname, 3, argList, true);
+ if (OidIsValid(procOid))
+ {
+ if (OidIsValid(procOid2))
+ ereport(ERROR,
+ (errcode(ERRCODE_AMBIGUOUS_FUNCTION),
+ errmsg("type input function %s has multiple matches",
+ NameListToString(procname))));
+ }
+ else
+ {
+ procOid = procOid2;
+ /* If not found, reference the 1-argument signature in error msg */
+ if (!OidIsValid(procOid))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("function %s does not exist",
+ func_signature_string(procname, 1, NIL, argList))));
+ }
+
+ /* Input functions must return the target type. */
+ if (get_func_rettype(procOid) != typeOid)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("type input function %s must return type %s",
+ NameListToString(procname), format_type_be(typeOid))));
+
+ /*
+ * Print warnings if any of the type's I/O functions are marked volatile.
+ * There is a general assumption that I/O functions are stable or
+ * immutable; this allows us for example to mark record_in/record_out
+ * stable rather than volatile. Ideally we would throw errors not just
+ * warnings here; but since this check is new as of 9.5, and since the
+ * volatility marking might be just an error-of-omission and not a true
+ * indication of how the function behaves, we'll let it pass as a warning
+ * for now.
+ */
+ if (func_volatile(procOid) == PROVOLATILE_VOLATILE)
+ ereport(WARNING,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("type input function %s should not be volatile",
+ NameListToString(procname))));
+
+ return procOid;
+}
+
+static Oid
+findTypeOutputFunction(List *procname, Oid typeOid)
+{
+ Oid argList[1];
+ Oid procOid;
+
+ /*
+ * Output functions always take a single argument of the type and return
+ * cstring.
+ */
+ argList[0] = typeOid;
+
+ procOid = LookupFuncName(procname, 1, argList, true);
+ if (!OidIsValid(procOid))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("function %s does not exist",
+ func_signature_string(procname, 1, NIL, argList))));
+
+ if (get_func_rettype(procOid) != CSTRINGOID)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("type output function %s must return type %s",
+ NameListToString(procname), "cstring")));
+
+ /* Just a warning for now, per comments in findTypeInputFunction */
+ if (func_volatile(procOid) == PROVOLATILE_VOLATILE)
+ ereport(WARNING,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("type output function %s should not be volatile",
+ NameListToString(procname))));
+
+ return procOid;
+}
+
+static Oid
+findTypeReceiveFunction(List *procname, Oid typeOid)
+{
+ Oid argList[3];
+ Oid procOid;
+ Oid procOid2;
+
+ /*
+ * Receive functions can take a single argument of type INTERNAL, or three
+ * arguments (internal, typioparam OID, typmod). Whine about ambiguity if
+ * both forms exist.
+ */
+ argList[0] = INTERNALOID;
+ argList[1] = OIDOID;
+ argList[2] = INT4OID;
+
+ procOid = LookupFuncName(procname, 1, argList, true);
+ procOid2 = LookupFuncName(procname, 3, argList, true);
+ if (OidIsValid(procOid))
+ {
+ if (OidIsValid(procOid2))
+ ereport(ERROR,
+ (errcode(ERRCODE_AMBIGUOUS_FUNCTION),
+ errmsg("type receive function %s has multiple matches",
+ NameListToString(procname))));
+ }
+ else
+ {
+ procOid = procOid2;
+ /* If not found, reference the 1-argument signature in error msg */
+ if (!OidIsValid(procOid))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("function %s does not exist",
+ func_signature_string(procname, 1, NIL, argList))));
+ }
+
+ /* Receive functions must return the target type. */
+ if (get_func_rettype(procOid) != typeOid)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("type receive function %s must return type %s",
+ NameListToString(procname), format_type_be(typeOid))));
+
+ /* Just a warning for now, per comments in findTypeInputFunction */
+ if (func_volatile(procOid) == PROVOLATILE_VOLATILE)
+ ereport(WARNING,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("type receive function %s should not be volatile",
+ NameListToString(procname))));
+
+ return procOid;
+}
+
+static Oid
+findTypeSendFunction(List *procname, Oid typeOid)
+{
+ Oid argList[1];
+ Oid procOid;
+
+ /*
+ * Send functions always take a single argument of the type and return
+ * bytea.
+ */
+ argList[0] = typeOid;
+
+ procOid = LookupFuncName(procname, 1, argList, true);
+ if (!OidIsValid(procOid))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("function %s does not exist",
+ func_signature_string(procname, 1, NIL, argList))));
+
+ if (get_func_rettype(procOid) != BYTEAOID)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("type send function %s must return type %s",
+ NameListToString(procname), "bytea")));
+
+ /* Just a warning for now, per comments in findTypeInputFunction */
+ if (func_volatile(procOid) == PROVOLATILE_VOLATILE)
+ ereport(WARNING,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("type send function %s should not be volatile",
+ NameListToString(procname))));
+
+ return procOid;
+}
+
+static Oid
+findTypeTypmodinFunction(List *procname)
+{
+ Oid argList[1];
+ Oid procOid;
+
+ /*
+ * typmodin functions always take one cstring[] argument and return int4.
+ */
+ argList[0] = CSTRINGARRAYOID;
+
+ procOid = LookupFuncName(procname, 1, argList, true);
+ if (!OidIsValid(procOid))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("function %s does not exist",
+ func_signature_string(procname, 1, NIL, argList))));
+
+ if (get_func_rettype(procOid) != INT4OID)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("typmod_in function %s must return type %s",
+ NameListToString(procname), "integer")));
+
+ /* Just a warning for now, per comments in findTypeInputFunction */
+ if (func_volatile(procOid) == PROVOLATILE_VOLATILE)
+ ereport(WARNING,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("type modifier input function %s should not be volatile",
+ NameListToString(procname))));
+
+ return procOid;
+}
+
+static Oid
+findTypeTypmodoutFunction(List *procname)
+{
+ Oid argList[1];
+ Oid procOid;
+
+ /*
+ * typmodout functions always take one int4 argument and return cstring.
+ */
+ argList[0] = INT4OID;
+
+ procOid = LookupFuncName(procname, 1, argList, true);
+ if (!OidIsValid(procOid))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("function %s does not exist",
+ func_signature_string(procname, 1, NIL, argList))));
+
+ if (get_func_rettype(procOid) != CSTRINGOID)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("typmod_out function %s must return type %s",
+ NameListToString(procname), "cstring")));
+
+ /* Just a warning for now, per comments in findTypeInputFunction */
+ if (func_volatile(procOid) == PROVOLATILE_VOLATILE)
+ ereport(WARNING,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("type modifier output function %s should not be volatile",
+ NameListToString(procname))));
+
+ return procOid;
+}
+
+static Oid
+findTypeAnalyzeFunction(List *procname, Oid typeOid)
+{
+ Oid argList[1];
+ Oid procOid;
+
+ /*
+ * Analyze functions always take one INTERNAL argument and return bool.
+ */
+ argList[0] = INTERNALOID;
+
+ procOid = LookupFuncName(procname, 1, argList, true);
+ if (!OidIsValid(procOid))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("function %s does not exist",
+ func_signature_string(procname, 1, NIL, argList))));
+
+ if (get_func_rettype(procOid) != BOOLOID)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("type analyze function %s must return type %s",
+ NameListToString(procname), "boolean")));
+
+ return procOid;
+}
+
+static Oid
+findTypeSubscriptingFunction(List *procname, Oid typeOid)
+{
+ Oid argList[1];
+ Oid procOid;
+
+ /*
+ * Subscripting support functions always take one INTERNAL argument and
+ * return INTERNAL. (The argument is not used, but we must have it to
+ * maintain type safety.)
+ */
+ argList[0] = INTERNALOID;
+
+ procOid = LookupFuncName(procname, 1, argList, true);
+ if (!OidIsValid(procOid))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("function %s does not exist",
+ func_signature_string(procname, 1, NIL, argList))));
+
+ if (get_func_rettype(procOid) != INTERNALOID)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("type subscripting function %s must return type %s",
+ NameListToString(procname), "internal")));
+
+ /*
+ * We disallow array_subscript_handler() from being selected explicitly,
+ * since that must only be applied to autogenerated array types.
+ */
+ if (procOid == F_ARRAY_SUBSCRIPT_HANDLER)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("user-defined types cannot use subscripting function %s",
+ NameListToString(procname))));
+
+ return procOid;
+}
+
+/*
+ * Find suitable support functions and opclasses for a range type.
+ */
+
+/*
+ * Find named btree opclass for subtype, or default btree opclass if
+ * opcname is NIL.
+ */
+static Oid
+findRangeSubOpclass(List *opcname, Oid subtype)
+{
+ Oid opcid;
+ Oid opInputType;
+
+ if (opcname != NIL)
+ {
+ opcid = get_opclass_oid(BTREE_AM_OID, opcname, false);
+
+ /*
+ * Verify that the operator class accepts this datatype. Note we will
+ * accept binary compatibility.
+ */
+ opInputType = get_opclass_input_type(opcid);
+ if (!IsBinaryCoercible(subtype, opInputType))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("operator class \"%s\" does not accept data type %s",
+ NameListToString(opcname),
+ format_type_be(subtype))));
+ }
+ else
+ {
+ opcid = GetDefaultOpClass(subtype, BTREE_AM_OID);
+ if (!OidIsValid(opcid))
+ {
+ /* We spell the error message identically to ResolveOpClass */
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("data type %s has no default operator class for access method \"%s\"",
+ format_type_be(subtype), "btree"),
+ errhint("You must specify an operator class for the range type or define a default operator class for the subtype.")));
+ }
+ }
+
+ return opcid;
+}
+
+static Oid
+findRangeCanonicalFunction(List *procname, Oid typeOid)
+{
+ Oid argList[1];
+ Oid procOid;
+ AclResult aclresult;
+
+ /*
+ * Range canonical functions must take and return the range type, and must
+ * be immutable.
+ */
+ argList[0] = typeOid;
+
+ procOid = LookupFuncName(procname, 1, argList, true);
+
+ if (!OidIsValid(procOid))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("function %s does not exist",
+ func_signature_string(procname, 1, NIL, argList))));
+
+ if (get_func_rettype(procOid) != typeOid)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("range canonical function %s must return range type",
+ func_signature_string(procname, 1, NIL, argList))));
+
+ if (func_volatile(procOid) != PROVOLATILE_IMMUTABLE)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("range canonical function %s must be immutable",
+ func_signature_string(procname, 1, NIL, argList))));
+
+ /* Also, range type's creator must have permission to call function */
+ aclresult = pg_proc_aclcheck(procOid, GetUserId(), ACL_EXECUTE);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error(aclresult, OBJECT_FUNCTION, get_func_name(procOid));
+
+ return procOid;
+}
+
+static Oid
+findRangeSubtypeDiffFunction(List *procname, Oid subtype)
+{
+ Oid argList[2];
+ Oid procOid;
+ AclResult aclresult;
+
+ /*
+ * Range subtype diff functions must take two arguments of the subtype,
+ * must return float8, and must be immutable.
+ */
+ argList[0] = subtype;
+ argList[1] = subtype;
+
+ procOid = LookupFuncName(procname, 2, argList, true);
+
+ if (!OidIsValid(procOid))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("function %s does not exist",
+ func_signature_string(procname, 2, NIL, argList))));
+
+ if (get_func_rettype(procOid) != FLOAT8OID)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("range subtype diff function %s must return type %s",
+ func_signature_string(procname, 2, NIL, argList),
+ "double precision")));
+
+ if (func_volatile(procOid) != PROVOLATILE_IMMUTABLE)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("range subtype diff function %s must be immutable",
+ func_signature_string(procname, 2, NIL, argList))));
+
+ /* Also, range type's creator must have permission to call function */
+ aclresult = pg_proc_aclcheck(procOid, GetUserId(), ACL_EXECUTE);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error(aclresult, OBJECT_FUNCTION, get_func_name(procOid));
+
+ return procOid;
+}
+
+/*
+ * AssignTypeArrayOid
+ *
+ * Pre-assign the type's array OID for use in pg_type.typarray
+ */
+Oid
+AssignTypeArrayOid(void)
+{
+ Oid type_array_oid;
+
+ /* Use binary-upgrade override for pg_type.typarray? */
+ if (IsBinaryUpgrade)
+ {
+ if (!OidIsValid(binary_upgrade_next_array_pg_type_oid))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("pg_type array OID value not set when in binary upgrade mode")));
+
+ type_array_oid = binary_upgrade_next_array_pg_type_oid;
+ binary_upgrade_next_array_pg_type_oid = InvalidOid;
+ }
+ else
+ {
+ Relation pg_type = table_open(TypeRelationId, AccessShareLock);
+
+ type_array_oid = GetNewOidWithIndex(pg_type, TypeOidIndexId,
+ Anum_pg_type_oid);
+ table_close(pg_type, AccessShareLock);
+ }
+
+ return type_array_oid;
+}
+
+/*
+ * AssignTypeMultirangeOid
+ *
+ * Pre-assign the range type's multirange OID for use in pg_type.oid
+ */
+Oid
+AssignTypeMultirangeOid(void)
+{
+ Oid type_multirange_oid;
+
+ /* Use binary-upgrade override for pg_type.oid? */
+ if (IsBinaryUpgrade)
+ {
+ if (!OidIsValid(binary_upgrade_next_mrng_pg_type_oid))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("pg_type multirange OID value not set when in binary upgrade mode")));
+
+ type_multirange_oid = binary_upgrade_next_mrng_pg_type_oid;
+ binary_upgrade_next_mrng_pg_type_oid = InvalidOid;
+ }
+ else
+ {
+ Relation pg_type = table_open(TypeRelationId, AccessShareLock);
+
+ type_multirange_oid = GetNewOidWithIndex(pg_type, TypeOidIndexId,
+ Anum_pg_type_oid);
+ table_close(pg_type, AccessShareLock);
+ }
+
+ return type_multirange_oid;
+}
+
+/*
+ * AssignTypeMultirangeArrayOid
+ *
+ * Pre-assign the range type's multirange array OID for use in pg_type.typarray
+ */
+Oid
+AssignTypeMultirangeArrayOid(void)
+{
+ Oid type_multirange_array_oid;
+
+ /* Use binary-upgrade override for pg_type.oid? */
+ if (IsBinaryUpgrade)
+ {
+ if (!OidIsValid(binary_upgrade_next_mrng_array_pg_type_oid))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("pg_type multirange array OID value not set when in binary upgrade mode")));
+
+ type_multirange_array_oid = binary_upgrade_next_mrng_array_pg_type_oid;
+ binary_upgrade_next_mrng_array_pg_type_oid = InvalidOid;
+ }
+ else
+ {
+ Relation pg_type = table_open(TypeRelationId, AccessShareLock);
+
+ type_multirange_array_oid = GetNewOidWithIndex(pg_type, TypeOidIndexId,
+ Anum_pg_type_oid);
+ table_close(pg_type, AccessShareLock);
+ }
+
+ return type_multirange_array_oid;
+}
+
+
+/*-------------------------------------------------------------------
+ * DefineCompositeType
+ *
+ * Create a Composite Type relation.
+ * `DefineRelation' does all the work, we just provide the correct
+ * arguments!
+ *
+ * If the relation already exists, then 'DefineRelation' will abort
+ * the xact...
+ *
+ * Return type is the new type's object address.
+ *-------------------------------------------------------------------
+ */
+ObjectAddress
+DefineCompositeType(RangeVar *typevar, List *coldeflist)
+{
+ CreateStmt *createStmt = makeNode(CreateStmt);
+ Oid old_type_oid;
+ Oid typeNamespace;
+ ObjectAddress address;
+
+ /*
+ * now set the parameters for keys/inheritance etc. All of these are
+ * uninteresting for composite types...
+ */
+ createStmt->relation = typevar;
+ createStmt->tableElts = coldeflist;
+ createStmt->inhRelations = NIL;
+ createStmt->constraints = NIL;
+ createStmt->options = NIL;
+ createStmt->oncommit = ONCOMMIT_NOOP;
+ createStmt->tablespacename = NULL;
+ createStmt->if_not_exists = false;
+
+ /*
+ * Check for collision with an existing type name. If there is one and
+ * it's an autogenerated array, we can rename it out of the way. This
+ * check is here mainly to get a better error message about a "type"
+ * instead of below about a "relation".
+ */
+ typeNamespace = RangeVarGetAndCheckCreationNamespace(createStmt->relation,
+ NoLock, NULL);
+ RangeVarAdjustRelationPersistence(createStmt->relation, typeNamespace);
+ old_type_oid =
+ GetSysCacheOid2(TYPENAMENSP, Anum_pg_type_oid,
+ CStringGetDatum(createStmt->relation->relname),
+ ObjectIdGetDatum(typeNamespace));
+ if (OidIsValid(old_type_oid))
+ {
+ if (!moveArrayTypeName(old_type_oid, createStmt->relation->relname, typeNamespace))
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg("type \"%s\" already exists", createStmt->relation->relname)));
+ }
+
+ /*
+ * Finally create the relation. This also creates the type.
+ */
+ DefineRelation(createStmt, RELKIND_COMPOSITE_TYPE, InvalidOid, &address,
+ NULL);
+
+ return address;
+}
+
+/*
+ * AlterDomainDefault
+ *
+ * Routine implementing ALTER DOMAIN SET/DROP DEFAULT statements.
+ *
+ * Returns ObjectAddress of the modified domain.
+ */
ObjectAddress
AlterDomainDefault(List *names, Node *defaultRaw)
{
	TypeName   *typename;
	Oid			domainoid;
	HeapTuple	tup;
	ParseState *pstate;
	Relation	rel;
	char	   *defaultValue;
	Node	   *defaultExpr = NULL; /* NULL if no default specified */
	Datum		new_record[Natts_pg_type];
	bool		new_record_nulls[Natts_pg_type];
	bool		new_record_repl[Natts_pg_type];
	HeapTuple	newtuple;
	Form_pg_type typTup;
	ObjectAddress address;

	/* Make a TypeName so we can use standard type lookup machinery */
	typename = makeTypeNameFromNameList(names);
	domainoid = typenameTypeId(NULL, typename);

	/* Look up the domain in the type table */
	rel = table_open(TypeRelationId, RowExclusiveLock);

	tup = SearchSysCacheCopy1(TYPEOID, ObjectIdGetDatum(domainoid));
	if (!HeapTupleIsValid(tup))
		elog(ERROR, "cache lookup failed for type %u", domainoid);
	typTup = (Form_pg_type) GETSTRUCT(tup);

	/* Check it's a domain and check user has permission for ALTER DOMAIN */
	checkDomainOwner(tup);

	/* Setup new tuple: start from "replace nothing" and mark changes below */
	MemSet(new_record, (Datum) 0, sizeof(new_record));
	MemSet(new_record_nulls, false, sizeof(new_record_nulls));
	MemSet(new_record_repl, false, sizeof(new_record_repl));

	/* Store the new default into the tuple */
	if (defaultRaw)
	{
		/* Create a dummy ParseState for transformExpr */
		pstate = make_parsestate(NULL);

		/*
		 * Cook the colDef->raw_expr into an expression.  Note: Name is
		 * strictly for error message
		 */
		defaultExpr = cookDefault(pstate, defaultRaw,
								  typTup->typbasetype,
								  typTup->typtypmod,
								  NameStr(typTup->typname),
								  0);

		/*
		 * If the expression is just a NULL constant, we treat the command
		 * like ALTER ... DROP DEFAULT.  (But see note for same test in
		 * DefineDomain.)
		 */
		if (defaultExpr == NULL ||
			(IsA(defaultExpr, Const) && ((Const *) defaultExpr)->constisnull))
		{
			/* Default is NULL, drop it: null out both typdefaultbin and typdefault */
			defaultExpr = NULL;
			new_record_nulls[Anum_pg_type_typdefaultbin - 1] = true;
			new_record_repl[Anum_pg_type_typdefaultbin - 1] = true;
			new_record_nulls[Anum_pg_type_typdefault - 1] = true;
			new_record_repl[Anum_pg_type_typdefault - 1] = true;
		}
		else
		{
			/*
			 * Expression must be stored as a nodeToString result, but we also
			 * require a valid textual representation (mainly to make life
			 * easier for pg_dump).
			 */
			defaultValue = deparse_expression(defaultExpr,
											  NIL, false, false);

			/*
			 * Form an updated tuple with the new default and write it back.
			 */
			new_record[Anum_pg_type_typdefaultbin - 1] = CStringGetTextDatum(nodeToString(defaultExpr));

			new_record_repl[Anum_pg_type_typdefaultbin - 1] = true;
			new_record[Anum_pg_type_typdefault - 1] = CStringGetTextDatum(defaultValue);
			new_record_repl[Anum_pg_type_typdefault - 1] = true;
		}
	}
	else
	{
		/* ALTER ... DROP DEFAULT: null out both typdefaultbin and typdefault */
		new_record_nulls[Anum_pg_type_typdefaultbin - 1] = true;
		new_record_repl[Anum_pg_type_typdefaultbin - 1] = true;
		new_record_nulls[Anum_pg_type_typdefault - 1] = true;
		new_record_repl[Anum_pg_type_typdefault - 1] = true;
	}

	newtuple = heap_modify_tuple(tup, RelationGetDescr(rel),
								 new_record, new_record_nulls,
								 new_record_repl);

	CatalogTupleUpdate(rel, &tup->t_self, newtuple);

	/* Rebuild dependencies (the default expression may reference objects) */
	GenerateTypeDependencies(newtuple,
							 rel,
							 defaultExpr,
							 NULL,	/* don't have typacl handy */
							 0, /* relation kind is n/a */
							 false, /* a domain isn't an implicit array */
							 false, /* nor is it any kind of dependent type */
							 false, /* don't touch extension membership */
							 true); /* We do need to rebuild dependencies */

	InvokeObjectPostAlterHook(TypeRelationId, domainoid, 0);

	ObjectAddressSet(address, TypeRelationId, domainoid);

	/* Clean up */
	table_close(rel, RowExclusiveLock);
	heap_freetuple(newtuple);

	return address;
}
+
+/*
+ * AlterDomainNotNull
+ *
+ * Routine implementing ALTER DOMAIN SET/DROP NOT NULL statements.
+ *
+ * Returns ObjectAddress of the modified domain, or InvalidObjectAddress
+ * when the domain already had the requested nullability (no-op).
+ */
+ObjectAddress
+AlterDomainNotNull(List *names, bool notNull)
+{
+	TypeName   *typename;
+	Oid			domainoid;
+	Relation	typrel;
+	HeapTuple	tup;
+	Form_pg_type typTup;
+	ObjectAddress address = InvalidObjectAddress;
+
+	/* Make a TypeName so we can use standard type lookup machinery */
+	typename = makeTypeNameFromNameList(names);
+	domainoid = typenameTypeId(NULL, typename);
+
+	/* Look up the domain in the type table */
+	typrel = table_open(TypeRelationId, RowExclusiveLock);
+
+	tup = SearchSysCacheCopy1(TYPEOID, ObjectIdGetDatum(domainoid));
+	if (!HeapTupleIsValid(tup))
+		elog(ERROR, "cache lookup failed for type %u", domainoid);
+	typTup = (Form_pg_type) GETSTRUCT(tup);
+
+	/* Check it's a domain and check user has permission for ALTER DOMAIN */
+	checkDomainOwner(tup);
+
+	/* Is the domain already set to the desired constraint? */
+	if (typTup->typnotnull == notNull)
+	{
+		/* Nothing to do; release lock and report no change made */
+		table_close(typrel, RowExclusiveLock);
+		return address;
+	}
+
+	/* Adding a NOT NULL constraint requires checking existing columns */
+	if (notNull)
+	{
+		List	   *rels;
+		ListCell   *rt;
+
+		/* Fetch relation list with attributes based on this domain */
+		/* ShareLock is sufficient to prevent concurrent data changes */
+
+		rels = get_rels_with_domain(domainoid, ShareLock);
+
+		foreach(rt, rels)
+		{
+			RelToCheck *rtc = (RelToCheck *) lfirst(rt);
+			Relation	testrel = rtc->rel;
+			TupleDesc	tupdesc = RelationGetDescr(testrel);
+			TupleTableSlot *slot;
+			TableScanDesc scan;
+			Snapshot	snapshot;
+
+			/* Scan all tuples in this relation */
+			snapshot = RegisterSnapshot(GetLatestSnapshot());
+			scan = table_beginscan(testrel, snapshot, 0, NULL);
+			slot = table_slot_create(testrel, NULL);
+			while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
+			{
+				int			i;
+
+				/* Test attributes that are of the domain */
+				for (i = 0; i < rtc->natts; i++)
+				{
+					int			attnum = rtc->atts[i];
+					Form_pg_attribute attr = TupleDescAttr(tupdesc, attnum - 1);
+
+					if (slot_attisnull(slot, attnum))
+					{
+						/*
+						 * In principle the auxiliary information for this
+						 * error should be errdatatype(), but errtablecol()
+						 * seems considerably more useful in practice.  Since
+						 * this code only executes in an ALTER DOMAIN command,
+						 * the client should already know which domain is in
+						 * question.
+						 */
+						ereport(ERROR,
+								(errcode(ERRCODE_NOT_NULL_VIOLATION),
+								 errmsg("column \"%s\" of table \"%s\" contains null values",
+										NameStr(attr->attname),
+										RelationGetRelationName(testrel)),
+								 errtablecol(testrel, attnum)));
+					}
+				}
+			}
+			ExecDropSingleTupleTableSlot(slot);
+			table_endscan(scan);
+			UnregisterSnapshot(snapshot);
+
+			/* Close each rel after processing, but keep lock */
+			table_close(testrel, NoLock);
+		}
+	}
+
+	/*
+	 * Okay to update pg_type row.  We can scribble on typTup because it's a
+	 * copy.
+	 */
+	typTup->typnotnull = notNull;
+
+	CatalogTupleUpdate(typrel, &tup->t_self, tup);
+
+	InvokeObjectPostAlterHook(TypeRelationId, domainoid, 0);
+
+	ObjectAddressSet(address, TypeRelationId, domainoid);
+
+	/* Clean up */
+	heap_freetuple(tup);
+	table_close(typrel, RowExclusiveLock);
+
+	return address;
+}
+
+/*
+ * AlterDomainDropConstraint
+ *
+ * Implements the ALTER DOMAIN DROP CONSTRAINT statement
+ *
+ * Returns ObjectAddress of the modified domain.  If the named constraint
+ * does not exist, errors out unless missing_ok, in which case a NOTICE is
+ * emitted instead.
+ */
+ObjectAddress
+AlterDomainDropConstraint(List *names, const char *constrName,
+						  DropBehavior behavior, bool missing_ok)
+{
+	TypeName   *typename;
+	Oid			domainoid;
+	HeapTuple	tup;
+	Relation	rel;
+	Relation	conrel;
+	SysScanDesc conscan;
+	ScanKeyData skey[3];
+	HeapTuple	contup;
+	bool		found = false;
+	ObjectAddress address;
+
+	/* Make a TypeName so we can use standard type lookup machinery */
+	typename = makeTypeNameFromNameList(names);
+	domainoid = typenameTypeId(NULL, typename);
+
+	/* Look up the domain in the type table */
+	rel = table_open(TypeRelationId, RowExclusiveLock);
+
+	tup = SearchSysCacheCopy1(TYPEOID, ObjectIdGetDatum(domainoid));
+	if (!HeapTupleIsValid(tup))
+		elog(ERROR, "cache lookup failed for type %u", domainoid);
+
+	/* Check it's a domain and check user has permission for ALTER DOMAIN */
+	checkDomainOwner(tup);
+
+	/* Grab an appropriate lock on the pg_constraint relation */
+	conrel = table_open(ConstraintRelationId, RowExclusiveLock);
+
+	/*
+	 * Find and remove the target constraint.  conrelid is InvalidOid here
+	 * because a domain constraint is attached to a type, not a relation.
+	 */
+	ScanKeyInit(&skey[0],
+				Anum_pg_constraint_conrelid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(InvalidOid));
+	ScanKeyInit(&skey[1],
+				Anum_pg_constraint_contypid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(domainoid));
+	ScanKeyInit(&skey[2],
+				Anum_pg_constraint_conname,
+				BTEqualStrategyNumber, F_NAMEEQ,
+				CStringGetDatum(constrName));
+
+	conscan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId, true,
+								 NULL, 3, skey);
+
+	/* There can be at most one matching row */
+	if ((contup = systable_getnext(conscan)) != NULL)
+	{
+		ObjectAddress conobj;
+
+		conobj.classId = ConstraintRelationId;
+		conobj.objectId = ((Form_pg_constraint) GETSTRUCT(contup))->oid;
+		conobj.objectSubId = 0;
+
+		/* performDeletion removes the constraint and anything depending on it */
+		performDeletion(&conobj, behavior, 0);
+		found = true;
+	}
+
+	/* Clean up after the scan */
+	systable_endscan(conscan);
+	table_close(conrel, RowExclusiveLock);
+
+	if (!found)
+	{
+		if (!missing_ok)
+			ereport(ERROR,
+					(errcode(ERRCODE_UNDEFINED_OBJECT),
+					 errmsg("constraint \"%s\" of domain \"%s\" does not exist",
+							constrName, TypeNameToString(typename))));
+		else
+			ereport(NOTICE,
+					(errmsg("constraint \"%s\" of domain \"%s\" does not exist, skipping",
+							constrName, TypeNameToString(typename))));
+	}
+
+	/*
+	 * We must send out an sinval message for the domain, to ensure that any
+	 * dependent plans get rebuilt.  Since this command doesn't change the
+	 * domain's pg_type row, that won't happen automatically; do it manually.
+	 */
+	CacheInvalidateHeapTuple(rel, tup, NULL);
+
+	ObjectAddressSet(address, TypeRelationId, domainoid);
+
+	/* Clean up */
+	table_close(rel, RowExclusiveLock);
+
+	return address;
+}
+
+/*
+ * AlterDomainAddConstraint
+ *
+ * Implements the ALTER DOMAIN .. ADD CONSTRAINT statement.
+ *
+ * Only CHECK constraints are accepted; all other constraint types raise an
+ * error.  Returns the ObjectAddress of the domain; if constrAddr is not
+ * NULL, *constrAddr is set to the address of the new pg_constraint entry.
+ */
+ObjectAddress
+AlterDomainAddConstraint(List *names, Node *newConstraint,
+						 ObjectAddress *constrAddr)
+{
+	TypeName   *typename;
+	Oid			domainoid;
+	Relation	typrel;
+	HeapTuple	tup;
+	Form_pg_type typTup;
+	Constraint *constr;
+	char	   *ccbin;
+	ObjectAddress address;
+
+	/* Make a TypeName so we can use standard type lookup machinery */
+	typename = makeTypeNameFromNameList(names);
+	domainoid = typenameTypeId(NULL, typename);
+
+	/* Look up the domain in the type table */
+	typrel = table_open(TypeRelationId, RowExclusiveLock);
+
+	tup = SearchSysCacheCopy1(TYPEOID, ObjectIdGetDatum(domainoid));
+	if (!HeapTupleIsValid(tup))
+		elog(ERROR, "cache lookup failed for type %u", domainoid);
+	typTup = (Form_pg_type) GETSTRUCT(tup);
+
+	/* Check it's a domain and check user has permission for ALTER DOMAIN */
+	checkDomainOwner(tup);
+
+	if (!IsA(newConstraint, Constraint))
+		elog(ERROR, "unrecognized node type: %d",
+			 (int) nodeTag(newConstraint));
+
+	constr = (Constraint *) newConstraint;
+
+	/* Reject every constraint type except CHECK with a tailored message */
+	switch (constr->contype)
+	{
+		case CONSTR_CHECK:
+			/* processed below */
+			break;
+
+		case CONSTR_UNIQUE:
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("unique constraints not possible for domains")));
+			break;
+
+		case CONSTR_PRIMARY:
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("primary key constraints not possible for domains")));
+			break;
+
+		case CONSTR_EXCLUSION:
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("exclusion constraints not possible for domains")));
+			break;
+
+		case CONSTR_FOREIGN:
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("foreign key constraints not possible for domains")));
+			break;
+
+		case CONSTR_ATTR_DEFERRABLE:
+		case CONSTR_ATTR_NOT_DEFERRABLE:
+		case CONSTR_ATTR_DEFERRED:
+		case CONSTR_ATTR_IMMEDIATE:
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("specifying constraint deferrability not supported for domains")));
+			break;
+
+		default:
+			elog(ERROR, "unrecognized constraint subtype: %d",
+				 (int) constr->contype);
+			break;
+	}
+
+	/*
+	 * Since all other constraint types throw errors, this must be a check
+	 * constraint.  First, process the constraint expression and add an entry
+	 * to pg_constraint.
+	 */
+
+	ccbin = domainAddConstraint(domainoid, typTup->typnamespace,
+								typTup->typbasetype, typTup->typtypmod,
+								constr, NameStr(typTup->typname), constrAddr);
+
+	/*
+	 * If requested to validate the constraint, test all values stored in the
+	 * attributes based on the domain the constraint is being added to.
+	 */
+	if (!constr->skip_validation)
+		validateDomainConstraint(domainoid, ccbin);
+
+	/*
+	 * We must send out an sinval message for the domain, to ensure that any
+	 * dependent plans get rebuilt.  Since this command doesn't change the
+	 * domain's pg_type row, that won't happen automatically; do it manually.
+	 */
+	CacheInvalidateHeapTuple(typrel, tup, NULL);
+
+	ObjectAddressSet(address, TypeRelationId, domainoid);
+
+	/* Clean up */
+	table_close(typrel, RowExclusiveLock);
+
+	return address;
+}
+
+/*
+ * AlterDomainValidateConstraint
+ *
+ * Implements the ALTER DOMAIN .. VALIDATE CONSTRAINT statement.
+ *
+ * Scans all table columns based on the domain to verify the (previously
+ * NOT VALID) check constraint holds, then marks the pg_constraint row as
+ * validated.  Returns the ObjectAddress of the domain.
+ */
+ObjectAddress
+AlterDomainValidateConstraint(List *names, const char *constrName)
+{
+	TypeName   *typename;
+	Oid			domainoid;
+	Relation	typrel;
+	Relation	conrel;
+	HeapTuple	tup;
+	Form_pg_constraint con;
+	Form_pg_constraint copy_con;
+	char	   *conbin;
+	SysScanDesc scan;
+	Datum		val;
+	bool		isnull;
+	HeapTuple	tuple;
+	HeapTuple	copyTuple;
+	ScanKeyData skey[3];
+	ObjectAddress address;
+
+	/* Make a TypeName so we can use standard type lookup machinery */
+	typename = makeTypeNameFromNameList(names);
+	domainoid = typenameTypeId(NULL, typename);
+
+	/* Look up the domain in the type table */
+	typrel = table_open(TypeRelationId, AccessShareLock);
+
+	tup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(domainoid));
+	if (!HeapTupleIsValid(tup))
+		elog(ERROR, "cache lookup failed for type %u", domainoid);
+
+	/* Check it's a domain and check user has permission for ALTER DOMAIN */
+	checkDomainOwner(tup);
+
+	/*
+	 * Find and check the target constraint (conrelid is InvalidOid since
+	 * domain constraints are attached to a type, not a relation)
+	 */
+	conrel = table_open(ConstraintRelationId, RowExclusiveLock);
+
+	ScanKeyInit(&skey[0],
+				Anum_pg_constraint_conrelid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(InvalidOid));
+	ScanKeyInit(&skey[1],
+				Anum_pg_constraint_contypid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(domainoid));
+	ScanKeyInit(&skey[2],
+				Anum_pg_constraint_conname,
+				BTEqualStrategyNumber, F_NAMEEQ,
+				CStringGetDatum(constrName));
+
+	scan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId, true,
+							  NULL, 3, skey);
+
+	/* There can be at most one matching row */
+	if (!HeapTupleIsValid(tuple = systable_getnext(scan)))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("constraint \"%s\" of domain \"%s\" does not exist",
+						constrName, TypeNameToString(typename))));
+
+	con = (Form_pg_constraint) GETSTRUCT(tuple);
+	if (con->contype != CONSTRAINT_CHECK)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("constraint \"%s\" of domain \"%s\" is not a check constraint",
+						constrName, TypeNameToString(typename))));
+
+	/* Fetch the stored (nodeToString) form of the check expression */
+	val = SysCacheGetAttr(CONSTROID, tuple,
+						  Anum_pg_constraint_conbin,
+						  &isnull);
+	if (isnull)
+		elog(ERROR, "null conbin for constraint %u",
+			 con->oid);
+	conbin = TextDatumGetCString(val);
+
+	/* Errors out if any existing value violates the constraint */
+	validateDomainConstraint(domainoid, conbin);
+
+	/*
+	 * Now update the catalog, while we have the door open.
+	 */
+	copyTuple = heap_copytuple(tuple);
+	copy_con = (Form_pg_constraint) GETSTRUCT(copyTuple);
+	copy_con->convalidated = true;
+	CatalogTupleUpdate(conrel, &copyTuple->t_self, copyTuple);
+
+	InvokeObjectPostAlterHook(ConstraintRelationId, con->oid, 0);
+
+	ObjectAddressSet(address, TypeRelationId, domainoid);
+
+	heap_freetuple(copyTuple);
+
+	systable_endscan(scan);
+
+	table_close(typrel, AccessShareLock);
+	table_close(conrel, RowExclusiveLock);
+
+	ReleaseSysCache(tup);
+
+	return address;
+}
+
+/*
+ * validateDomainConstraint
+ *
+ * Verify that every stored value of every table column based on the given
+ * domain satisfies the CHECK constraint expression (passed in its
+ * nodeToString form as ccbin).  Raises an error for the first violating
+ * row found; returns silently if all rows pass.
+ */
+static void
+validateDomainConstraint(Oid domainoid, char *ccbin)
+{
+	Expr	   *expr = (Expr *) stringToNode(ccbin);
+	List	   *rels;
+	ListCell   *rt;
+	EState	   *estate;
+	ExprContext *econtext;
+	ExprState  *exprstate;
+
+	/* Need an EState to run ExecEvalExpr */
+	estate = CreateExecutorState();
+	econtext = GetPerTupleExprContext(estate);
+
+	/* build execution state for expr */
+	exprstate = ExecPrepareExpr(expr, estate);
+
+	/* Fetch relation list with attributes based on this domain */
+	/* ShareLock is sufficient to prevent concurrent data changes */
+
+	rels = get_rels_with_domain(domainoid, ShareLock);
+
+	foreach(rt, rels)
+	{
+		RelToCheck *rtc = (RelToCheck *) lfirst(rt);
+		Relation	testrel = rtc->rel;
+		TupleDesc	tupdesc = RelationGetDescr(testrel);
+		TupleTableSlot *slot;
+		TableScanDesc scan;
+		Snapshot	snapshot;
+
+		/* Scan all tuples in this relation */
+		snapshot = RegisterSnapshot(GetLatestSnapshot());
+		scan = table_beginscan(testrel, snapshot, 0, NULL);
+		slot = table_slot_create(testrel, NULL);
+		while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
+		{
+			int			i;
+
+			/* Test attributes that are of the domain */
+			for (i = 0; i < rtc->natts; i++)
+			{
+				int			attnum = rtc->atts[i];
+				Datum		d;
+				bool		isNull;
+				Datum		conResult;
+				Form_pg_attribute attr = TupleDescAttr(tupdesc, attnum - 1);
+
+				d = slot_getattr(slot, attnum, &isNull);
+
+				/* Expose the column value as VALUE within the expression */
+				econtext->domainValue_datum = d;
+				econtext->domainValue_isNull = isNull;
+
+				conResult = ExecEvalExprSwitchContext(exprstate,
+													  econtext,
+													  &isNull);
+
+				/* Per SQL, a NULL check result is not a violation */
+				if (!isNull && !DatumGetBool(conResult))
+				{
+					/*
+					 * In principle the auxiliary information for this error
+					 * should be errdomainconstraint(), but errtablecol()
+					 * seems considerably more useful in practice.  Since this
+					 * code only executes in an ALTER DOMAIN command, the
+					 * client should already know which domain is in question,
+					 * and which constraint too.
+					 */
+					ereport(ERROR,
+							(errcode(ERRCODE_CHECK_VIOLATION),
+							 errmsg("column \"%s\" of table \"%s\" contains values that violate the new constraint",
+									NameStr(attr->attname),
+									RelationGetRelationName(testrel)),
+							 errtablecol(testrel, attnum)));
+				}
+			}
+
+			ResetExprContext(econtext);
+		}
+		ExecDropSingleTupleTableSlot(slot);
+		table_endscan(scan);
+		UnregisterSnapshot(snapshot);
+
+		/* Hold relation lock till commit (XXX bad for concurrency) */
+		table_close(testrel, NoLock);
+	}
+
+	FreeExecutorState(estate);
+}
+
+/*
+ * get_rels_with_domain
+ *
+ * Fetch all relations / attributes which are using the domain
+ *
+ * The result is a list of RelToCheck structs, one for each distinct
+ * relation, each containing one or more attribute numbers that are of
+ * the domain type.  We have opened each rel and acquired the specified lock
+ * type on it.
+ *
+ * We support nested domains by including attributes that are of derived
+ * domain types.  Current callers do not need to distinguish between attributes
+ * that are of exactly the given domain and those that are of derived domains.
+ *
+ * XXX this is completely broken because there is no way to lock the domain
+ * to prevent columns from being added or dropped while our command runs.
+ * We can partially protect against column drops by locking relations as we
+ * come across them, but there is still a race condition (the window between
+ * seeing a pg_depend entry and acquiring lock on the relation it references).
+ * Also, holding locks on all these relations simultaneously creates a non-
+ * trivial risk of deadlock.  We can minimize but not eliminate the deadlock
+ * risk by using the weakest suitable lock (ShareLock for most callers).
+ *
+ * XXX the API for this is not sufficient to support checking domain values
+ * that are inside container types, such as composite types, arrays, or
+ * ranges.  Currently we just error out if a container type containing the
+ * target domain is stored anywhere.
+ *
+ * Generally used for retrieving a list of tests when adding
+ * new constraints to a domain.
+ */
+static List *
+get_rels_with_domain(Oid domainOid, LOCKMODE lockmode)
+{
+	List	   *result = NIL;
+	char	   *domainTypeName = format_type_be(domainOid);
+	Relation	depRel;
+	ScanKeyData key[2];
+	SysScanDesc depScan;
+	HeapTuple	depTup;
+
+	Assert(lockmode != NoLock);
+
+	/* since this function recurses, it could be driven to stack overflow */
+	check_stack_depth();
+
+	/*
+	 * We scan pg_depend to find those things that depend on the domain. (We
+	 * assume we can ignore refobjsubid for a domain.)
+	 */
+	depRel = table_open(DependRelationId, AccessShareLock);
+
+	ScanKeyInit(&key[0],
+				Anum_pg_depend_refclassid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(TypeRelationId));
+	ScanKeyInit(&key[1],
+				Anum_pg_depend_refobjid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(domainOid));
+
+	depScan = systable_beginscan(depRel, DependReferenceIndexId, true,
+								 NULL, 2, key);
+
+	while (HeapTupleIsValid(depTup = systable_getnext(depScan)))
+	{
+		Form_pg_depend pg_depend = (Form_pg_depend) GETSTRUCT(depTup);
+		RelToCheck *rtc = NULL;
+		ListCell   *rellist;
+		Form_pg_attribute pg_att;
+		int			ptr;
+
+		/* Check for directly dependent types */
+		if (pg_depend->classid == TypeRelationId)
+		{
+			if (get_typtype(pg_depend->objid) == TYPTYPE_DOMAIN)
+			{
+				/*
+				 * This is a sub-domain, so recursively add dependent columns
+				 * to the output list.  This is a bit inefficient since we may
+				 * fail to combine RelToCheck entries when attributes of the
+				 * same rel have different derived domain types, but it's
+				 * probably not worth improving.
+				 */
+				result = list_concat(result,
+									 get_rels_with_domain(pg_depend->objid,
+														  lockmode));
+			}
+			else
+			{
+				/*
+				 * Otherwise, it is some container type using the domain, so
+				 * fail if there are any columns of this type.
+				 */
+				find_composite_type_dependencies(pg_depend->objid,
+												 NULL,
+												 domainTypeName);
+			}
+			continue;
+		}
+
+		/* Else, ignore dependees that aren't user columns of relations */
+		/* (we assume system columns are never of domain types) */
+		if (pg_depend->classid != RelationRelationId ||
+			pg_depend->objsubid <= 0)
+			continue;
+
+		/* See if we already have an entry for this relation */
+		foreach(rellist, result)
+		{
+			RelToCheck *rt = (RelToCheck *) lfirst(rellist);
+
+			if (RelationGetRelid(rt->rel) == pg_depend->objid)
+			{
+				rtc = rt;
+				break;
+			}
+		}
+
+		if (rtc == NULL)
+		{
+			/* First attribute found for this relation */
+			Relation	rel;
+
+			/* Acquire requested lock on relation */
+			rel = relation_open(pg_depend->objid, lockmode);
+
+			/*
+			 * Check to see if rowtype is stored anyplace as a composite-type
+			 * column; if so we have to fail, for now anyway.
+			 */
+			if (OidIsValid(rel->rd_rel->reltype))
+				find_composite_type_dependencies(rel->rd_rel->reltype,
+												 NULL,
+												 domainTypeName);
+
+			/*
+			 * Otherwise, we can ignore relations except those with both
+			 * storage and user-chosen column types.
+			 *
+			 * XXX If an index-only scan could satisfy "col::some_domain" from
+			 * a suitable expression index, this should also check expression
+			 * index columns.
+			 */
+			if (rel->rd_rel->relkind != RELKIND_RELATION &&
+				rel->rd_rel->relkind != RELKIND_MATVIEW)
+			{
+				relation_close(rel, lockmode);
+				continue;
+			}
+
+			/* Build the RelToCheck entry with enough space for all atts */
+			rtc = (RelToCheck *) palloc(sizeof(RelToCheck));
+			rtc->rel = rel;
+			rtc->natts = 0;
+			rtc->atts = (int *) palloc(sizeof(int) * RelationGetNumberOfAttributes(rel));
+			result = lappend(result, rtc);
+		}
+
+		/*
+		 * Confirm column has not been dropped, and is of the expected type.
+		 * This defends against an ALTER DROP COLUMN occurring just before we
+		 * acquired lock ... but if the whole table were dropped, we'd still
+		 * have a problem.
+		 */
+		if (pg_depend->objsubid > RelationGetNumberOfAttributes(rtc->rel))
+			continue;
+		pg_att = TupleDescAttr(rtc->rel->rd_att, pg_depend->objsubid - 1);
+		if (pg_att->attisdropped || pg_att->atttypid != domainOid)
+			continue;
+
+		/*
+		 * Okay, add column to result.  We store the columns in column-number
+		 * order; this is just a hack to improve predictability of regression
+		 * test output ...
+		 */
+		Assert(rtc->natts < RelationGetNumberOfAttributes(rtc->rel));
+
+		/* Insertion sort: shift larger attnums right, then drop ours in */
+		ptr = rtc->natts++;
+		while (ptr > 0 && rtc->atts[ptr - 1] > pg_depend->objsubid)
+		{
+			rtc->atts[ptr] = rtc->atts[ptr - 1];
+			ptr--;
+		}
+		rtc->atts[ptr] = pg_depend->objsubid;
+	}
+
+	systable_endscan(depScan);
+
+	relation_close(depRel, AccessShareLock);
+
+	return result;
+}
+
+/*
+ * checkDomainOwner
+ *
+ * Verify that the given pg_type tuple describes a domain and that the
+ * current user owns it, i.e. is entitled to run ALTER DOMAIN on it.
+ * Raises an error otherwise; returns nothing on success.
+ */
+void
+checkDomainOwner(HeapTuple tup)
+{
+	Form_pg_type typeForm = (Form_pg_type) GETSTRUCT(tup);
+
+	/* Reject anything that is not a domain */
+	if (typeForm->typtype != TYPTYPE_DOMAIN)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("%s is not a domain",
+						format_type_be(typeForm->oid))));
+
+	/* Only the type's owner may alter it */
+	if (!pg_type_ownercheck(typeForm->oid, GetUserId()))
+		aclcheck_error_type(ACLCHECK_NOT_OWNER, typeForm->oid);
+}
+
+/*
+ * domainAddConstraint - code shared between CREATE and ALTER DOMAIN
+ *
+ * Parses and validates a CHECK constraint for the domain, stores it in
+ * pg_constraint, and returns the nodeToString (binary) form of the compiled
+ * expression so the caller can run any required data validation.  If
+ * constrAddr is not NULL, *constrAddr receives the new constraint's address.
+ */
+static char *
+domainAddConstraint(Oid domainOid, Oid domainNamespace, Oid baseTypeOid,
+					int typMod, Constraint *constr,
+					const char *domainName, ObjectAddress *constrAddr)
+{
+	Node	   *expr;
+	char	   *ccbin;
+	ParseState *pstate;
+	CoerceToDomainValue *domVal;
+	Oid			ccoid;
+
+	/*
+	 * Assign or validate constraint name
+	 */
+	if (constr->conname)
+	{
+		if (ConstraintNameIsUsed(CONSTRAINT_DOMAIN,
+								 domainOid,
+								 constr->conname))
+			ereport(ERROR,
+					(errcode(ERRCODE_DUPLICATE_OBJECT),
+					 errmsg("constraint \"%s\" for domain \"%s\" already exists",
+							constr->conname, domainName)));
+	}
+	else
+		constr->conname = ChooseConstraintName(domainName,
+											   NULL,
+											   "check",
+											   domainNamespace,
+											   NIL);
+
+	/*
+	 * Convert the A_EXPR in raw_expr into an EXPR
+	 */
+	pstate = make_parsestate(NULL);
+
+	/*
+	 * Set up a CoerceToDomainValue to represent the occurrence of VALUE in
+	 * the expression.  Note that it will appear to have the type of the base
+	 * type, not the domain.  This seems correct since within the check
+	 * expression, we should not assume the input value can be considered a
+	 * member of the domain.
+	 */
+	domVal = makeNode(CoerceToDomainValue);
+	domVal->typeId = baseTypeOid;
+	domVal->typeMod = typMod;
+	domVal->collation = get_typcollation(baseTypeOid);
+	domVal->location = -1;		/* will be set when/if used */
+
+	/* replace_domain_constraint_value substitutes domVal for "value" */
+	pstate->p_pre_columnref_hook = replace_domain_constraint_value;
+	pstate->p_ref_hook_state = (void *) domVal;
+
+	expr = transformExpr(pstate, constr->raw_expr, EXPR_KIND_DOMAIN_CHECK);
+
+	/*
+	 * Make sure it yields a boolean result.
+	 */
+	expr = coerce_to_boolean(pstate, expr, "CHECK");
+
+	/*
+	 * Fix up collation information.
+	 */
+	assign_expr_collations(pstate, expr);
+
+	/*
+	 * Domains don't allow variables (this is probably dead code now that
+	 * add_missing_from is history, but let's be sure).
+	 */
+	if (list_length(pstate->p_rtable) != 0 ||
+		contain_var_clause(expr))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+				 errmsg("cannot use table references in domain check constraint")));
+
+	/*
+	 * Convert to string form for storage.
+	 */
+	ccbin = nodeToString(expr);
+
+	/*
+	 * Store the constraint in pg_constraint
+	 */
+	ccoid =
+		CreateConstraintEntry(constr->conname,	/* Constraint Name */
+							  domainNamespace,	/* namespace */
+							  CONSTRAINT_CHECK, /* Constraint Type */
+							  false,	/* Is Deferrable */
+							  false,	/* Is Deferred */
+							  !constr->skip_validation, /* Is Validated */
+							  InvalidOid,	/* no parent constraint */
+							  InvalidOid,	/* not a relation constraint */
+							  NULL, /* no constrained columns */
+							  0,	/* no constraint keys */
+							  0,	/* no total keys either */
+							  domainOid,	/* domain constraint */
+							  InvalidOid,	/* no associated index */
+							  InvalidOid,	/* Foreign key fields */
+							  NULL,
+							  NULL,
+							  NULL,
+							  NULL,
+							  0,	/* FK fields are all n/a here */
+							  ' ',
+							  ' ',
+							  NULL,
+							  0,
+							  ' ',
+							  NULL, /* not an exclusion constraint */
+							  expr, /* Tree form of check constraint */
+							  ccbin,	/* Binary form of check constraint */
+							  true, /* is local */
+							  0,	/* inhcount */
+							  false,	/* connoinherit */
+							  false);	/* is_internal */
+	if (constrAddr)
+		ObjectAddressSet(*constrAddr, ConstraintRelationId, ccoid);
+
+	/*
+	 * Return the compiled constraint expression so the calling routine can
+	 * perform any additional required tests.
+	 */
+	return ccbin;
+}
+
+/*
+ * Parser pre_columnref_hook for domain CHECK constraint parsing.
+ *
+ * Turns an unqualified reference to "value" into the CoerceToDomainValue
+ * node stashed in p_ref_hook_state by domainAddConstraint; returns NULL
+ * for anything else so normal column-reference resolution proceeds.
+ * (VALUE is handled as a name, not a keyword, to avoid breaking the many
+ * applications that have used VALUE as a column name in the past.)
+ */
+static Node *
+replace_domain_constraint_value(ParseState *pstate, ColumnRef *cref)
+{
+	Node	   *field;
+	char	   *refname;
+	CoerceToDomainValue *result;
+
+	/* Only a single, unqualified name can possibly be VALUE */
+	if (list_length(cref->fields) != 1)
+		return NULL;
+
+	field = (Node *) linitial(cref->fields);
+	Assert(IsA(field, String));
+	refname = strVal(field);
+	if (strcmp(refname, "value") != 0)
+		return NULL;
+
+	/* Clone the node prepared for us by domainAddConstraint */
+	result = copyObject(pstate->p_ref_hook_state);
+
+	/* Propagate location knowledge, if any */
+	result->location = cref->location;
+	return (Node *) result;
+}
+
+
+/*
+ * Execute ALTER TYPE RENAME (also reached via ALTER DOMAIN RENAME).
+ *
+ * Returns the ObjectAddress of the renamed type.
+ */
+ObjectAddress
+RenameType(RenameStmt *stmt)
+{
+	List	   *names = castNode(List, stmt->object);
+	const char *newTypeName = stmt->newname;
+	TypeName   *typename;
+	Oid			typeOid;
+	Relation	rel;
+	HeapTuple	tup;
+	Form_pg_type typTup;
+	ObjectAddress address;
+
+	/* Make a TypeName so we can use standard type lookup machinery */
+	typename = makeTypeNameFromNameList(names);
+	typeOid = typenameTypeId(NULL, typename);
+
+	/* Look up the type in the type table */
+	rel = table_open(TypeRelationId, RowExclusiveLock);
+
+	tup = SearchSysCacheCopy1(TYPEOID, ObjectIdGetDatum(typeOid));
+	if (!HeapTupleIsValid(tup))
+		elog(ERROR, "cache lookup failed for type %u", typeOid);
+	typTup = (Form_pg_type) GETSTRUCT(tup);
+
+	/* check permissions on type */
+	if (!pg_type_ownercheck(typeOid, GetUserId()))
+		aclcheck_error_type(ACLCHECK_NOT_OWNER, typeOid);
+
+	/* ALTER DOMAIN used on a non-domain? */
+	if (stmt->renameType == OBJECT_DOMAIN && typTup->typtype != TYPTYPE_DOMAIN)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("%s is not a domain",
+						format_type_be(typeOid))));
+
+	/*
+	 * If it's a composite type, we need to check that it really is a
+	 * free-standing composite type, and not a table's rowtype. We want people
+	 * to use ALTER TABLE not ALTER TYPE for that case.
+	 */
+	if (typTup->typtype == TYPTYPE_COMPOSITE &&
+		get_rel_relkind(typTup->typrelid) != RELKIND_COMPOSITE_TYPE)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("%s is a table's row type",
+						format_type_be(typeOid)),
+				 errhint("Use ALTER TABLE instead.")));
+
+	/* don't allow direct alteration of array types, either */
+	if (IsTrueArrayType(typTup))
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("cannot alter array type %s",
+						format_type_be(typeOid)),
+				 errhint("You can alter type %s, which will alter the array type as well.",
+						 format_type_be(typTup->typelem))));
+
+	/*
+	 * If type is composite we need to rename associated pg_class entry too.
+	 * RenameRelationInternal will call RenameTypeInternal automatically.
+	 */
+	if (typTup->typtype == TYPTYPE_COMPOSITE)
+		RenameRelationInternal(typTup->typrelid, newTypeName, false, false);
+	else
+		RenameTypeInternal(typeOid, newTypeName,
+						   typTup->typnamespace);
+
+	ObjectAddressSet(address, TypeRelationId, typeOid);
+	/* Clean up */
+	table_close(rel, RowExclusiveLock);
+
+	return address;
+}
+
+/*
+ * Change the owner of a type (ALTER TYPE/DOMAIN ... OWNER TO).
+ *
+ * Performs all permission checks, then delegates the actual catalog change
+ * to AlterTypeOwner_oid.  Changing the owner to the current owner is a
+ * silent no-op (needed for dump restoration).  Returns the type's
+ * ObjectAddress.
+ */
+ObjectAddress
+AlterTypeOwner(List *names, Oid newOwnerId, ObjectType objecttype)
+{
+	TypeName   *typename;
+	Oid			typeOid;
+	Relation	rel;
+	HeapTuple	tup;
+	HeapTuple	newtup;
+	Form_pg_type typTup;
+	AclResult	aclresult;
+	ObjectAddress address;
+
+	rel = table_open(TypeRelationId, RowExclusiveLock);
+
+	/* Make a TypeName so we can use standard type lookup machinery */
+	typename = makeTypeNameFromNameList(names);
+
+	/* Use LookupTypeName here so that shell types can be processed */
+	tup = LookupTypeName(NULL, typename, NULL, false);
+	if (tup == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("type \"%s\" does not exist",
+						TypeNameToString(typename))));
+	typeOid = typeTypeId(tup);
+
+	/* Copy the syscache entry so we can scribble on it below */
+	newtup = heap_copytuple(tup);
+	ReleaseSysCache(tup);
+	tup = newtup;
+	typTup = (Form_pg_type) GETSTRUCT(tup);
+
+	/* Don't allow ALTER DOMAIN on a type */
+	if (objecttype == OBJECT_DOMAIN && typTup->typtype != TYPTYPE_DOMAIN)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("%s is not a domain",
+						format_type_be(typeOid))));
+
+	/*
+	 * If it's a composite type, we need to check that it really is a
+	 * free-standing composite type, and not a table's rowtype. We want people
+	 * to use ALTER TABLE not ALTER TYPE for that case.
+	 */
+	if (typTup->typtype == TYPTYPE_COMPOSITE &&
+		get_rel_relkind(typTup->typrelid) != RELKIND_COMPOSITE_TYPE)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("%s is a table's row type",
+						format_type_be(typeOid)),
+				 errhint("Use ALTER TABLE instead.")));
+
+	/* don't allow direct alteration of array types, either */
+	if (IsTrueArrayType(typTup))
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("cannot alter array type %s",
+						format_type_be(typeOid)),
+				 errhint("You can alter type %s, which will alter the array type as well.",
+						 format_type_be(typTup->typelem))));
+
+	/*
+	 * If the new owner is the same as the existing owner, consider the
+	 * command to have succeeded.  This is for dump restoration purposes.
+	 */
+	if (typTup->typowner != newOwnerId)
+	{
+		/* Superusers can always do it */
+		if (!superuser())
+		{
+			/* Otherwise, must be owner of the existing object */
+			if (!pg_type_ownercheck(typTup->oid, GetUserId()))
+				aclcheck_error_type(ACLCHECK_NOT_OWNER, typTup->oid);
+
+			/* Must be able to become new owner */
+			check_is_member_of_role(GetUserId(), newOwnerId);
+
+			/* New owner must have CREATE privilege on namespace */
+			aclresult = pg_namespace_aclcheck(typTup->typnamespace,
+											  newOwnerId,
+											  ACL_CREATE);
+			if (aclresult != ACLCHECK_OK)
+				aclcheck_error(aclresult, OBJECT_SCHEMA,
+							   get_namespace_name(typTup->typnamespace));
+		}
+
+		AlterTypeOwner_oid(typeOid, newOwnerId, true);
+	}
+
+	ObjectAddressSet(address, TypeRelationId, typeOid);
+
+	/* Clean up */
+	table_close(rel, RowExclusiveLock);
+
+	return address;
+}
+
+/*
+ * AlterTypeOwner_oid - change type owner unconditionally
+ *
+ * This function recurses to handle a pg_class entry, if necessary.  It
+ * invokes any necessary access object hooks.  If hasDependEntry is true, this
+ * function modifies the pg_shdepend entry appropriately (this should be
+ * passed as false only for table rowtypes and array types).
+ *
+ * This is used by ALTER TABLE/TYPE OWNER commands, as well as by REASSIGN
+ * OWNED BY.  It assumes the caller has done all needed checks.
+ */
+void
+AlterTypeOwner_oid(Oid typeOid, Oid newOwnerId, bool hasDependEntry)
+{
+	Relation	rel;
+	HeapTuple	tup;
+	Form_pg_type typTup;
+
+	rel = table_open(TypeRelationId, RowExclusiveLock);
+
+	tup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typeOid));
+	if (!HeapTupleIsValid(tup))
+		elog(ERROR, "cache lookup failed for type %u", typeOid);
+	typTup = (Form_pg_type) GETSTRUCT(tup);
+
+	/*
+	 * If it's a composite type, invoke ATExecChangeOwner so that we fix up
+	 * the pg_class entry properly.  That will call back to
+	 * AlterTypeOwnerInternal to take care of the pg_type entry(s).
+	 */
+	if (typTup->typtype == TYPTYPE_COMPOSITE)
+		ATExecChangeOwner(typTup->typrelid, newOwnerId, true, AccessExclusiveLock);
+	else
+		AlterTypeOwnerInternal(typeOid, newOwnerId);
+
+	/* Update owner dependency reference */
+	if (hasDependEntry)
+		changeDependencyOnOwner(TypeRelationId, typeOid, newOwnerId);
+
+	InvokeObjectPostAlterHook(TypeRelationId, typeOid, 0);
+
+	ReleaseSysCache(tup);
+	table_close(rel, RowExclusiveLock);
+}
+
+/*
+ * AlterTypeOwnerInternal - bare-bones type owner change.
+ *
+ * This routine simply modifies the owner of a pg_type entry, and recurses
+ * to handle a possible array type.  It does no permission checks and fires
+ * no object access hooks; those are the caller's responsibility.
+ */
+void
+AlterTypeOwnerInternal(Oid typeOid, Oid newOwnerId)
+{
+	Relation	rel;
+	HeapTuple	tup;
+	Form_pg_type typTup;
+	Datum		repl_val[Natts_pg_type];
+	bool		repl_null[Natts_pg_type];
+	bool		repl_repl[Natts_pg_type];
+	Acl		   *newAcl;
+	Datum		aclDatum;
+	bool		isNull;
+
+	rel = table_open(TypeRelationId, RowExclusiveLock);
+
+	tup = SearchSysCacheCopy1(TYPEOID, ObjectIdGetDatum(typeOid));
+	if (!HeapTupleIsValid(tup))
+		elog(ERROR, "cache lookup failed for type %u", typeOid);
+	typTup = (Form_pg_type) GETSTRUCT(tup);
+
+	memset(repl_null, false, sizeof(repl_null));
+	memset(repl_repl, false, sizeof(repl_repl));
+
+	repl_repl[Anum_pg_type_typowner - 1] = true;
+	repl_val[Anum_pg_type_typowner - 1] = ObjectIdGetDatum(newOwnerId);
+
+	aclDatum = heap_getattr(tup,
+							Anum_pg_type_typacl,
+							RelationGetDescr(rel),
+							&isNull);
+	/* Null ACLs do not require changes */
+	if (!isNull)
+	{
+		/* Transfer any grants made by the old owner to the new owner */
+		newAcl = aclnewowner(DatumGetAclP(aclDatum),
+							 typTup->typowner, newOwnerId);
+		repl_repl[Anum_pg_type_typacl - 1] = true;
+		repl_val[Anum_pg_type_typacl - 1] = PointerGetDatum(newAcl);
+	}
+
+	tup = heap_modify_tuple(tup, RelationGetDescr(rel), repl_val, repl_null,
+							repl_repl);
+
+	CatalogTupleUpdate(rel, &tup->t_self, tup);
+
+	/* If it has an array type, update that too */
+	if (OidIsValid(typTup->typarray))
+		AlterTypeOwnerInternal(typTup->typarray, newOwnerId);
+
+	/* Clean up */
+	table_close(rel, RowExclusiveLock);
+}
+
+/*
+ * Execute ALTER TYPE SET SCHEMA
+ *
+ * "names" is the possibly-qualified type name; "newschema" is the target
+ * schema. If oldschema isn't NULL, the type's previous namespace OID is
+ * returned there. Permission checks on the type itself happen in
+ * AlterTypeNamespace_oid.
+ */
+ObjectAddress
+AlterTypeNamespace(List *names, const char *newschema, ObjectType objecttype,
+ Oid *oldschema)
+{
+ TypeName *typename;
+ Oid typeOid;
+ Oid nspOid;
+ Oid oldNspOid;
+ ObjectAddresses *objsMoved;
+ ObjectAddress myself;
+
+ /* Make a TypeName so we can use standard type lookup machinery */
+ typename = makeTypeNameFromNameList(names);
+ typeOid = typenameTypeId(NULL, typename);
+
+ /* Don't allow ALTER DOMAIN on a type */
+ if (objecttype == OBJECT_DOMAIN && get_typtype(typeOid) != TYPTYPE_DOMAIN)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("%s is not a domain",
+ format_type_be(typeOid))));
+
+ /* get schema OID and check its permissions */
+ nspOid = LookupCreationNamespace(newschema);
+
+ /* objsMoved tracks objects already moved, to avoid double-processing */
+ objsMoved = new_object_addresses();
+ oldNspOid = AlterTypeNamespace_oid(typeOid, nspOid, objsMoved);
+ free_object_addresses(objsMoved);
+
+ if (oldschema)
+ *oldschema = oldNspOid;
+
+ ObjectAddressSet(myself, TypeRelationId, typeOid);
+
+ return myself;
+}
+
+/*
+ * AlterTypeNamespace_oid - move a type (by OID) to a new namespace.
+ *
+ * Checks that the caller owns the type and that it isn't an array type,
+ * then hands off to AlterTypeNamespaceInternal. Returns the type's old
+ * namespace OID.
+ */
+Oid
+AlterTypeNamespace_oid(Oid typeOid, Oid nspOid, ObjectAddresses *objsMoved)
+{
+ Oid elemOid;
+
+ /* check permissions on type */
+ if (!pg_type_ownercheck(typeOid, GetUserId()))
+ aclcheck_error_type(ACLCHECK_NOT_OWNER, typeOid);
+
+ /* don't allow direct alteration of array types */
+ elemOid = get_element_type(typeOid);
+ if (OidIsValid(elemOid) && get_array_type(elemOid) == typeOid)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot alter array type %s",
+ format_type_be(typeOid)),
+ errhint("You can alter type %s, which will alter the array type as well.",
+ format_type_be(elemOid))));
+
+ /* and do the work */
+ return AlterTypeNamespaceInternal(typeOid, nspOid, false, true, objsMoved);
+}
+
+/*
+ * Move specified type to new namespace.
+ *
+ * Caller must have already checked privileges.
+ *
+ * The function automatically recurses to process the type's array type,
+ * if any. isImplicitArray should be true only when doing this internal
+ * recursion (outside callers must never try to move an array type directly).
+ *
+ * If errorOnTableType is true, the function errors out if the type is
+ * a table type. ALTER TABLE has to be used to move a table to a new
+ * namespace.
+ *
+ * Returns the type's old namespace OID.
+ */
+Oid
+AlterTypeNamespaceInternal(Oid typeOid, Oid nspOid,
+ bool isImplicitArray,
+ bool errorOnTableType,
+ ObjectAddresses *objsMoved)
+{
+ Relation rel;
+ HeapTuple tup;
+ Form_pg_type typform;
+ Oid oldNspOid;
+ Oid arrayOid;
+ bool isCompositeType;
+ ObjectAddress thisobj;
+
+ /*
+ * Make sure we haven't moved this object previously.
+ */
+ thisobj.classId = TypeRelationId;
+ thisobj.objectId = typeOid;
+ thisobj.objectSubId = 0;
+
+ if (object_address_present(&thisobj, objsMoved))
+ return InvalidOid;
+
+ rel = table_open(TypeRelationId, RowExclusiveLock);
+
+ tup = SearchSysCacheCopy1(TYPEOID, ObjectIdGetDatum(typeOid));
+ if (!HeapTupleIsValid(tup))
+ elog(ERROR, "cache lookup failed for type %u", typeOid);
+ typform = (Form_pg_type) GETSTRUCT(tup);
+
+ oldNspOid = typform->typnamespace;
+ arrayOid = typform->typarray;
+
+ /* If the type is already there, we can skip these next few checks. */
+ if (oldNspOid != nspOid)
+ {
+ /* common checks on switching namespaces */
+ CheckSetNamespace(oldNspOid, nspOid);
+
+ /* check for duplicate name (more friendly than unique-index failure) */
+ if (SearchSysCacheExists2(TYPENAMENSP,
+ NameGetDatum(&typform->typname),
+ ObjectIdGetDatum(nspOid)))
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg("type \"%s\" already exists in schema \"%s\"",
+ NameStr(typform->typname),
+ get_namespace_name(nspOid))));
+ }
+
+ /* Detect whether type is a composite type (but not a table rowtype) */
+ isCompositeType =
+ (typform->typtype == TYPTYPE_COMPOSITE &&
+ get_rel_relkind(typform->typrelid) == RELKIND_COMPOSITE_TYPE);
+
+ /* Enforce not-table-type if requested */
+ if (typform->typtype == TYPTYPE_COMPOSITE && !isCompositeType &&
+ errorOnTableType)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("%s is a table's row type",
+ format_type_be(typeOid)),
+ errhint("Use ALTER TABLE instead.")));
+
+ if (oldNspOid != nspOid)
+ {
+ /* OK, modify the pg_type row */
+
+ /* tup is a copy, so we can scribble directly on it */
+ typform->typnamespace = nspOid;
+
+ CatalogTupleUpdate(rel, &tup->t_self, tup);
+ }
+
+ /*
+ * Composite types have pg_class entries.
+ *
+ * We need to modify the pg_class tuple as well to reflect the change of
+ * schema.
+ */
+ if (isCompositeType)
+ {
+ Relation classRel;
+
+ classRel = table_open(RelationRelationId, RowExclusiveLock);
+
+ AlterRelationNamespaceInternal(classRel, typform->typrelid,
+ oldNspOid, nspOid,
+ false, objsMoved);
+
+ table_close(classRel, RowExclusiveLock);
+
+ /*
+ * Check for constraints associated with the composite type (we don't
+ * currently support this, but probably will someday).
+ */
+ AlterConstraintNamespaces(typform->typrelid, oldNspOid,
+ nspOid, false, objsMoved);
+ }
+ else
+ {
+ /* If it's a domain, it might have constraints */
+ if (typform->typtype == TYPTYPE_DOMAIN)
+ AlterConstraintNamespaces(typeOid, oldNspOid, nspOid, true,
+ objsMoved);
+ }
+
+ /*
+ * Update dependency on schema, if any --- a table rowtype has not got
+ * one, and neither does an implicit array.
+ */
+ if (oldNspOid != nspOid &&
+ (isCompositeType || typform->typtype != TYPTYPE_COMPOSITE) &&
+ !isImplicitArray)
+ if (changeDependencyFor(TypeRelationId, typeOid,
+ NamespaceRelationId, oldNspOid, nspOid) != 1)
+ elog(ERROR, "failed to change schema dependency for type %s",
+ format_type_be(typeOid));
+
+ InvokeObjectPostAlterHook(TypeRelationId, typeOid, 0);
+
+ heap_freetuple(tup);
+
+ table_close(rel, RowExclusiveLock);
+
+ /* Remember that we moved this one, for the benefit of recursive calls */
+ add_exact_object_address(&thisobj, objsMoved);
+
+ /* Recursively alter the associated array type, if any */
+ if (OidIsValid(arrayOid))
+ AlterTypeNamespaceInternal(arrayOid, nspOid, true, true, objsMoved);
+
+ return oldNspOid;
+}
+
+/*
+ * AlterType
+ * ALTER TYPE <type> SET (option = ...)
+ *
+ * NOTE: the set of changes that can be allowed here is constrained by many
+ * non-obvious implementation restrictions. Tread carefully when considering
+ * adding new flexibility.
+ */
+ObjectAddress
+AlterType(AlterTypeStmt *stmt)
+{
+ ObjectAddress address;
+ Relation catalog;
+ TypeName *typename;
+ HeapTuple tup;
+ Oid typeOid;
+ Form_pg_type typForm;
+ bool requireSuper = false; /* set true if any option needs superuser */
+ AlterTypeRecurseParams atparams;
+ ListCell *pl;
+
+ catalog = table_open(TypeRelationId, RowExclusiveLock);
+
+ /* Make a TypeName so we can use standard type lookup machinery */
+ typename = makeTypeNameFromNameList(stmt->typeName);
+ tup = typenameType(NULL, typename, NULL);
+
+ typeOid = typeTypeId(tup);
+ typForm = (Form_pg_type) GETSTRUCT(tup);
+
+ /* Process options, accumulating the requested changes into atparams */
+ memset(&atparams, 0, sizeof(atparams));
+ foreach(pl, stmt->options)
+ {
+ DefElem *defel = (DefElem *) lfirst(pl);
+
+ if (strcmp(defel->defname, "storage") == 0)
+ {
+ char *a = defGetString(defel);
+
+ if (pg_strcasecmp(a, "plain") == 0)
+ atparams.storage = TYPSTORAGE_PLAIN;
+ else if (pg_strcasecmp(a, "external") == 0)
+ atparams.storage = TYPSTORAGE_EXTERNAL;
+ else if (pg_strcasecmp(a, "extended") == 0)
+ atparams.storage = TYPSTORAGE_EXTENDED;
+ else if (pg_strcasecmp(a, "main") == 0)
+ atparams.storage = TYPSTORAGE_MAIN;
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("storage \"%s\" not recognized", a)));
+
+ /*
+ * Validate the storage request. If the type isn't varlena, it
+ * certainly doesn't support non-PLAIN storage.
+ */
+ if (atparams.storage != TYPSTORAGE_PLAIN && typForm->typlen != -1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("fixed-size types must have storage PLAIN")));
+
+ /*
+ * Switching from PLAIN to non-PLAIN is allowed, but it requires
+ * superuser, since we can't validate that the type's C functions
+ * will support it. Switching from non-PLAIN to PLAIN is
+ * disallowed outright, because it's not practical to ensure that
+ * no tables have toasted values of the type. Switching among
+ * different non-PLAIN settings is OK, since it just constitutes a
+ * change in the strategy requested for columns created in the
+ * future.
+ */
+ if (atparams.storage != TYPSTORAGE_PLAIN &&
+ typForm->typstorage == TYPSTORAGE_PLAIN)
+ requireSuper = true;
+ else if (atparams.storage == TYPSTORAGE_PLAIN &&
+ typForm->typstorage != TYPSTORAGE_PLAIN)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("cannot change type's storage to PLAIN")));
+
+ atparams.updateStorage = true;
+ }
+ else if (strcmp(defel->defname, "receive") == 0)
+ {
+ if (defel->arg != NULL)
+ atparams.receiveOid =
+ findTypeReceiveFunction(defGetQualifiedName(defel),
+ typeOid);
+ else
+ atparams.receiveOid = InvalidOid; /* NONE, remove function */
+ atparams.updateReceive = true;
+ /* Replacing an I/O function requires superuser. */
+ requireSuper = true;
+ }
+ else if (strcmp(defel->defname, "send") == 0)
+ {
+ if (defel->arg != NULL)
+ atparams.sendOid =
+ findTypeSendFunction(defGetQualifiedName(defel),
+ typeOid);
+ else
+ atparams.sendOid = InvalidOid; /* NONE, remove function */
+ atparams.updateSend = true;
+ /* Replacing an I/O function requires superuser. */
+ requireSuper = true;
+ }
+ else if (strcmp(defel->defname, "typmod_in") == 0)
+ {
+ if (defel->arg != NULL)
+ atparams.typmodinOid =
+ findTypeTypmodinFunction(defGetQualifiedName(defel));
+ else
+ atparams.typmodinOid = InvalidOid; /* NONE, remove function */
+ atparams.updateTypmodin = true;
+ /* Replacing an I/O function requires superuser. */
+ requireSuper = true;
+ }
+ else if (strcmp(defel->defname, "typmod_out") == 0)
+ {
+ if (defel->arg != NULL)
+ atparams.typmodoutOid =
+ findTypeTypmodoutFunction(defGetQualifiedName(defel));
+ else
+ atparams.typmodoutOid = InvalidOid; /* NONE, remove function */
+ atparams.updateTypmodout = true;
+ /* Replacing an I/O function requires superuser. */
+ requireSuper = true;
+ }
+ else if (strcmp(defel->defname, "analyze") == 0)
+ {
+ if (defel->arg != NULL)
+ atparams.analyzeOid =
+ findTypeAnalyzeFunction(defGetQualifiedName(defel),
+ typeOid);
+ else
+ atparams.analyzeOid = InvalidOid; /* NONE, remove function */
+ atparams.updateAnalyze = true;
+ /* Replacing an analyze function requires superuser. */
+ requireSuper = true;
+ }
+ else if (strcmp(defel->defname, "subscript") == 0)
+ {
+ if (defel->arg != NULL)
+ atparams.subscriptOid =
+ findTypeSubscriptingFunction(defGetQualifiedName(defel),
+ typeOid);
+ else
+ atparams.subscriptOid = InvalidOid; /* NONE, remove function */
+ atparams.updateSubscript = true;
+ /* Replacing a subscript function requires superuser. */
+ requireSuper = true;
+ }
+
+ /*
+ * The rest of the options that CREATE accepts cannot be changed.
+ * Check for them so that we can give a meaningful error message.
+ */
+ else if (strcmp(defel->defname, "input") == 0 ||
+ strcmp(defel->defname, "output") == 0 ||
+ strcmp(defel->defname, "internallength") == 0 ||
+ strcmp(defel->defname, "passedbyvalue") == 0 ||
+ strcmp(defel->defname, "alignment") == 0 ||
+ strcmp(defel->defname, "like") == 0 ||
+ strcmp(defel->defname, "category") == 0 ||
+ strcmp(defel->defname, "preferred") == 0 ||
+ strcmp(defel->defname, "default") == 0 ||
+ strcmp(defel->defname, "element") == 0 ||
+ strcmp(defel->defname, "delimiter") == 0 ||
+ strcmp(defel->defname, "collatable") == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("type attribute \"%s\" cannot be changed",
+ defel->defname)));
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("type attribute \"%s\" not recognized",
+ defel->defname)));
+ }
+
+ /*
+ * Permissions check. Require superuser if we decided the command
+ * requires that, else must own the type.
+ */
+ if (requireSuper)
+ {
+ if (!superuser())
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("must be superuser to alter a type")));
+ }
+ else
+ {
+ if (!pg_type_ownercheck(typeOid, GetUserId()))
+ aclcheck_error_type(ACLCHECK_NOT_OWNER, typeOid);
+ }
+
+ /*
+ * We disallow all forms of ALTER TYPE SET on types that aren't plain base
+ * types. It would for example be highly unsafe, not to mention
+ * pointless, to change the send/receive functions for a composite type.
+ * Moreover, pg_dump has no support for changing these properties on
+ * non-base types. We might weaken this someday, but not now.
+ *
+ * Note: if you weaken this enough to allow composite types, be sure to
+ * adjust the GenerateTypeDependencies call in AlterTypeRecurse.
+ */
+ if (typForm->typtype != TYPTYPE_BASE)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("%s is not a base type",
+ format_type_be(typeOid))));
+
+ /*
+ * For the same reasons, don't allow direct alteration of array types.
+ */
+ if (IsTrueArrayType(typForm))
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("%s is not a base type",
+ format_type_be(typeOid))));
+
+ /* OK, recursively update this type and any arrays/domains over it */
+ AlterTypeRecurse(typeOid, false, tup, catalog, &atparams);
+
+ /* Clean up */
+ ReleaseSysCache(tup);
+
+ table_close(catalog, RowExclusiveLock);
+
+ ObjectAddressSet(address, TypeRelationId, typeOid);
+
+ return address;
+}
+
+/*
+ * AlterTypeRecurse: one recursion step for AlterType()
+ *
+ * Apply the changes specified by "atparams" to the type identified by
+ * "typeOid", whose existing pg_type tuple is "tup". If necessary,
+ * recursively update its array type as well. Then search for any domains
+ * over this type, and recursively apply (most of) the same changes to those
+ * domains.
+ *
+ * We need this because the system generally assumes that a domain inherits
+ * many properties from its base type. See DefineDomain() above for details
+ * of what is inherited. Arrays inherit a smaller number of properties,
+ * but not none.
+ *
+ * There's a race condition here, in that some other transaction could
+ * concurrently add another domain atop this base type; we'd miss updating
+ * that one. Hence, be wary of allowing ALTER TYPE to change properties for
+ * which it'd be really fatal for a domain to be out of sync with its base
+ * type (typlen, for example). In practice, races seem unlikely to be an
+ * issue for plausible use-cases for ALTER TYPE. If one does happen, it could
+ * be fixed by re-doing the same ALTER TYPE once all prior transactions have
+ * committed.
+ */
+static void
+AlterTypeRecurse(Oid typeOid, bool isImplicitArray,
+ HeapTuple tup, Relation catalog,
+ AlterTypeRecurseParams *atparams)
+{
+ Datum values[Natts_pg_type];
+ bool nulls[Natts_pg_type];
+ bool replaces[Natts_pg_type];
+ HeapTuple newtup;
+ SysScanDesc scan;
+ ScanKeyData key[1];
+ HeapTuple domainTup;
+
+ /* Since this function recurses, it could be driven to stack overflow */
+ check_stack_depth();
+
+ /* Update the current type's tuple */
+ memset(values, 0, sizeof(values));
+ memset(nulls, 0, sizeof(nulls));
+ memset(replaces, 0, sizeof(replaces));
+
+ if (atparams->updateStorage)
+ {
+ replaces[Anum_pg_type_typstorage - 1] = true;
+ values[Anum_pg_type_typstorage - 1] = CharGetDatum(atparams->storage);
+ }
+ if (atparams->updateReceive)
+ {
+ replaces[Anum_pg_type_typreceive - 1] = true;
+ values[Anum_pg_type_typreceive - 1] = ObjectIdGetDatum(atparams->receiveOid);
+ }
+ if (atparams->updateSend)
+ {
+ replaces[Anum_pg_type_typsend - 1] = true;
+ values[Anum_pg_type_typsend - 1] = ObjectIdGetDatum(atparams->sendOid);
+ }
+ if (atparams->updateTypmodin)
+ {
+ replaces[Anum_pg_type_typmodin - 1] = true;
+ values[Anum_pg_type_typmodin - 1] = ObjectIdGetDatum(atparams->typmodinOid);
+ }
+ if (atparams->updateTypmodout)
+ {
+ replaces[Anum_pg_type_typmodout - 1] = true;
+ values[Anum_pg_type_typmodout - 1] = ObjectIdGetDatum(atparams->typmodoutOid);
+ }
+ if (atparams->updateAnalyze)
+ {
+ replaces[Anum_pg_type_typanalyze - 1] = true;
+ values[Anum_pg_type_typanalyze - 1] = ObjectIdGetDatum(atparams->analyzeOid);
+ }
+ if (atparams->updateSubscript)
+ {
+ replaces[Anum_pg_type_typsubscript - 1] = true;
+ values[Anum_pg_type_typsubscript - 1] = ObjectIdGetDatum(atparams->subscriptOid);
+ }
+
+ newtup = heap_modify_tuple(tup, RelationGetDescr(catalog),
+ values, nulls, replaces);
+
+ CatalogTupleUpdate(catalog, &newtup->t_self, newtup);
+
+ /* Rebuild dependencies for this type */
+ GenerateTypeDependencies(newtup,
+ catalog,
+ NULL, /* don't have defaultExpr handy */
+ NULL, /* don't have typacl handy */
+ 0, /* we rejected composite types above */
+ isImplicitArray, /* it might be an array */
+ isImplicitArray, /* dependent iff it's array */
+ false, /* don't touch extension membership */
+ true);
+
+ InvokeObjectPostAlterHook(TypeRelationId, typeOid, 0);
+
+ /*
+ * Arrays inherit their base type's typmodin and typmodout, but none of
+ * the other properties we're concerned with here. Recurse to the array
+ * type if needed.
+ */
+ if (!isImplicitArray &&
+ (atparams->updateTypmodin || atparams->updateTypmodout))
+ {
+ Oid arrtypoid = ((Form_pg_type) GETSTRUCT(newtup))->typarray;
+
+ if (OidIsValid(arrtypoid))
+ {
+ HeapTuple arrtup;
+ AlterTypeRecurseParams arrparams;
+
+ arrtup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(arrtypoid));
+ if (!HeapTupleIsValid(arrtup))
+ elog(ERROR, "cache lookup failed for type %u", arrtypoid);
+
+ /* Pass down only the typmod-function changes */
+ memset(&arrparams, 0, sizeof(arrparams));
+ arrparams.updateTypmodin = atparams->updateTypmodin;
+ arrparams.updateTypmodout = atparams->updateTypmodout;
+ arrparams.typmodinOid = atparams->typmodinOid;
+ arrparams.typmodoutOid = atparams->typmodoutOid;
+
+ AlterTypeRecurse(arrtypoid, true, arrtup, catalog, &arrparams);
+
+ ReleaseSysCache(arrtup);
+ }
+ }
+
+ /*
+ * Now we need to recurse to domains. However, some properties are not
+ * inherited by domains, so clear the update flags for those.
+ * (Note this intentionally modifies the caller's atparams struct.)
+ */
+ atparams->updateReceive = false; /* domains use F_DOMAIN_RECV */
+ atparams->updateTypmodin = false; /* domains don't have typmods */
+ atparams->updateTypmodout = false;
+ atparams->updateSubscript = false; /* domains don't have subscriptors */
+
+ /* Skip the scan if nothing remains to be done */
+ if (!(atparams->updateStorage ||
+ atparams->updateSend ||
+ atparams->updateAnalyze))
+ return;
+
+ /*
+ * Search pg_type for possible domains over this type. This is a
+ * sequential scan (InvalidOid index, indexOK = false).
+ */
+ ScanKeyInit(&key[0],
+ Anum_pg_type_typbasetype,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(typeOid));
+
+ scan = systable_beginscan(catalog, InvalidOid, false,
+ NULL, 1, key);
+
+ while ((domainTup = systable_getnext(scan)) != NULL)
+ {
+ Form_pg_type domainForm = (Form_pg_type) GETSTRUCT(domainTup);
+
+ /*
+ * Shouldn't have a nonzero typbasetype in a non-domain, but let's
+ * check
+ */
+ if (domainForm->typtype != TYPTYPE_DOMAIN)
+ continue;
+
+ AlterTypeRecurse(domainForm->oid, false, domainTup, catalog, atparams);
+ }
+
+ systable_endscan(scan);
+}
diff --git a/src/backend/commands/user.c b/src/backend/commands/user.c
new file mode 100644
index 0000000..cba8e19
--- /dev/null
+++ b/src/backend/commands/user.c
@@ -0,0 +1,1645 @@
+/*-------------------------------------------------------------------------
+ *
+ * user.c
+ * Commands for manipulating roles (formerly called users).
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/backend/commands/user.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/htup_details.h"
+#include "access/table.h"
+#include "access/xact.h"
+#include "catalog/binary_upgrade.h"
+#include "catalog/catalog.h"
+#include "catalog/dependency.h"
+#include "catalog/indexing.h"
+#include "catalog/objectaccess.h"
+#include "catalog/pg_auth_members.h"
+#include "catalog/pg_authid.h"
+#include "catalog/pg_database.h"
+#include "catalog/pg_db_role_setting.h"
+#include "commands/comment.h"
+#include "commands/dbcommands.h"
+#include "commands/defrem.h"
+#include "commands/seclabel.h"
+#include "commands/user.h"
+#include "libpq/crypt.h"
+#include "miscadmin.h"
+#include "storage/lmgr.h"
+#include "utils/acl.h"
+#include "utils/builtins.h"
+#include "utils/fmgroids.h"
+#include "utils/syscache.h"
+#include "utils/timestamp.h"
+
+/* Potentially set by pg_upgrade_support functions */
+Oid binary_upgrade_next_pg_authid_oid = InvalidOid;
+
+
+/* GUC parameter: default password hashing algorithm for CREATE/ALTER ROLE */
+int Password_encryption = PASSWORD_TYPE_SCRAM_SHA_256;
+
+/* Hook to check passwords in CreateRole() and AlterRole() */
+check_password_hook_type check_password_hook = NULL;
+
+/* Internal helpers for adding/removing members of a role (defined below) */
+static void AddRoleMems(const char *rolename, Oid roleid,
+ List *memberSpecs, List *memberIds,
+ Oid grantorId, bool admin_opt);
+static void DelRoleMems(const char *rolename, Oid roleid,
+ List *memberSpecs, List *memberIds,
+ bool admin_opt);
+
+
+/*
+ * Does the current user have the CREATEROLE privilege?
+ */
+static bool
+have_createrole_privilege(void)
+{
+ Oid current_user_id = GetUserId();
+
+ return has_createrole_privilege(current_user_id);
+}
+
+
+/*
+ * CREATE ROLE
+ *
+ * Parses the statement's options, performs permission and name checks,
+ * inserts the new pg_authid row, and establishes any requested role
+ * memberships. Returns the OID of the new role.
+ */
+Oid
+CreateRole(ParseState *pstate, CreateRoleStmt *stmt)
+{
+ Relation pg_authid_rel;
+ TupleDesc pg_authid_dsc;
+ HeapTuple tuple;
+ Datum new_record[Natts_pg_authid];
+ bool new_record_nulls[Natts_pg_authid];
+ Oid roleid;
+ ListCell *item;
+ ListCell *option;
+ char *password = NULL; /* user password */
+ bool issuper = false; /* Make the user a superuser? */
+ bool inherit = true; /* Auto inherit privileges? */
+ bool createrole = false; /* Can this user create roles? */
+ bool createdb = false; /* Can the user create databases? */
+ bool canlogin = false; /* Can this user login? */
+ bool isreplication = false; /* Is this a replication role? */
+ bool bypassrls = false; /* Is this a row security enabled role? */
+ int connlimit = -1; /* maximum connections allowed */
+ List *addroleto = NIL; /* roles to make this a member of */
+ List *rolemembers = NIL; /* roles to be members of this role */
+ List *adminmembers = NIL; /* roles to be admins of this role */
+ char *validUntil = NULL; /* time the login is valid until */
+ Datum validUntil_datum; /* same, as timestamptz Datum */
+ bool validUntil_null;
+ DefElem *dpassword = NULL;
+ DefElem *dissuper = NULL;
+ DefElem *dinherit = NULL;
+ DefElem *dcreaterole = NULL;
+ DefElem *dcreatedb = NULL;
+ DefElem *dcanlogin = NULL;
+ DefElem *disreplication = NULL;
+ DefElem *dconnlimit = NULL;
+ DefElem *daddroleto = NULL;
+ DefElem *drolemembers = NULL;
+ DefElem *dadminmembers = NULL;
+ DefElem *dvalidUntil = NULL;
+ DefElem *dbypassRLS = NULL;
+
+ /* The defaults can vary depending on the original statement type */
+ switch (stmt->stmt_type)
+ {
+ case ROLESTMT_ROLE:
+ break;
+ case ROLESTMT_USER:
+ canlogin = true;
+ /* may eventually want inherit to default to false here */
+ break;
+ case ROLESTMT_GROUP:
+ break;
+ }
+
+ /*
+ * Extract options from the statement node tree. Each option may appear
+ * at most once; duplicates raise an error via errorConflictingDefElem.
+ */
+ foreach(option, stmt->options)
+ {
+ DefElem *defel = (DefElem *) lfirst(option);
+
+ if (strcmp(defel->defname, "password") == 0)
+ {
+ if (dpassword)
+ errorConflictingDefElem(defel, pstate);
+ dpassword = defel;
+ }
+ else if (strcmp(defel->defname, "sysid") == 0)
+ {
+ ereport(NOTICE,
+ (errmsg("SYSID can no longer be specified")));
+ }
+ else if (strcmp(defel->defname, "superuser") == 0)
+ {
+ if (dissuper)
+ errorConflictingDefElem(defel, pstate);
+ dissuper = defel;
+ }
+ else if (strcmp(defel->defname, "inherit") == 0)
+ {
+ if (dinherit)
+ errorConflictingDefElem(defel, pstate);
+ dinherit = defel;
+ }
+ else if (strcmp(defel->defname, "createrole") == 0)
+ {
+ if (dcreaterole)
+ errorConflictingDefElem(defel, pstate);
+ dcreaterole = defel;
+ }
+ else if (strcmp(defel->defname, "createdb") == 0)
+ {
+ if (dcreatedb)
+ errorConflictingDefElem(defel, pstate);
+ dcreatedb = defel;
+ }
+ else if (strcmp(defel->defname, "canlogin") == 0)
+ {
+ if (dcanlogin)
+ errorConflictingDefElem(defel, pstate);
+ dcanlogin = defel;
+ }
+ else if (strcmp(defel->defname, "isreplication") == 0)
+ {
+ if (disreplication)
+ errorConflictingDefElem(defel, pstate);
+ disreplication = defel;
+ }
+ else if (strcmp(defel->defname, "connectionlimit") == 0)
+ {
+ if (dconnlimit)
+ errorConflictingDefElem(defel, pstate);
+ dconnlimit = defel;
+ }
+ else if (strcmp(defel->defname, "addroleto") == 0)
+ {
+ if (daddroleto)
+ errorConflictingDefElem(defel, pstate);
+ daddroleto = defel;
+ }
+ else if (strcmp(defel->defname, "rolemembers") == 0)
+ {
+ if (drolemembers)
+ errorConflictingDefElem(defel, pstate);
+ drolemembers = defel;
+ }
+ else if (strcmp(defel->defname, "adminmembers") == 0)
+ {
+ if (dadminmembers)
+ errorConflictingDefElem(defel, pstate);
+ dadminmembers = defel;
+ }
+ else if (strcmp(defel->defname, "validUntil") == 0)
+ {
+ if (dvalidUntil)
+ errorConflictingDefElem(defel, pstate);
+ dvalidUntil = defel;
+ }
+ else if (strcmp(defel->defname, "bypassrls") == 0)
+ {
+ if (dbypassRLS)
+ errorConflictingDefElem(defel, pstate);
+ dbypassRLS = defel;
+ }
+ else
+ elog(ERROR, "option \"%s\" not recognized",
+ defel->defname);
+ }
+
+ /* Convert the collected DefElems to their actual values */
+ if (dpassword && dpassword->arg)
+ password = strVal(dpassword->arg);
+ if (dissuper)
+ issuper = boolVal(dissuper->arg);
+ if (dinherit)
+ inherit = boolVal(dinherit->arg);
+ if (dcreaterole)
+ createrole = boolVal(dcreaterole->arg);
+ if (dcreatedb)
+ createdb = boolVal(dcreatedb->arg);
+ if (dcanlogin)
+ canlogin = boolVal(dcanlogin->arg);
+ if (disreplication)
+ isreplication = boolVal(disreplication->arg);
+ if (dconnlimit)
+ {
+ connlimit = intVal(dconnlimit->arg);
+ if (connlimit < -1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid connection limit: %d", connlimit)));
+ }
+ if (daddroleto)
+ addroleto = (List *) daddroleto->arg;
+ if (drolemembers)
+ rolemembers = (List *) drolemembers->arg;
+ if (dadminmembers)
+ adminmembers = (List *) dadminmembers->arg;
+ if (dvalidUntil)
+ validUntil = strVal(dvalidUntil->arg);
+ if (dbypassRLS)
+ bypassrls = boolVal(dbypassRLS->arg);
+
+ /* Check some permissions first */
+ if (issuper)
+ {
+ if (!superuser())
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("must be superuser to create superusers")));
+ }
+ else if (isreplication)
+ {
+ if (!superuser())
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("must be superuser to create replication users")));
+ }
+ else if (bypassrls)
+ {
+ if (!superuser())
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("must be superuser to create bypassrls users")));
+ }
+ else
+ {
+ if (!have_createrole_privilege())
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("permission denied to create role")));
+ }
+
+ /*
+ * Check that the user is not trying to create a role in the reserved
+ * "pg_" namespace.
+ */
+ if (IsReservedName(stmt->role))
+ ereport(ERROR,
+ (errcode(ERRCODE_RESERVED_NAME),
+ errmsg("role name \"%s\" is reserved",
+ stmt->role),
+ errdetail("Role names starting with \"pg_\" are reserved.")));
+
+ /*
+ * If built with appropriate switch, whine when regression-testing
+ * conventions for role names are violated.
+ */
+#ifdef ENFORCE_REGRESSION_TEST_NAME_RESTRICTIONS
+ if (strncmp(stmt->role, "regress_", 8) != 0)
+ elog(WARNING, "roles created by regression test cases should have names starting with \"regress_\"");
+#endif
+
+ /*
+ * Check the pg_authid relation to be certain the role doesn't already
+ * exist.
+ */
+ pg_authid_rel = table_open(AuthIdRelationId, RowExclusiveLock);
+ pg_authid_dsc = RelationGetDescr(pg_authid_rel);
+
+ if (OidIsValid(get_role_oid(stmt->role, true)))
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg("role \"%s\" already exists",
+ stmt->role)));
+
+ /* Convert validuntil to internal form */
+ if (validUntil)
+ {
+ validUntil_datum = DirectFunctionCall3(timestamptz_in,
+ CStringGetDatum(validUntil),
+ ObjectIdGetDatum(InvalidOid),
+ Int32GetDatum(-1));
+ validUntil_null = false;
+ }
+ else
+ {
+ validUntil_datum = (Datum) 0;
+ validUntil_null = true;
+ }
+
+ /*
+ * Call the password checking hook if there is one defined
+ */
+ if (check_password_hook && password)
+ (*check_password_hook) (stmt->role,
+ password,
+ get_password_type(password),
+ validUntil_datum,
+ validUntil_null);
+
+ /*
+ * Build a tuple to insert
+ */
+ MemSet(new_record, 0, sizeof(new_record));
+ MemSet(new_record_nulls, false, sizeof(new_record_nulls));
+
+ new_record[Anum_pg_authid_rolname - 1] =
+ DirectFunctionCall1(namein, CStringGetDatum(stmt->role));
+
+ new_record[Anum_pg_authid_rolsuper - 1] = BoolGetDatum(issuper);
+ new_record[Anum_pg_authid_rolinherit - 1] = BoolGetDatum(inherit);
+ new_record[Anum_pg_authid_rolcreaterole - 1] = BoolGetDatum(createrole);
+ new_record[Anum_pg_authid_rolcreatedb - 1] = BoolGetDatum(createdb);
+ new_record[Anum_pg_authid_rolcanlogin - 1] = BoolGetDatum(canlogin);
+ new_record[Anum_pg_authid_rolreplication - 1] = BoolGetDatum(isreplication);
+ new_record[Anum_pg_authid_rolconnlimit - 1] = Int32GetDatum(connlimit);
+
+ if (password)
+ {
+ char *shadow_pass;
+ const char *logdetail = NULL;
+
+ /*
+ * Don't allow an empty password. Libpq treats an empty password the
+ * same as no password at all, and won't even try to authenticate. But
+ * other clients might, so allowing it would be confusing. By clearing
+ * the password when an empty string is specified, the account is
+ * consistently locked for all clients.
+ *
+ * Note that this only covers passwords stored in the database itself.
+ * There are also checks in the authentication code, to forbid an
+ * empty password from being used with authentication methods that
+ * fetch the password from an external system, like LDAP or PAM.
+ */
+ if (password[0] == '\0' ||
+ plain_crypt_verify(stmt->role, password, "", &logdetail) == STATUS_OK)
+ {
+ ereport(NOTICE,
+ (errmsg("empty string is not a valid password, clearing password")));
+ new_record_nulls[Anum_pg_authid_rolpassword - 1] = true;
+ }
+ else
+ {
+ /* Encrypt the password to the requested format. */
+ shadow_pass = encrypt_password(Password_encryption, stmt->role,
+ password);
+ new_record[Anum_pg_authid_rolpassword - 1] =
+ CStringGetTextDatum(shadow_pass);
+ }
+ }
+ else
+ new_record_nulls[Anum_pg_authid_rolpassword - 1] = true;
+
+ new_record[Anum_pg_authid_rolvaliduntil - 1] = validUntil_datum;
+ new_record_nulls[Anum_pg_authid_rolvaliduntil - 1] = validUntil_null;
+
+ new_record[Anum_pg_authid_rolbypassrls - 1] = BoolGetDatum(bypassrls);
+
+ /*
+ * pg_largeobject_metadata contains pg_authid.oid's, so we use the
+ * binary-upgrade override.
+ */
+ if (IsBinaryUpgrade)
+ {
+ if (!OidIsValid(binary_upgrade_next_pg_authid_oid))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("pg_authid OID value not set when in binary upgrade mode")));
+
+ /* Consume the pre-assigned OID (it is single-use) */
+ roleid = binary_upgrade_next_pg_authid_oid;
+ binary_upgrade_next_pg_authid_oid = InvalidOid;
+ }
+ else
+ {
+ roleid = GetNewOidWithIndex(pg_authid_rel, AuthIdOidIndexId,
+ Anum_pg_authid_oid);
+ }
+
+ new_record[Anum_pg_authid_oid - 1] = ObjectIdGetDatum(roleid);
+
+ tuple = heap_form_tuple(pg_authid_dsc, new_record, new_record_nulls);
+
+ /*
+ * Insert new record in the pg_authid table
+ */
+ CatalogTupleInsert(pg_authid_rel, tuple);
+
+ /*
+ * Advance command counter so we can see new record; else tests in
+ * AddRoleMems may fail.
+ */
+ if (addroleto || adminmembers || rolemembers)
+ CommandCounterIncrement();
+
+ /*
+ * Add the new role to the specified existing roles.
+ */
+ if (addroleto)
+ {
+ RoleSpec *thisrole = makeNode(RoleSpec);
+ List *thisrole_list = list_make1(thisrole);
+ List *thisrole_oidlist = list_make1_oid(roleid);
+
+ thisrole->roletype = ROLESPEC_CSTRING;
+ thisrole->rolename = stmt->role;
+ thisrole->location = -1;
+
+ foreach(item, addroleto)
+ {
+ RoleSpec *oldrole = lfirst(item);
+ HeapTuple oldroletup = get_rolespec_tuple(oldrole);
+ Form_pg_authid oldroleform = (Form_pg_authid) GETSTRUCT(oldroletup);
+ Oid oldroleid = oldroleform->oid;
+ char *oldrolename = NameStr(oldroleform->rolname);
+
+ AddRoleMems(oldrolename, oldroleid,
+ thisrole_list,
+ thisrole_oidlist,
+ GetUserId(), false);
+
+ ReleaseSysCache(oldroletup);
+ }
+ }
+
+ /*
+ * Add the specified members to this new role. adminmembers get the admin
+ * option, rolemembers don't.
+ */
+ AddRoleMems(stmt->role, roleid,
+ adminmembers, roleSpecsToIds(adminmembers),
+ GetUserId(), true);
+ AddRoleMems(stmt->role, roleid,
+ rolemembers, roleSpecsToIds(rolemembers),
+ GetUserId(), false);
+
+ /* Post creation hook for new role */
+ InvokeObjectPostCreateHook(AuthIdRelationId, roleid, 0);
+
+ /*
+ * Close pg_authid, but keep lock till commit.
+ */
+ table_close(pg_authid_rel, NoLock);
+
+ return roleid;
+}
+
+
+/*
+ * ALTER ROLE
+ *
+ * Note: the rolemembers option accepted here is intended to support the
+ * backwards-compatible ALTER GROUP syntax. Although it will work to say
+ * "ALTER ROLE role ROLE rolenames", we don't document it.
+ */
+Oid
+AlterRole(ParseState *pstate, AlterRoleStmt *stmt)
+{
+	Datum		new_record[Natts_pg_authid];
+	bool		new_record_nulls[Natts_pg_authid];
+	bool		new_record_repl[Natts_pg_authid];
+	Relation	pg_authid_rel;
+	TupleDesc	pg_authid_dsc;
+	HeapTuple	tuple,
+				new_tuple;
+	Form_pg_authid authform;
+	ListCell   *option;
+	char	   *rolename;
+	char	   *password = NULL;	/* user password */
+	int			connlimit = -1;		/* maximum connections allowed */
+	char	   *validUntil = NULL;	/* time the login is valid until */
+	Datum		validUntil_datum;	/* same, as timestamptz Datum */
+	bool		validUntil_null;
+	DefElem    *dpassword = NULL;
+	DefElem    *dissuper = NULL;
+	DefElem    *dinherit = NULL;
+	DefElem    *dcreaterole = NULL;
+	DefElem    *dcreatedb = NULL;
+	DefElem    *dcanlogin = NULL;
+	DefElem    *disreplication = NULL;
+	DefElem    *dconnlimit = NULL;
+	DefElem    *drolemembers = NULL;
+	DefElem    *dvalidUntil = NULL;
+	DefElem    *dbypassRLS = NULL;
+	Oid			roleid;
+
+	check_rolespec_name(stmt->role,
+						_("Cannot alter reserved roles."));
+
+	/* Extract options from the statement node tree */
+	foreach(option, stmt->options)
+	{
+		DefElem    *defel = (DefElem *) lfirst(option);
+
+		if (strcmp(defel->defname, "password") == 0)
+		{
+			if (dpassword)
+				errorConflictingDefElem(defel, pstate);
+			dpassword = defel;
+		}
+		else if (strcmp(defel->defname, "superuser") == 0)
+		{
+			if (dissuper)
+				errorConflictingDefElem(defel, pstate);
+			dissuper = defel;
+		}
+		else if (strcmp(defel->defname, "inherit") == 0)
+		{
+			if (dinherit)
+				errorConflictingDefElem(defel, pstate);
+			dinherit = defel;
+		}
+		else if (strcmp(defel->defname, "createrole") == 0)
+		{
+			if (dcreaterole)
+				errorConflictingDefElem(defel, pstate);
+			dcreaterole = defel;
+		}
+		else if (strcmp(defel->defname, "createdb") == 0)
+		{
+			if (dcreatedb)
+				errorConflictingDefElem(defel, pstate);
+			dcreatedb = defel;
+		}
+		else if (strcmp(defel->defname, "canlogin") == 0)
+		{
+			if (dcanlogin)
+				errorConflictingDefElem(defel, pstate);
+			dcanlogin = defel;
+		}
+		else if (strcmp(defel->defname, "isreplication") == 0)
+		{
+			if (disreplication)
+				errorConflictingDefElem(defel, pstate);
+			disreplication = defel;
+		}
+		else if (strcmp(defel->defname, "connectionlimit") == 0)
+		{
+			if (dconnlimit)
+				errorConflictingDefElem(defel, pstate);
+			dconnlimit = defel;
+		}
+		/* rolemembers is only legal with ALTER GROUP ADD/DROP (action != 0) */
+		else if (strcmp(defel->defname, "rolemembers") == 0 &&
+				 stmt->action != 0)
+		{
+			if (drolemembers)
+				errorConflictingDefElem(defel, pstate);
+			drolemembers = defel;
+		}
+		else if (strcmp(defel->defname, "validUntil") == 0)
+		{
+			if (dvalidUntil)
+				errorConflictingDefElem(defel, pstate);
+			dvalidUntil = defel;
+		}
+		else if (strcmp(defel->defname, "bypassrls") == 0)
+		{
+			if (dbypassRLS)
+				errorConflictingDefElem(defel, pstate);
+			dbypassRLS = defel;
+		}
+		else
+			elog(ERROR, "option \"%s\" not recognized",
+				 defel->defname);
+	}
+
+	/* A NULL arg (i.e. PASSWORD NULL) means "clear the password", see below */
+	if (dpassword && dpassword->arg)
+		password = strVal(dpassword->arg);
+	if (dconnlimit)
+	{
+		connlimit = intVal(dconnlimit->arg);
+		if (connlimit < -1)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("invalid connection limit: %d", connlimit)));
+	}
+	if (dvalidUntil)
+		validUntil = strVal(dvalidUntil->arg);
+
+	/*
+	 * Scan the pg_authid relation to be certain the user exists.
+	 */
+	pg_authid_rel = table_open(AuthIdRelationId, RowExclusiveLock);
+	pg_authid_dsc = RelationGetDescr(pg_authid_rel);
+
+	tuple = get_rolespec_tuple(stmt->role);
+	authform = (Form_pg_authid) GETSTRUCT(tuple);
+	rolename = pstrdup(NameStr(authform->rolname));
+	roleid = authform->oid;
+
+	/*
+	 * To mess with a superuser or replication role in any way you gotta be
+	 * superuser. We also insist on superuser to change the BYPASSRLS
+	 * property. Otherwise, if you don't have createrole, you're only allowed
+	 * to change your own password.
+	 */
+	if (authform->rolsuper || dissuper)
+	{
+		if (!superuser())
+			ereport(ERROR,
+					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+					 errmsg("must be superuser to alter superuser roles or change superuser attribute")));
+	}
+	else if (authform->rolreplication || disreplication)
+	{
+		if (!superuser())
+			ereport(ERROR,
+					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+					 errmsg("must be superuser to alter replication roles or change replication attribute")));
+	}
+	else if (dbypassRLS)
+	{
+		if (!superuser())
+			ereport(ERROR,
+					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+					 errmsg("must be superuser to change bypassrls attribute")));
+	}
+	else if (!have_createrole_privilege())
+	{
+		/*
+		 * check the rest: without createrole, the only permitted operation
+		 * is changing one's own password (and nothing else)
+		 */
+		if (dinherit || dcreaterole || dcreatedb || dcanlogin || dconnlimit ||
+			drolemembers || dvalidUntil || !dpassword || roleid != GetUserId())
+			ereport(ERROR,
+					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+					 errmsg("permission denied")));
+	}
+
+	/* Convert validuntil to internal form */
+	if (dvalidUntil)
+	{
+		validUntil_datum = DirectFunctionCall3(timestamptz_in,
+											   CStringGetDatum(validUntil),
+											   ObjectIdGetDatum(InvalidOid),
+											   Int32GetDatum(-1));
+		validUntil_null = false;
+	}
+	else
+	{
+		/* fetch existing setting in case hook needs it */
+		validUntil_datum = SysCacheGetAttr(AUTHNAME, tuple,
+										   Anum_pg_authid_rolvaliduntil,
+										   &validUntil_null);
+	}
+
+	/*
+	 * Call the password checking hook if there is one defined
+	 */
+	if (check_password_hook && password)
+		(*check_password_hook) (rolename,
+								password,
+								get_password_type(password),
+								validUntil_datum,
+								validUntil_null);
+
+	/*
+	 * Build an updated tuple, perusing the information just obtained
+	 */
+	MemSet(new_record, 0, sizeof(new_record));
+	MemSet(new_record_nulls, false, sizeof(new_record_nulls));
+	MemSet(new_record_repl, false, sizeof(new_record_repl));
+
+	/*
+	 * issuper/createrole/etc
+	 */
+	if (dissuper)
+	{
+		new_record[Anum_pg_authid_rolsuper - 1] = BoolGetDatum(boolVal(dissuper->arg));
+		new_record_repl[Anum_pg_authid_rolsuper - 1] = true;
+	}
+
+	if (dinherit)
+	{
+		new_record[Anum_pg_authid_rolinherit - 1] = BoolGetDatum(boolVal(dinherit->arg));
+		new_record_repl[Anum_pg_authid_rolinherit - 1] = true;
+	}
+
+	if (dcreaterole)
+	{
+		new_record[Anum_pg_authid_rolcreaterole - 1] = BoolGetDatum(boolVal(dcreaterole->arg));
+		new_record_repl[Anum_pg_authid_rolcreaterole - 1] = true;
+	}
+
+	if (dcreatedb)
+	{
+		new_record[Anum_pg_authid_rolcreatedb - 1] = BoolGetDatum(boolVal(dcreatedb->arg));
+		new_record_repl[Anum_pg_authid_rolcreatedb - 1] = true;
+	}
+
+	if (dcanlogin)
+	{
+		new_record[Anum_pg_authid_rolcanlogin - 1] = BoolGetDatum(boolVal(dcanlogin->arg));
+		new_record_repl[Anum_pg_authid_rolcanlogin - 1] = true;
+	}
+
+	if (disreplication)
+	{
+		new_record[Anum_pg_authid_rolreplication - 1] = BoolGetDatum(boolVal(disreplication->arg));
+		new_record_repl[Anum_pg_authid_rolreplication - 1] = true;
+	}
+
+	if (dconnlimit)
+	{
+		new_record[Anum_pg_authid_rolconnlimit - 1] = Int32GetDatum(connlimit);
+		new_record_repl[Anum_pg_authid_rolconnlimit - 1] = true;
+	}
+
+	/* password */
+	if (password)
+	{
+		char	   *shadow_pass;
+		const char *logdetail = NULL;
+
+		/* Like in CREATE USER, don't allow an empty password. */
+		if (password[0] == '\0' ||
+			plain_crypt_verify(rolename, password, "", &logdetail) == STATUS_OK)
+		{
+			ereport(NOTICE,
+					(errmsg("empty string is not a valid password, clearing password")));
+			new_record_nulls[Anum_pg_authid_rolpassword - 1] = true;
+		}
+		else
+		{
+			/* Encrypt the password to the requested format. */
+			shadow_pass = encrypt_password(Password_encryption, rolename,
+										   password);
+			new_record[Anum_pg_authid_rolpassword - 1] =
+				CStringGetTextDatum(shadow_pass);
+		}
+		new_record_repl[Anum_pg_authid_rolpassword - 1] = true;
+	}
+
+	/* unset password (PASSWORD NULL was specified) */
+	if (dpassword && dpassword->arg == NULL)
+	{
+		new_record_repl[Anum_pg_authid_rolpassword - 1] = true;
+		new_record_nulls[Anum_pg_authid_rolpassword - 1] = true;
+	}
+
+	/* valid until */
+	new_record[Anum_pg_authid_rolvaliduntil - 1] = validUntil_datum;
+	new_record_nulls[Anum_pg_authid_rolvaliduntil - 1] = validUntil_null;
+	new_record_repl[Anum_pg_authid_rolvaliduntil - 1] = true;
+
+	if (dbypassRLS)
+	{
+		new_record[Anum_pg_authid_rolbypassrls - 1] = BoolGetDatum(boolVal(dbypassRLS->arg));
+		new_record_repl[Anum_pg_authid_rolbypassrls - 1] = true;
+	}
+
+	new_tuple = heap_modify_tuple(tuple, pg_authid_dsc, new_record,
+								  new_record_nulls, new_record_repl);
+	CatalogTupleUpdate(pg_authid_rel, &tuple->t_self, new_tuple);
+
+	InvokeObjectPostAlterHook(AuthIdRelationId, roleid, 0);
+
+	ReleaseSysCache(tuple);
+	heap_freetuple(new_tuple);
+
+	/*
+	 * Advance command counter so we can see new record; else tests in
+	 * AddRoleMems may fail.
+	 */
+	if (drolemembers)
+	{
+		List	   *rolemembers = (List *) drolemembers->arg;
+
+		CommandCounterIncrement();
+
+		if (stmt->action == +1) /* add members to role */
+			AddRoleMems(rolename, roleid,
+						rolemembers, roleSpecsToIds(rolemembers),
+						GetUserId(), false);
+		else if (stmt->action == -1)	/* drop members from role */
+			DelRoleMems(rolename, roleid,
+						rolemembers, roleSpecsToIds(rolemembers),
+						false);
+	}
+
+	/*
+	 * Close pg_authid, but keep lock till commit.
+	 */
+	table_close(pg_authid_rel, NoLock);
+
+	return roleid;
+}
+
+
+/*
+ * ALTER ROLE ... SET
+ *
+ * Also handles the variants where no role is given (stmt->role == NULL,
+ * i.e. settings for all roles) and where a target database is given
+ * (stmt->database != NULL, i.e. ALTER ROLE ... IN DATABASE ... SET).
+ * Returns the OID of the affected role, or InvalidOid if none was named.
+ */
+Oid
+AlterRoleSet(AlterRoleSetStmt *stmt)
+{
+	HeapTuple	roletuple;
+	Form_pg_authid roleform;
+	Oid			databaseid = InvalidOid;
+	Oid			roleid = InvalidOid;
+
+	if (stmt->role)
+	{
+		check_rolespec_name(stmt->role,
+							_("Cannot alter reserved roles."));
+
+		roletuple = get_rolespec_tuple(stmt->role);
+		roleform = (Form_pg_authid) GETSTRUCT(roletuple);
+		roleid = roleform->oid;
+
+		/*
+		 * Obtain a lock on the role and make sure it didn't go away in the
+		 * meantime.
+		 */
+		shdepLockAndCheckObject(AuthIdRelationId, roleid);
+
+		/*
+		 * To mess with a superuser you gotta be superuser; else you need
+		 * createrole, or just want to change your own settings
+		 */
+		if (roleform->rolsuper)
+		{
+			if (!superuser())
+				ereport(ERROR,
+						(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+						 errmsg("must be superuser to alter superusers")));
+		}
+		else
+		{
+			if (!have_createrole_privilege() && roleid != GetUserId())
+				ereport(ERROR,
+						(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+						 errmsg("permission denied")));
+		}
+
+		ReleaseSysCache(roletuple);
+	}
+
+	/* look up and lock the database, if specified */
+	if (stmt->database != NULL)
+	{
+		databaseid = get_database_oid(stmt->database, false);
+		shdepLockAndCheckObject(DatabaseRelationId, databaseid);
+
+		if (!stmt->role)
+		{
+			/*
+			 * If no role is specified, then this is effectively the same as
+			 * ALTER DATABASE ... SET, so use the same permission check.
+			 */
+			if (!pg_database_ownercheck(databaseid, GetUserId()))
+				aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE,
+							   stmt->database);
+		}
+	}
+
+	if (!stmt->role && !stmt->database)
+	{
+		/* Must be superuser to alter settings globally. */
+		if (!superuser())
+			ereport(ERROR,
+					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+					 errmsg("must be superuser to alter settings globally")));
+	}
+
+	AlterSetting(databaseid, roleid, stmt->setstmt);
+
+	return roleid;
+}
+
+
+/*
+ * DROP ROLE
+ *
+ * Drops each listed role after verifying it is not the current, outer, or
+ * session user, has no pg_shdepend dependencies, and (unless the invoker is
+ * superuser) is not itself a superuser.
+ */
+void
+DropRole(DropRoleStmt *stmt)
+{
+	Relation	pg_authid_rel,
+				pg_auth_members_rel;
+	ListCell   *item;
+
+	if (!have_createrole_privilege())
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 errmsg("permission denied to drop role")));
+
+	/*
+	 * Scan the pg_authid relation to find the Oid of the role(s) to be
+	 * deleted.
+	 */
+	pg_authid_rel = table_open(AuthIdRelationId, RowExclusiveLock);
+	pg_auth_members_rel = table_open(AuthMemRelationId, RowExclusiveLock);
+
+	foreach(item, stmt->roles)
+	{
+		RoleSpec   *rolspec = lfirst(item);
+		char	   *role;
+		HeapTuple	tuple,
+					tmp_tuple;
+		Form_pg_authid roleform;
+		ScanKeyData scankey;
+		char	   *detail;
+		char	   *detail_log;
+		SysScanDesc sscan;
+		Oid			roleid;
+
+		/* Pseudo-roles like CURRENT_USER cannot be dropped */
+		if (rolspec->roletype != ROLESPEC_CSTRING)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("cannot use special role specifier in DROP ROLE")));
+		role = rolspec->rolename;
+
+		tuple = SearchSysCache1(AUTHNAME, PointerGetDatum(role));
+		if (!HeapTupleIsValid(tuple))
+		{
+			if (!stmt->missing_ok)
+			{
+				ereport(ERROR,
+						(errcode(ERRCODE_UNDEFINED_OBJECT),
+						 errmsg("role \"%s\" does not exist", role)));
+			}
+			else
+			{
+				ereport(NOTICE,
+						(errmsg("role \"%s\" does not exist, skipping",
+								role)));
+			}
+
+			continue;
+		}
+
+		roleform = (Form_pg_authid) GETSTRUCT(tuple);
+		roleid = roleform->oid;
+
+		if (roleid == GetUserId())
+			ereport(ERROR,
+					(errcode(ERRCODE_OBJECT_IN_USE),
+					 errmsg("current user cannot be dropped")));
+		if (roleid == GetOuterUserId())
+			ereport(ERROR,
+					(errcode(ERRCODE_OBJECT_IN_USE),
+					 errmsg("current user cannot be dropped")));
+		if (roleid == GetSessionUserId())
+			ereport(ERROR,
+					(errcode(ERRCODE_OBJECT_IN_USE),
+					 errmsg("session user cannot be dropped")));
+
+		/*
+		 * For safety's sake, we allow createrole holders to drop ordinary
+		 * roles but not superuser roles. This is mainly to avoid the
+		 * scenario where you accidentally drop the last superuser.
+		 */
+		if (roleform->rolsuper && !superuser())
+			ereport(ERROR,
+					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+					 errmsg("must be superuser to drop superusers")));
+
+		/* DROP hook for the role being removed */
+		InvokeObjectDropHook(AuthIdRelationId, roleid, 0);
+
+		/*
+		 * Lock the role, so nobody can add dependencies to her while we drop
+		 * her. We keep the lock until the end of transaction.
+		 */
+		LockSharedObject(AuthIdRelationId, roleid, 0, AccessExclusiveLock);
+
+		/* Check for pg_shdepend entries depending on this role */
+		if (checkSharedDependencies(AuthIdRelationId, roleid,
+									&detail, &detail_log))
+			ereport(ERROR,
+					(errcode(ERRCODE_DEPENDENT_OBJECTS_STILL_EXIST),
+					 errmsg("role \"%s\" cannot be dropped because some objects depend on it",
+							role),
+					 errdetail_internal("%s", detail),
+					 errdetail_log("%s", detail_log)));
+
+		/*
+		 * Remove the role from the pg_authid table
+		 */
+		CatalogTupleDelete(pg_authid_rel, &tuple->t_self);
+
+		ReleaseSysCache(tuple);
+
+		/*
+		 * Remove role from the pg_auth_members table. We have to remove all
+		 * tuples that show it as either a role or a member.
+		 *
+		 * XXX what about grantor entries? Maybe we should do one heap scan.
+		 */
+		ScanKeyInit(&scankey,
+					Anum_pg_auth_members_roleid,
+					BTEqualStrategyNumber, F_OIDEQ,
+					ObjectIdGetDatum(roleid));
+
+		sscan = systable_beginscan(pg_auth_members_rel, AuthMemRoleMemIndexId,
+								   true, NULL, 1, &scankey);
+
+		while (HeapTupleIsValid(tmp_tuple = systable_getnext(sscan)))
+		{
+			CatalogTupleDelete(pg_auth_members_rel, &tmp_tuple->t_self);
+		}
+
+		systable_endscan(sscan);
+
+		ScanKeyInit(&scankey,
+					Anum_pg_auth_members_member,
+					BTEqualStrategyNumber, F_OIDEQ,
+					ObjectIdGetDatum(roleid));
+
+		sscan = systable_beginscan(pg_auth_members_rel, AuthMemMemRoleIndexId,
+								   true, NULL, 1, &scankey);
+
+		while (HeapTupleIsValid(tmp_tuple = systable_getnext(sscan)))
+		{
+			CatalogTupleDelete(pg_auth_members_rel, &tmp_tuple->t_self);
+		}
+
+		systable_endscan(sscan);
+
+		/*
+		 * Remove any comments or security labels on this role.
+		 */
+		DeleteSharedComments(roleid, AuthIdRelationId);
+		DeleteSharedSecurityLabel(roleid, AuthIdRelationId);
+
+		/*
+		 * Remove settings for this role.
+		 */
+		DropSetting(InvalidOid, roleid);
+
+		/*
+		 * Advance command counter so that later iterations of this loop will
+		 * see the changes already made. This is essential if, for example,
+		 * we are trying to drop both a role and one of its direct members ---
+		 * we'll get an error if we try to delete the linking pg_auth_members
+		 * tuple twice. (We do not need a CCI between the two delete loops
+		 * above, because it's not allowed for a role to directly contain
+		 * itself.)
+		 */
+		CommandCounterIncrement();
+	}
+
+	/*
+	 * Now we can clean up; but keep locks until commit.
+	 */
+	table_close(pg_auth_members_rel, NoLock);
+	table_close(pg_authid_rel, NoLock);
+}
+
+/*
+ * Rename role
+ *
+ * Renames role "oldname" to "newname" and returns the ObjectAddress of the
+ * affected pg_authid entry. An MD5 password is cleared as a side effect,
+ * since MD5 hashes incorporate the role name as salt.
+ */
+ObjectAddress
+RenameRole(const char *oldname, const char *newname)
+{
+	HeapTuple	oldtuple,
+				newtuple;
+	TupleDesc	dsc;
+	Relation	rel;
+	Datum		datum;
+	bool		isnull;
+	Datum		repl_val[Natts_pg_authid];
+	bool		repl_null[Natts_pg_authid];
+	bool		repl_repl[Natts_pg_authid];
+	int			i;
+	Oid			roleid;
+	ObjectAddress address;
+	Form_pg_authid authform;
+
+	rel = table_open(AuthIdRelationId, RowExclusiveLock);
+	dsc = RelationGetDescr(rel);
+
+	oldtuple = SearchSysCache1(AUTHNAME, CStringGetDatum(oldname));
+	if (!HeapTupleIsValid(oldtuple))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("role \"%s\" does not exist", oldname)));
+
+	/*
+	 * XXX Client applications probably store the session user somewhere, so
+	 * renaming it could cause confusion. On the other hand, there may not be
+	 * an actual problem besides a little confusion, so think about this and
+	 * decide. Same for SET ROLE ... we don't restrict renaming the current
+	 * effective userid, though.
+	 */
+
+	authform = (Form_pg_authid) GETSTRUCT(oldtuple);
+	roleid = authform->oid;
+
+	if (roleid == GetSessionUserId())
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("session user cannot be renamed")));
+	if (roleid == GetOuterUserId())
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("current user cannot be renamed")));
+
+	/*
+	 * Check that the user is not trying to rename a system role and not
+	 * trying to rename a role into the reserved "pg_" namespace.
+	 */
+	if (IsReservedName(NameStr(authform->rolname)))
+		ereport(ERROR,
+				(errcode(ERRCODE_RESERVED_NAME),
+				 errmsg("role name \"%s\" is reserved",
+						NameStr(authform->rolname)),
+				 errdetail("Role names starting with \"pg_\" are reserved.")));
+
+	if (IsReservedName(newname))
+		ereport(ERROR,
+				(errcode(ERRCODE_RESERVED_NAME),
+				 errmsg("role name \"%s\" is reserved",
+						newname),
+				 errdetail("Role names starting with \"pg_\" are reserved.")));
+
+	/*
+	 * If built with appropriate switch, whine when regression-testing
+	 * conventions for role names are violated.
+	 */
+#ifdef ENFORCE_REGRESSION_TEST_NAME_RESTRICTIONS
+	if (strncmp(newname, "regress_", 8) != 0)
+		elog(WARNING, "roles created by regression test cases should have names starting with \"regress_\"");
+#endif
+
+	/* make sure the new name doesn't exist */
+	if (SearchSysCacheExists1(AUTHNAME, CStringGetDatum(newname)))
+		ereport(ERROR,
+				(errcode(ERRCODE_DUPLICATE_OBJECT),
+				 errmsg("role \"%s\" already exists", newname)));
+
+	/*
+	 * createrole is enough privilege unless you want to mess with a superuser
+	 */
+	if (((Form_pg_authid) GETSTRUCT(oldtuple))->rolsuper)
+	{
+		if (!superuser())
+			ereport(ERROR,
+					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+					 errmsg("must be superuser to rename superusers")));
+	}
+	else
+	{
+		if (!have_createrole_privilege())
+			ereport(ERROR,
+					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+					 errmsg("permission denied to rename role")));
+	}
+
+	/* OK, construct the modified tuple */
+	for (i = 0; i < Natts_pg_authid; i++)
+		repl_repl[i] = false;
+
+	repl_repl[Anum_pg_authid_rolname - 1] = true;
+	repl_val[Anum_pg_authid_rolname - 1] = DirectFunctionCall1(namein,
+															   CStringGetDatum(newname));
+	repl_null[Anum_pg_authid_rolname - 1] = false;
+
+	datum = heap_getattr(oldtuple, Anum_pg_authid_rolpassword, dsc, &isnull);
+
+	if (!isnull && get_password_type(TextDatumGetCString(datum)) == PASSWORD_TYPE_MD5)
+	{
+		/* MD5 uses the username as salt, so just clear it on a rename */
+		repl_repl[Anum_pg_authid_rolpassword - 1] = true;
+		repl_null[Anum_pg_authid_rolpassword - 1] = true;
+
+		ereport(NOTICE,
+				(errmsg("MD5 password cleared because of role rename")));
+	}
+
+	newtuple = heap_modify_tuple(oldtuple, dsc, repl_val, repl_null, repl_repl);
+	CatalogTupleUpdate(rel, &oldtuple->t_self, newtuple);
+
+	InvokeObjectPostAlterHook(AuthIdRelationId, roleid, 0);
+
+	ObjectAddressSet(address, AuthIdRelationId, roleid);
+
+	ReleaseSysCache(oldtuple);
+
+	/*
+	 * Close pg_authid, but keep lock till commit.
+	 */
+	table_close(rel, NoLock);
+
+	return address;
+}
+
+/*
+ * GrantRoleStmt
+ *
+ * Grant/Revoke roles to/from roles
+ *
+ * stmt->is_grant selects grant vs. revoke; stmt->admin_opt restricts the
+ * operation to the WITH ADMIN OPTION flag.
+ */
+void
+GrantRole(GrantRoleStmt *stmt)
+{
+	Relation	pg_authid_rel;
+	Oid			grantor;
+	List	   *grantee_ids;
+	ListCell   *item;
+
+	/* default grantor is the current user unless GRANTED BY was given */
+	if (stmt->grantor)
+		grantor = get_rolespec_oid(stmt->grantor, false);
+	else
+		grantor = GetUserId();
+
+	grantee_ids = roleSpecsToIds(stmt->grantee_roles);
+
+	/* AccessShareLock is enough since we aren't modifying pg_authid */
+	pg_authid_rel = table_open(AuthIdRelationId, AccessShareLock);
+
+	/*
+	 * Step through all of the granted roles and add/remove entries for the
+	 * grantees, or, if admin_opt is set, then just add/remove the admin
+	 * option.
+	 *
+	 * Note: Permissions checking is done by AddRoleMems/DelRoleMems
+	 */
+	foreach(item, stmt->granted_roles)
+	{
+		AccessPriv *priv = (AccessPriv *) lfirst(item);
+		char	   *rolename = priv->priv_name;
+		Oid			roleid;
+
+		/* Must reject priv(columns) and ALL PRIVILEGES(columns) */
+		if (rolename == NULL || priv->cols != NIL)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_GRANT_OPERATION),
+					 errmsg("column names cannot be included in GRANT/REVOKE ROLE")));
+
+		roleid = get_role_oid(rolename, false);
+		if (stmt->is_grant)
+			AddRoleMems(rolename, roleid,
+						stmt->grantee_roles, grantee_ids,
+						grantor, stmt->admin_opt);
+		else
+			DelRoleMems(rolename, roleid,
+						stmt->grantee_roles, grantee_ids,
+						stmt->admin_opt);
+	}
+
+	/*
+	 * Close pg_authid, but keep lock till commit.
+	 */
+	table_close(pg_authid_rel, NoLock);
+}
+
+/*
+ * DropOwnedObjects
+ *
+ * Drop the objects owned by a given list of roles.
+ *
+ * The caller must have the privileges of every role in the list; the actual
+ * work is delegated to shdepDropOwned().
+ */
+void
+DropOwnedObjects(DropOwnedStmt *stmt)
+{
+	List	   *role_ids = roleSpecsToIds(stmt->roles);
+	ListCell   *cell;
+
+	/* Check privileges */
+	foreach(cell, role_ids)
+	{
+		Oid			roleid = lfirst_oid(cell);
+
+		if (!has_privs_of_role(GetUserId(), roleid))
+			ereport(ERROR,
+					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+					 errmsg("permission denied to drop objects")));
+	}
+
+	/* Ok, do it */
+	shdepDropOwned(role_ids, stmt->behavior);
+}
+
+/*
+ * ReassignOwnedObjects
+ *
+ * Give the objects owned by a given list of roles away to another user.
+ *
+ * The caller must have the privileges of every listed role and of the
+ * receiving role; the actual work is delegated to shdepReassignOwned().
+ */
+void
+ReassignOwnedObjects(ReassignOwnedStmt *stmt)
+{
+	List	   *role_ids = roleSpecsToIds(stmt->roles);
+	ListCell   *cell;
+	Oid			newrole;
+
+	/* Check privileges */
+	foreach(cell, role_ids)
+	{
+		Oid			roleid = lfirst_oid(cell);
+
+		if (!has_privs_of_role(GetUserId(), roleid))
+			ereport(ERROR,
+					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+					 errmsg("permission denied to reassign objects")));
+	}
+
+	/* Must have privileges on the receiving side too */
+	newrole = get_rolespec_oid(stmt->newrole, false);
+
+	if (!has_privs_of_role(GetUserId(), newrole))
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 errmsg("permission denied to reassign objects")));
+
+	/* Ok, do it */
+	shdepReassignOwned(role_ids, newrole);
+}
+
+/*
+ * roleSpecsToIds
+ *
+ * Given a list of RoleSpecs, generate a list of role OIDs in the same order.
+ *
+ * ROLESPEC_PUBLIC is not allowed.
+ *
+ * Errors out (via get_rolespec_oid with missing_ok = false) if any named
+ * role does not exist.
+ */
+List *
+roleSpecsToIds(List *memberNames)
+{
+	List	   *result = NIL;
+	ListCell   *l;
+
+	foreach(l, memberNames)
+	{
+		RoleSpec   *rolespec = lfirst_node(RoleSpec, l);
+		Oid			roleid;
+
+		roleid = get_rolespec_oid(rolespec, false);
+		result = lappend_oid(result, roleid);
+	}
+	return result;
+}
+
+/*
+ * AddRoleMems -- Add given members to the specified role
+ *
+ * rolename: name of role to add to (used only for error messages)
+ * roleid: OID of role to add to
+ * memberSpecs: list of RoleSpec of roles to add (used only for error messages)
+ * memberIds: OIDs of roles to add
+ * grantorId: who is granting the membership
+ * admin_opt: granting admin option?
+ *
+ * memberSpecs and memberIds must be parallel lists of equal length.
+ */
+static void
+AddRoleMems(const char *rolename, Oid roleid,
+			List *memberSpecs, List *memberIds,
+			Oid grantorId, bool admin_opt)
+{
+	Relation	pg_authmem_rel;
+	TupleDesc	pg_authmem_dsc;
+	ListCell   *specitem;
+	ListCell   *iditem;
+
+	Assert(list_length(memberSpecs) == list_length(memberIds));
+
+	/* Skip permission check if nothing to do */
+	if (!memberIds)
+		return;
+
+	/*
+	 * Check permissions: must have createrole or admin option on the role to
+	 * be changed. To mess with a superuser role, you gotta be superuser.
+	 */
+	if (superuser_arg(roleid))
+	{
+		if (!superuser())
+			ereport(ERROR,
+					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+					 errmsg("must be superuser to alter superusers")));
+	}
+	else
+	{
+		if (!have_createrole_privilege() &&
+			!is_admin_of_role(grantorId, roleid))
+			ereport(ERROR,
+					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+					 errmsg("must have admin option on role \"%s\"",
+							rolename)));
+	}
+
+	/*
+	 * The charter of pg_database_owner is to have exactly one, implicit,
+	 * situation-dependent member. There's no technical need for this
+	 * restriction. (One could lift it and take the further step of making
+	 * pg_database_ownercheck() equivalent to has_privs_of_role(roleid,
+	 * ROLE_PG_DATABASE_OWNER), in which case explicit, situation-independent
+	 * members could act as the owner of any database.)
+	 */
+	if (roleid == ROLE_PG_DATABASE_OWNER)
+		ereport(ERROR,
+				errmsg("role \"%s\" cannot have explicit members", rolename));
+
+	/*
+	 * The role membership grantor of record has little significance at
+	 * present. Nonetheless, inasmuch as users might look to it for a crude
+	 * audit trail, let only superusers impute the grant to a third party.
+	 */
+	if (grantorId != GetUserId() && !superuser())
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 errmsg("must be superuser to set grantor")));
+
+	pg_authmem_rel = table_open(AuthMemRelationId, RowExclusiveLock);
+	pg_authmem_dsc = RelationGetDescr(pg_authmem_rel);
+
+	forboth(specitem, memberSpecs, iditem, memberIds)
+	{
+		RoleSpec   *memberRole = lfirst_node(RoleSpec, specitem);
+		Oid			memberid = lfirst_oid(iditem);
+		HeapTuple	authmem_tuple;
+		HeapTuple	tuple;
+		Datum		new_record[Natts_pg_auth_members];
+		bool		new_record_nulls[Natts_pg_auth_members];
+		bool		new_record_repl[Natts_pg_auth_members];
+
+		/*
+		 * pg_database_owner is never a role member. Lifting this restriction
+		 * would require a policy decision about membership loops. One could
+		 * prevent loops, which would include making "ALTER DATABASE x OWNER
+		 * TO proposed_datdba" fail if is_member_of_role(pg_database_owner,
+		 * proposed_datdba). Hence, gaining a membership could reduce what a
+		 * role could do. Alternately, one could allow these memberships to
+		 * complete loops. A role could then have actual WITH ADMIN OPTION on
+		 * itself, prompting a decision about is_admin_of_role() treatment of
+		 * the case.
+		 *
+		 * Lifting this restriction also has policy implications for ownership
+		 * of shared objects (databases and tablespaces). We allow such
+		 * ownership, but we might find cause to ban it in the future.
+		 * Designing such a ban would more troublesome if the design had to
+		 * address pg_database_owner being a member of role FOO that owns a
+		 * shared object. (The effect of such ownership is that any owner of
+		 * another database can act as the owner of affected shared objects.)
+		 */
+		if (memberid == ROLE_PG_DATABASE_OWNER)
+			ereport(ERROR,
+					errmsg("role \"%s\" cannot be a member of any role",
+						   get_rolespec_name(memberRole)));
+
+		/*
+		 * Refuse creation of membership loops, including the trivial case
+		 * where a role is made a member of itself. We do this by checking to
+		 * see if the target role is already a member of the proposed member
+		 * role. We have to ignore possible superuserness, however, else we
+		 * could never grant membership in a superuser-privileged role.
+		 */
+		if (is_member_of_role_nosuper(roleid, memberid))
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_GRANT_OPERATION),
+					 errmsg("role \"%s\" is a member of role \"%s\"",
+							rolename, get_rolespec_name(memberRole))));
+
+		/*
+		 * Check if entry for this role/member already exists; if so, give
+		 * warning unless we are adding admin option.
+		 */
+		authmem_tuple = SearchSysCache2(AUTHMEMROLEMEM,
+										ObjectIdGetDatum(roleid),
+										ObjectIdGetDatum(memberid));
+		if (HeapTupleIsValid(authmem_tuple) &&
+			(!admin_opt ||
+			 ((Form_pg_auth_members) GETSTRUCT(authmem_tuple))->admin_option))
+		{
+			ereport(NOTICE,
+					(errmsg("role \"%s\" is already a member of role \"%s\"",
+							get_rolespec_name(memberRole), rolename)));
+			ReleaseSysCache(authmem_tuple);
+			continue;
+		}
+
+		/* Build a tuple to insert or update */
+		MemSet(new_record, 0, sizeof(new_record));
+		MemSet(new_record_nulls, false, sizeof(new_record_nulls));
+		MemSet(new_record_repl, false, sizeof(new_record_repl));
+
+		new_record[Anum_pg_auth_members_roleid - 1] = ObjectIdGetDatum(roleid);
+		new_record[Anum_pg_auth_members_member - 1] = ObjectIdGetDatum(memberid);
+		new_record[Anum_pg_auth_members_grantor - 1] = ObjectIdGetDatum(grantorId);
+		new_record[Anum_pg_auth_members_admin_option - 1] = BoolGetDatum(admin_opt);
+
+		if (HeapTupleIsValid(authmem_tuple))
+		{
+			/* Entry exists: update grantor and admin option in place */
+			new_record_repl[Anum_pg_auth_members_grantor - 1] = true;
+			new_record_repl[Anum_pg_auth_members_admin_option - 1] = true;
+			tuple = heap_modify_tuple(authmem_tuple, pg_authmem_dsc,
+									  new_record,
+									  new_record_nulls, new_record_repl);
+			CatalogTupleUpdate(pg_authmem_rel, &tuple->t_self, tuple);
+			ReleaseSysCache(authmem_tuple);
+		}
+		else
+		{
+			tuple = heap_form_tuple(pg_authmem_dsc,
+									new_record, new_record_nulls);
+			CatalogTupleInsert(pg_authmem_rel, tuple);
+		}
+
+		/* CCI after each change, in case there are duplicates in list */
+		CommandCounterIncrement();
+	}
+
+	/*
+	 * Close pg_authmem, but keep lock till commit.
+	 */
+	table_close(pg_authmem_rel, NoLock);
+}
+
+/*
+ * DelRoleMems -- Remove given members from the specified role
+ *
+ * rolename: name of role to del from (used only for error messages)
+ * roleid: OID of role to del from
+ * memberSpecs: list of RoleSpec of roles to del (used only for error messages)
+ * memberIds: OIDs of roles to del
+ * admin_opt: remove admin option only?
+ *
+ * memberSpecs and memberIds must be parallel lists of equal length.
+ */
+static void
+DelRoleMems(const char *rolename, Oid roleid,
+			List *memberSpecs, List *memberIds,
+			bool admin_opt)
+{
+	Relation	pg_authmem_rel;
+	TupleDesc	pg_authmem_dsc;
+	ListCell   *specitem;
+	ListCell   *iditem;
+
+	Assert(list_length(memberSpecs) == list_length(memberIds));
+
+	/* Skip permission check if nothing to do */
+	if (!memberIds)
+		return;
+
+	/*
+	 * Check permissions: must have createrole or admin option on the role to
+	 * be changed. To mess with a superuser role, you gotta be superuser.
+	 */
+	if (superuser_arg(roleid))
+	{
+		if (!superuser())
+			ereport(ERROR,
+					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+					 errmsg("must be superuser to alter superusers")));
+	}
+	else
+	{
+		if (!have_createrole_privilege() &&
+			!is_admin_of_role(GetUserId(), roleid))
+			ereport(ERROR,
+					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+					 errmsg("must have admin option on role \"%s\"",
+							rolename)));
+	}
+
+	pg_authmem_rel = table_open(AuthMemRelationId, RowExclusiveLock);
+	pg_authmem_dsc = RelationGetDescr(pg_authmem_rel);
+
+	forboth(specitem, memberSpecs, iditem, memberIds)
+	{
+		RoleSpec   *memberRole = lfirst(specitem);
+		Oid			memberid = lfirst_oid(iditem);
+		HeapTuple	authmem_tuple;
+
+		/*
+		 * Find entry for this role/member
+		 */
+		authmem_tuple = SearchSysCache2(AUTHMEMROLEMEM,
+										ObjectIdGetDatum(roleid),
+										ObjectIdGetDatum(memberid));
+		if (!HeapTupleIsValid(authmem_tuple))
+		{
+			/* Not a member: warn and move on rather than erroring out */
+			ereport(WARNING,
+					(errmsg("role \"%s\" is not a member of role \"%s\"",
+							get_rolespec_name(memberRole), rolename)));
+			continue;
+		}
+
+		if (!admin_opt)
+		{
+			/* Remove the entry altogether */
+			CatalogTupleDelete(pg_authmem_rel, &authmem_tuple->t_self);
+		}
+		else
+		{
+			/* Just turn off the admin option */
+			HeapTuple	tuple;
+			Datum		new_record[Natts_pg_auth_members];
+			bool		new_record_nulls[Natts_pg_auth_members];
+			bool		new_record_repl[Natts_pg_auth_members];
+
+			/* Build a tuple to update with */
+			MemSet(new_record, 0, sizeof(new_record));
+			MemSet(new_record_nulls, false, sizeof(new_record_nulls));
+			MemSet(new_record_repl, false, sizeof(new_record_repl));
+
+			new_record[Anum_pg_auth_members_admin_option - 1] = BoolGetDatum(false);
+			new_record_repl[Anum_pg_auth_members_admin_option - 1] = true;
+
+			tuple = heap_modify_tuple(authmem_tuple, pg_authmem_dsc,
+									  new_record,
+									  new_record_nulls, new_record_repl);
+			CatalogTupleUpdate(pg_authmem_rel, &tuple->t_self, tuple);
+		}
+
+		ReleaseSysCache(authmem_tuple);
+
+		/* CCI after each change, in case there are duplicates in list */
+		CommandCounterIncrement();
+	}
+
+	/*
+	 * Close pg_authmem, but keep lock till commit.
+	 */
+	table_close(pg_authmem_rel, NoLock);
+}
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
new file mode 100644
index 0000000..75b0ca9
--- /dev/null
+++ b/src/backend/commands/vacuum.c
@@ -0,0 +1,2465 @@
+/*-------------------------------------------------------------------------
+ *
+ * vacuum.c
+ * The postgres vacuum cleaner.
+ *
+ * This file includes (a) control and dispatch code for VACUUM and ANALYZE
+ * commands, (b) code to compute various vacuum thresholds, and (c) index
+ * vacuum code.
+ *
+ * VACUUM for heap AM is implemented in vacuumlazy.c, parallel vacuum in
+ * vacuumparallel.c, ANALYZE in analyze.c, and VACUUM FULL is a variant of
+ * CLUSTER, handled in cluster.c.
+ *
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/commands/vacuum.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <math.h>
+
+#include "access/clog.h"
+#include "access/commit_ts.h"
+#include "access/genam.h"
+#include "access/heapam.h"
+#include "access/htup_details.h"
+#include "access/multixact.h"
+#include "access/tableam.h"
+#include "access/transam.h"
+#include "access/xact.h"
+#include "catalog/namespace.h"
+#include "catalog/index.h"
+#include "catalog/pg_database.h"
+#include "catalog/pg_inherits.h"
+#include "catalog/pg_namespace.h"
+#include "commands/cluster.h"
+#include "commands/defrem.h"
+#include "commands/vacuum.h"
+#include "miscadmin.h"
+#include "nodes/makefuncs.h"
+#include "pgstat.h"
+#include "postmaster/autovacuum.h"
+#include "postmaster/bgworker_internals.h"
+#include "storage/bufmgr.h"
+#include "storage/lmgr.h"
+#include "storage/pmsignal.h"
+#include "storage/proc.h"
+#include "storage/procarray.h"
+#include "utils/acl.h"
+#include "utils/fmgroids.h"
+#include "utils/guc.h"
+#include "utils/memutils.h"
+#include "utils/pg_rusage.h"
+#include "utils/snapmgr.h"
+#include "utils/syscache.h"
+
+
+/*
+ * GUC parameters
+ */
+int vacuum_freeze_min_age;
+int vacuum_freeze_table_age;
+int vacuum_multixact_freeze_min_age;
+int vacuum_multixact_freeze_table_age;
+int vacuum_failsafe_age;
+int vacuum_multixact_failsafe_age;
+
+
+/* A few variables that don't seem worth passing around as parameters */
+static MemoryContext vac_context = NULL;
+static BufferAccessStrategy vac_strategy;
+
+
+/*
+ * Variables for cost-based parallel vacuum. See comments atop
+ * compute_parallel_delay to understand how it works.
+ */
+pg_atomic_uint32 *VacuumSharedCostBalance = NULL;
+pg_atomic_uint32 *VacuumActiveNWorkers = NULL;
+int VacuumCostBalanceLocal = 0;
+
+/* non-export function prototypes */
+static List *expand_vacuum_rel(VacuumRelation *vrel, int options);
+static List *get_all_vacuum_rels(int options);
+static void vac_truncate_clog(TransactionId frozenXID,
+ MultiXactId minMulti,
+ TransactionId lastSaneFrozenXid,
+ MultiXactId lastSaneMinMulti);
+static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params);
+static double compute_parallel_delay(void);
+static VacOptValue get_vacoptval_from_boolean(DefElem *def);
+static bool vac_tid_reaped(ItemPointer itemptr, void *state);
+static int vac_cmp_itemptr(const void *left, const void *right);
+
+/*
+ * Primary entry point for manual VACUUM and ANALYZE commands
+ *
+ * This is mainly a preparation wrapper for the real operations that will
+ * happen in vacuum().
+ *
+ * pstate is used only to report error cursor positions while parsing the
+ * options list; vacstmt carries the raw options and target relations;
+ * isTopLevel must be passed down from ProcessUtility so that vacuum() can
+ * enforce its transaction-block restrictions.
+ */
+void
+ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
+{
+ VacuumParams params;
+ bool verbose = false;
+ bool skip_locked = false;
+ bool analyze = false;
+ bool freeze = false;
+ bool full = false;
+ bool disable_page_skipping = false;
+ bool process_toast = true;
+ ListCell *lc;
+
+ /* index_cleanup and truncate values unspecified for now */
+ params.index_cleanup = VACOPTVALUE_UNSPECIFIED;
+ params.truncate = VACOPTVALUE_UNSPECIFIED;
+
+ /* By default parallel vacuum is enabled */
+ params.nworkers = 0;
+
+ /* Parse options list */
+ foreach(lc, vacstmt->options)
+ {
+ DefElem *opt = (DefElem *) lfirst(lc);
+
+ /* Parse common options for VACUUM and ANALYZE */
+ if (strcmp(opt->defname, "verbose") == 0)
+ verbose = defGetBoolean(opt);
+ else if (strcmp(opt->defname, "skip_locked") == 0)
+ skip_locked = defGetBoolean(opt);
+ else if (!vacstmt->is_vacuumcmd)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("unrecognized ANALYZE option \"%s\"", opt->defname),
+ parser_errposition(pstate, opt->location)));
+
+ /* Parse options available on VACUUM */
+ else if (strcmp(opt->defname, "analyze") == 0)
+ analyze = defGetBoolean(opt);
+ else if (strcmp(opt->defname, "freeze") == 0)
+ freeze = defGetBoolean(opt);
+ else if (strcmp(opt->defname, "full") == 0)
+ full = defGetBoolean(opt);
+ else if (strcmp(opt->defname, "disable_page_skipping") == 0)
+ disable_page_skipping = defGetBoolean(opt);
+ else if (strcmp(opt->defname, "index_cleanup") == 0)
+ {
+ /* Interpret no string as the default, which is 'auto' */
+ if (!opt->arg)
+ params.index_cleanup = VACOPTVALUE_AUTO;
+ else
+ {
+ char *sval = defGetString(opt);
+
+ /* Try matching on 'auto' string, or fall back on boolean */
+ if (pg_strcasecmp(sval, "auto") == 0)
+ params.index_cleanup = VACOPTVALUE_AUTO;
+ else
+ params.index_cleanup = get_vacoptval_from_boolean(opt);
+ }
+ }
+ else if (strcmp(opt->defname, "process_toast") == 0)
+ process_toast = defGetBoolean(opt);
+ else if (strcmp(opt->defname, "truncate") == 0)
+ params.truncate = get_vacoptval_from_boolean(opt);
+ else if (strcmp(opt->defname, "parallel") == 0)
+ {
+ if (opt->arg == NULL)
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("parallel option requires a value between 0 and %d",
+ MAX_PARALLEL_WORKER_LIMIT),
+ parser_errposition(pstate, opt->location)));
+ }
+ else
+ {
+ int nworkers;
+
+ nworkers = defGetInt32(opt);
+ if (nworkers < 0 || nworkers > MAX_PARALLEL_WORKER_LIMIT)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("parallel workers for vacuum must be between 0 and %d",
+ MAX_PARALLEL_WORKER_LIMIT),
+ parser_errposition(pstate, opt->location)));
+
+ /*
+ * Disable parallel vacuum, if user has specified parallel
+ * degree as zero.
+ */
+ if (nworkers == 0)
+ params.nworkers = -1;
+ else
+ params.nworkers = nworkers;
+ }
+ }
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("unrecognized VACUUM option \"%s\"", opt->defname),
+ parser_errposition(pstate, opt->location)));
+ }
+
+ /* Set vacuum options */
+ params.options =
+ (vacstmt->is_vacuumcmd ? VACOPT_VACUUM : VACOPT_ANALYZE) |
+ (verbose ? VACOPT_VERBOSE : 0) |
+ (skip_locked ? VACOPT_SKIP_LOCKED : 0) |
+ (analyze ? VACOPT_ANALYZE : 0) |
+ (freeze ? VACOPT_FREEZE : 0) |
+ (full ? VACOPT_FULL : 0) |
+ (disable_page_skipping ? VACOPT_DISABLE_PAGE_SKIPPING : 0) |
+ (process_toast ? VACOPT_PROCESS_TOAST : 0);
+
+ /* sanity checks on options */
+ Assert(params.options & (VACOPT_VACUUM | VACOPT_ANALYZE));
+ Assert((params.options & VACOPT_VACUUM) ||
+ !(params.options & (VACOPT_FULL | VACOPT_FREEZE)));
+
+ if ((params.options & VACOPT_FULL) && params.nworkers > 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("VACUUM FULL cannot be performed in parallel")));
+
+ /*
+ * Make sure VACOPT_ANALYZE is specified if any column lists are present.
+ */
+ if (!(params.options & VACOPT_ANALYZE))
+ {
+ ListCell *lc;
+
+ foreach(lc, vacstmt->rels)
+ {
+ VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
+
+ if (vrel->va_cols != NIL)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("ANALYZE option must be specified when a column list is provided")));
+ }
+ }
+
+ /*
+ * All freeze ages are zero if the FREEZE option is given; otherwise pass
+ * them as -1 which means to use the default values.
+ */
+ if (params.options & VACOPT_FREEZE)
+ {
+ params.freeze_min_age = 0;
+ params.freeze_table_age = 0;
+ params.multixact_freeze_min_age = 0;
+ params.multixact_freeze_table_age = 0;
+ }
+ else
+ {
+ params.freeze_min_age = -1;
+ params.freeze_table_age = -1;
+ params.multixact_freeze_min_age = -1;
+ params.multixact_freeze_table_age = -1;
+ }
+
+ /* user-invoked vacuum is never "for wraparound" */
+ params.is_wraparound = false;
+
+ /* user-invoked vacuum uses VACOPT_VERBOSE instead of log_min_duration */
+ params.log_min_duration = -1;
+
+ /* Now go through the common routine */
+ vacuum(vacstmt->rels, &params, NULL, isTopLevel);
+}
+
+/*
+ * Internal entry point for VACUUM and ANALYZE commands.
+ *
+ * relations, if not NIL, is a list of VacuumRelation to process; otherwise,
+ * we process all relevant tables in the database. For each VacuumRelation,
+ * if a valid OID is supplied, the table with that OID is what to process;
+ * otherwise, the VacuumRelation's RangeVar indicates what to process.
+ *
+ * params contains a set of parameters that can be used to customize the
+ * behavior.
+ *
+ * bstrategy is normally given as NULL, but in autovacuum it can be passed
+ * in to use the same buffer strategy object across multiple vacuum() calls.
+ *
+ * isTopLevel should be passed down from ProcessUtility.
+ *
+ * It is the caller's responsibility that all parameters are allocated in a
+ * memory context that will not disappear at transaction commit.
+ *
+ * Note: for VACUUM this routine commits the caller's transaction and runs
+ * each relation in its own transaction, starting a fresh one before
+ * returning; see the use_own_xacts logic below.
+ */
+void
+vacuum(List *relations, VacuumParams *params,
+ BufferAccessStrategy bstrategy, bool isTopLevel)
+{
+ /* Recursion guard; tested below to make vacuum() non-reentrant */
+ static bool in_vacuum = false;
+
+ const char *stmttype;
+ volatile bool in_outer_xact,
+ use_own_xacts;
+
+ Assert(params != NULL);
+
+ stmttype = (params->options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
+
+ /*
+ * We cannot run VACUUM inside a user transaction block; if we were inside
+ * a transaction, then our commit- and start-transaction-command calls
+ * would not have the intended effect! There are numerous other subtle
+ * dependencies on this, too.
+ *
+ * ANALYZE (without VACUUM) can run either way.
+ */
+ if (params->options & VACOPT_VACUUM)
+ {
+ PreventInTransactionBlock(isTopLevel, stmttype);
+ in_outer_xact = false;
+ }
+ else
+ in_outer_xact = IsInTransactionBlock(isTopLevel);
+
+ /*
+ * Due to static variables vac_context, anl_context and vac_strategy,
+ * vacuum() is not reentrant. This matters when VACUUM FULL or ANALYZE
+ * calls a hostile index expression that itself calls ANALYZE.
+ */
+ if (in_vacuum)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("%s cannot be executed from VACUUM or ANALYZE",
+ stmttype)));
+
+ /*
+ * Sanity check DISABLE_PAGE_SKIPPING option.
+ */
+ if ((params->options & VACOPT_FULL) != 0 &&
+ (params->options & VACOPT_DISABLE_PAGE_SKIPPING) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("VACUUM option DISABLE_PAGE_SKIPPING cannot be used with FULL")));
+
+ /* sanity check for PROCESS_TOAST */
+ if ((params->options & VACOPT_FULL) != 0 &&
+ (params->options & VACOPT_PROCESS_TOAST) == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("PROCESS_TOAST required with VACUUM FULL")));
+
+ /*
+ * Create special memory context for cross-transaction storage.
+ *
+ * Since it is a child of PortalContext, it will go away eventually even
+ * if we suffer an error; there's no need for special abort cleanup logic.
+ */
+ vac_context = AllocSetContextCreate(PortalContext,
+ "Vacuum",
+ ALLOCSET_DEFAULT_SIZES);
+
+ /*
+ * If caller didn't give us a buffer strategy object, make one in the
+ * cross-transaction memory context.
+ */
+ if (bstrategy == NULL)
+ {
+ MemoryContext old_context = MemoryContextSwitchTo(vac_context);
+
+ bstrategy = GetAccessStrategy(BAS_VACUUM);
+ MemoryContextSwitchTo(old_context);
+ }
+ vac_strategy = bstrategy;
+
+ /*
+ * Build list of relation(s) to process, putting any new data in
+ * vac_context for safekeeping.
+ */
+ if (relations != NIL)
+ {
+ List *newrels = NIL;
+ ListCell *lc;
+
+ foreach(lc, relations)
+ {
+ VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
+ List *sublist;
+ MemoryContext old_context;
+
+ sublist = expand_vacuum_rel(vrel, params->options);
+ old_context = MemoryContextSwitchTo(vac_context);
+ newrels = list_concat(newrels, sublist);
+ MemoryContextSwitchTo(old_context);
+ }
+ relations = newrels;
+ }
+ else
+ relations = get_all_vacuum_rels(params->options);
+
+ /*
+ * Decide whether we need to start/commit our own transactions.
+ *
+ * For VACUUM (with or without ANALYZE): always do so, so that we can
+ * release locks as soon as possible. (We could possibly use the outer
+ * transaction for a one-table VACUUM, but handling TOAST tables would be
+ * problematic.)
+ *
+ * For ANALYZE (no VACUUM): if inside a transaction block, we cannot
+ * start/commit our own transactions. Also, there's no need to do so if
+ * only processing one relation. For multiple relations when not within a
+ * transaction block, and also in an autovacuum worker, use own
+ * transactions so we can release locks sooner.
+ */
+ if (params->options & VACOPT_VACUUM)
+ use_own_xacts = true;
+ else
+ {
+ Assert(params->options & VACOPT_ANALYZE);
+ if (IsAutoVacuumWorkerProcess())
+ use_own_xacts = true;
+ else if (in_outer_xact)
+ use_own_xacts = false;
+ else if (list_length(relations) > 1)
+ use_own_xacts = true;
+ else
+ use_own_xacts = false;
+ }
+
+ /*
+ * vacuum_rel expects to be entered with no transaction active; it will
+ * start and commit its own transaction. But we are called by an SQL
+ * command, and so we are executing inside a transaction already. We
+ * commit the transaction started in PostgresMain() here, and start
+ * another one before exiting to match the commit waiting for us back in
+ * PostgresMain().
+ */
+ if (use_own_xacts)
+ {
+ Assert(!in_outer_xact);
+
+ /* ActiveSnapshot is not set by autovacuum */
+ if (ActiveSnapshotSet())
+ PopActiveSnapshot();
+
+ /* matches the StartTransaction in PostgresMain() */
+ CommitTransactionCommand();
+ }
+
+ /* Turn vacuum cost accounting on or off, and set/clear in_vacuum */
+ PG_TRY();
+ {
+ ListCell *cur;
+
+ in_vacuum = true;
+ VacuumCostActive = (VacuumCostDelay > 0);
+ VacuumCostBalance = 0;
+ VacuumPageHit = 0;
+ VacuumPageMiss = 0;
+ VacuumPageDirty = 0;
+ VacuumCostBalanceLocal = 0;
+ VacuumSharedCostBalance = NULL;
+ VacuumActiveNWorkers = NULL;
+
+ /*
+ * Loop to process each selected relation.
+ */
+ foreach(cur, relations)
+ {
+ VacuumRelation *vrel = lfirst_node(VacuumRelation, cur);
+
+ if (params->options & VACOPT_VACUUM)
+ {
+ /* vacuum_rel returning false means "skipped"; don't analyze */
+ if (!vacuum_rel(vrel->oid, vrel->relation, params))
+ continue;
+ }
+
+ if (params->options & VACOPT_ANALYZE)
+ {
+ /*
+ * If using separate xacts, start one for analyze. Otherwise,
+ * we can use the outer transaction.
+ */
+ if (use_own_xacts)
+ {
+ StartTransactionCommand();
+ /* functions in indexes may want a snapshot set */
+ PushActiveSnapshot(GetTransactionSnapshot());
+ }
+
+ analyze_rel(vrel->oid, vrel->relation, params,
+ vrel->va_cols, in_outer_xact, vac_strategy);
+
+ if (use_own_xacts)
+ {
+ PopActiveSnapshot();
+ CommitTransactionCommand();
+ }
+ else
+ {
+ /*
+ * If we're not using separate xacts, better separate the
+ * ANALYZE actions with CCIs. This avoids trouble if user
+ * says "ANALYZE t, t".
+ */
+ CommandCounterIncrement();
+ }
+ }
+ }
+ }
+ PG_FINALLY();
+ {
+ /* Always clear the recursion guard and cost accounting, even on error */
+ in_vacuum = false;
+ VacuumCostActive = false;
+ }
+ PG_END_TRY();
+
+ /*
+ * Finish up processing.
+ */
+ if (use_own_xacts)
+ {
+ /* here, we are not in a transaction */
+
+ /*
+ * This matches the CommitTransaction waiting for us in
+ * PostgresMain().
+ */
+ StartTransactionCommand();
+ }
+
+ if ((params->options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
+ {
+ /*
+ * Update pg_database.datfrozenxid, and truncate pg_xact if possible.
+ * (autovacuum.c does this for itself.)
+ */
+ vac_update_datfrozenxid();
+ }
+
+ /*
+ * Clean up working storage --- note we must do this after
+ * StartTransactionCommand, else we might be trying to delete the active
+ * context!
+ */
+ MemoryContextDelete(vac_context);
+ vac_context = NULL;
+}
+
+/*
+ * Check if a given relation can be safely vacuumed or analyzed. If the
+ * user is not the relation owner, issue a WARNING log message and return
+ * false to let the caller decide what to do with this relation. This
+ * routine is used to decide if a relation can be processed for VACUUM or
+ * ANALYZE.
+ *
+ * Returns true when the current user may process the relation; returns
+ * false (after emitting at most one "skipping" WARNING) otherwise.
+ * options must include VACOPT_VACUUM and/or VACOPT_ANALYZE and selects
+ * which warning text is used.
+ */
+bool
+vacuum_is_relation_owner(Oid relid, Form_pg_class reltuple, bits32 options)
+{
+ char *relname;
+
+ Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
+
+ /*
+ * Check permissions.
+ *
+ * We allow the user to vacuum or analyze a table if he is superuser, the
+ * table owner, or the database owner (but in the latter case, only if
+ * it's not a shared relation). pg_class_ownercheck includes the
+ * superuser case.
+ *
+ * Note we choose to treat permissions failure as a WARNING and keep
+ * trying to vacuum or analyze the rest of the DB --- is this appropriate?
+ */
+ if (pg_class_ownercheck(relid, GetUserId()) ||
+ (pg_database_ownercheck(MyDatabaseId, GetUserId()) && !reltuple->relisshared))
+ return true;
+
+ relname = NameStr(reltuple->relname);
+
+ if ((options & VACOPT_VACUUM) != 0)
+ {
+ if (reltuple->relisshared)
+ ereport(WARNING,
+ (errmsg("skipping \"%s\" --- only superuser can vacuum it",
+ relname)));
+ else if (reltuple->relnamespace == PG_CATALOG_NAMESPACE)
+ ereport(WARNING,
+ (errmsg("skipping \"%s\" --- only superuser or database owner can vacuum it",
+ relname)));
+ else
+ ereport(WARNING,
+ (errmsg("skipping \"%s\" --- only table or database owner can vacuum it",
+ relname)));
+
+ /*
+ * For VACUUM ANALYZE, both logs could show up, but just generate
+ * information for VACUUM as that would be the first one to be
+ * processed.
+ */
+ return false;
+ }
+
+ if ((options & VACOPT_ANALYZE) != 0)
+ {
+ if (reltuple->relisshared)
+ ereport(WARNING,
+ (errmsg("skipping \"%s\" --- only superuser can analyze it",
+ relname)));
+ else if (reltuple->relnamespace == PG_CATALOG_NAMESPACE)
+ ereport(WARNING,
+ (errmsg("skipping \"%s\" --- only superuser or database owner can analyze it",
+ relname)));
+ else
+ ereport(WARNING,
+ (errmsg("skipping \"%s\" --- only table or database owner can analyze it",
+ relname)));
+ }
+
+ return false;
+}
+
+
+/*
+ * vacuum_open_relation
+ *
+ * This routine is used for attempting to open and lock a relation which
+ * is going to be vacuumed or analyzed. If the relation cannot be opened
+ * or locked, a log is emitted if possible.
+ *
+ * Returns the opened Relation (with lmode held) on success, or NULL when
+ * the relation no longer exists or, with VACOPT_SKIP_LOCKED, its lock
+ * could not be acquired.  relation may be NULL, in which case no log
+ * message is generated for a failure.
+ */
+Relation
+vacuum_open_relation(Oid relid, RangeVar *relation, bits32 options,
+ bool verbose, LOCKMODE lmode)
+{
+ Relation rel;
+ bool rel_lock = true;
+ int elevel;
+
+ Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
+
+ /*
+ * Open the relation and get the appropriate lock on it.
+ *
+ * There's a race condition here: the relation may have gone away since
+ * the last time we saw it. If so, we don't need to vacuum or analyze it.
+ *
+ * If we've been asked not to wait for the relation lock, acquire it first
+ * in non-blocking mode, before calling try_relation_open().
+ */
+ if (!(options & VACOPT_SKIP_LOCKED))
+ rel = try_relation_open(relid, lmode);
+ else if (ConditionalLockRelationOid(relid, lmode))
+ rel = try_relation_open(relid, NoLock);
+ else
+ {
+ rel = NULL;
+ rel_lock = false;
+ }
+
+ /* if relation is opened, leave */
+ if (rel)
+ return rel;
+
+ /*
+ * Relation could not be opened, hence generate if possible a log
+ * informing on the situation.
+ *
+ * If the RangeVar is not defined, we do not have enough information to
+ * provide a meaningful log statement. Chances are that the caller has
+ * intentionally not provided this information so that this logging is
+ * skipped, anyway.
+ */
+ if (relation == NULL)
+ return NULL;
+
+ /*
+ * Determine the log level.
+ *
+ * For manual VACUUM or ANALYZE, we emit a WARNING to match the log
+ * statements in the permission checks; otherwise, only log if the caller
+ * so requested.
+ */
+ if (!IsAutoVacuumWorkerProcess())
+ elevel = WARNING;
+ else if (verbose)
+ elevel = LOG;
+ else
+ return NULL;
+
+ if ((options & VACOPT_VACUUM) != 0)
+ {
+ if (!rel_lock)
+ ereport(elevel,
+ (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
+ errmsg("skipping vacuum of \"%s\" --- lock not available",
+ relation->relname)));
+ else
+ ereport(elevel,
+ (errcode(ERRCODE_UNDEFINED_TABLE),
+ errmsg("skipping vacuum of \"%s\" --- relation no longer exists",
+ relation->relname)));
+
+ /*
+ * For VACUUM ANALYZE, both logs could show up, but just generate
+ * information for VACUUM as that would be the first one to be
+ * processed.
+ */
+ return NULL;
+ }
+
+ if ((options & VACOPT_ANALYZE) != 0)
+ {
+ if (!rel_lock)
+ ereport(elevel,
+ (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
+ errmsg("skipping analyze of \"%s\" --- lock not available",
+ relation->relname)));
+ else
+ ereport(elevel,
+ (errcode(ERRCODE_UNDEFINED_TABLE),
+ errmsg("skipping analyze of \"%s\" --- relation no longer exists",
+ relation->relname)));
+ }
+
+ return NULL;
+}
+
+
+/*
+ * Given a VacuumRelation, fill in the table OID if it wasn't specified,
+ * and optionally add VacuumRelations for partitions of the table.
+ *
+ * If a VacuumRelation does not have an OID supplied and is a partitioned
+ * table, an extra entry will be added to the output for each partition.
+ * Presently, only autovacuum supplies OIDs when calling vacuum(), and
+ * it does not want us to expand partitioned tables.
+ *
+ * We take care not to modify the input data structure, but instead build
+ * new VacuumRelation(s) to return. (But note that they will reference
+ * unmodified parts of the input, eg column lists.) New data structures
+ * are made in vac_context.
+ *
+ * Returns a List of VacuumRelation; the list is empty when the target
+ * relation's lock is unavailable (a WARNING is emitted in that case).
+ */
+static List *
+expand_vacuum_rel(VacuumRelation *vrel, int options)
+{
+ List *vacrels = NIL;
+ MemoryContext oldcontext;
+
+ /* If caller supplied OID, there's nothing we need do here. */
+ if (OidIsValid(vrel->oid))
+ {
+ oldcontext = MemoryContextSwitchTo(vac_context);
+ vacrels = lappend(vacrels, vrel);
+ MemoryContextSwitchTo(oldcontext);
+ }
+ else
+ {
+ /* Process a specific relation, and possibly partitions thereof */
+ Oid relid;
+ HeapTuple tuple;
+ Form_pg_class classForm;
+ bool include_parts;
+ int rvr_opts;
+
+ /*
+ * Since autovacuum workers supply OIDs when calling vacuum(), no
+ * autovacuum worker should reach this code.
+ */
+ Assert(!IsAutoVacuumWorkerProcess());
+
+ /*
+ * We transiently take AccessShareLock to protect the syscache lookup
+ * below, as well as find_all_inheritors's expectation that the caller
+ * holds some lock on the starting relation.
+ */
+ rvr_opts = (options & VACOPT_SKIP_LOCKED) ? RVR_SKIP_LOCKED : 0;
+ relid = RangeVarGetRelidExtended(vrel->relation,
+ AccessShareLock,
+ rvr_opts,
+ NULL, NULL);
+
+ /*
+ * If the lock is unavailable, emit the same log statement that
+ * vacuum_rel() and analyze_rel() would.
+ */
+ if (!OidIsValid(relid))
+ {
+ if (options & VACOPT_VACUUM)
+ ereport(WARNING,
+ (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
+ errmsg("skipping vacuum of \"%s\" --- lock not available",
+ vrel->relation->relname)));
+ else
+ ereport(WARNING,
+ (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
+ errmsg("skipping analyze of \"%s\" --- lock not available",
+ vrel->relation->relname)));
+ return vacrels;
+ }
+
+ /*
+ * To check whether the relation is a partitioned table and its
+ * ownership, fetch its syscache entry.
+ */
+ tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
+ if (!HeapTupleIsValid(tuple))
+ elog(ERROR, "cache lookup failed for relation %u", relid);
+ classForm = (Form_pg_class) GETSTRUCT(tuple);
+
+ /*
+ * Make a returnable VacuumRelation for this rel if user is a proper
+ * owner.
+ */
+ if (vacuum_is_relation_owner(relid, classForm, options))
+ {
+ oldcontext = MemoryContextSwitchTo(vac_context);
+ vacrels = lappend(vacrels, makeVacuumRelation(vrel->relation,
+ relid,
+ vrel->va_cols));
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+
+ include_parts = (classForm->relkind == RELKIND_PARTITIONED_TABLE);
+ ReleaseSysCache(tuple);
+
+ /*
+ * If it is, make relation list entries for its partitions. Note that
+ * the list returned by find_all_inheritors() includes the passed-in
+ * OID, so we have to skip that. There's no point in taking locks on
+ * the individual partitions yet, and doing so would just add
+ * unnecessary deadlock risk. For this last reason we do not check
+ * yet the ownership of the partitions, which get added to the list to
+ * process. Ownership will be checked later on anyway.
+ */
+ if (include_parts)
+ {
+ List *part_oids = find_all_inheritors(relid, NoLock, NULL);
+ ListCell *part_lc;
+
+ foreach(part_lc, part_oids)
+ {
+ Oid part_oid = lfirst_oid(part_lc);
+
+ if (part_oid == relid)
+ continue; /* ignore original table */
+
+ /*
+ * We omit a RangeVar since it wouldn't be appropriate to
+ * complain about failure to open one of these relations
+ * later.
+ */
+ oldcontext = MemoryContextSwitchTo(vac_context);
+ vacrels = lappend(vacrels, makeVacuumRelation(NULL,
+ part_oid,
+ vrel->va_cols));
+ MemoryContextSwitchTo(oldcontext);
+ }
+ }
+
+ /*
+ * Release lock again. This means that by the time we actually try to
+ * process the table, it might be gone or renamed. In the former case
+ * we'll silently ignore it; in the latter case we'll process it
+ * anyway, but we must beware that the RangeVar doesn't necessarily
+ * identify it anymore. This isn't ideal, perhaps, but there's little
+ * practical alternative, since we're typically going to commit this
+ * transaction and begin a new one between now and then. Moreover,
+ * holding locks on multiple relations would create significant risk
+ * of deadlock.
+ */
+ UnlockRelationOid(relid, AccessShareLock);
+ }
+
+ return vacrels;
+}
+
+/*
+ * Construct a list of VacuumRelations for all vacuumable rels in
+ * the current database. The list is built in vac_context.
+ *
+ * Only plain tables, materialized views and partitioned tables owned
+ * (per vacuum_is_relation_owner) by the current user are included.
+ */
+static List *
+get_all_vacuum_rels(int options)
+{
+ List *vacrels = NIL;
+ Relation pgclass;
+ TableScanDesc scan;
+ HeapTuple tuple;
+
+ pgclass = table_open(RelationRelationId, AccessShareLock);
+
+ scan = table_beginscan_catalog(pgclass, 0, NULL);
+
+ while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+ {
+ Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
+ MemoryContext oldcontext;
+ Oid relid = classForm->oid;
+
+ /* check permissions of relation */
+ if (!vacuum_is_relation_owner(relid, classForm, options))
+ continue;
+
+ /*
+ * We include partitioned tables here; depending on which operation is
+ * to be performed, caller will decide whether to process or ignore
+ * them.
+ */
+ if (classForm->relkind != RELKIND_RELATION &&
+ classForm->relkind != RELKIND_MATVIEW &&
+ classForm->relkind != RELKIND_PARTITIONED_TABLE)
+ continue;
+
+ /*
+ * Build VacuumRelation(s) specifying the table OIDs to be processed.
+ * We omit a RangeVar since it wouldn't be appropriate to complain
+ * about failure to open one of these relations later.
+ */
+ oldcontext = MemoryContextSwitchTo(vac_context);
+ vacrels = lappend(vacrels, makeVacuumRelation(NULL,
+ relid,
+ NIL));
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ table_endscan(scan);
+ table_close(pgclass, AccessShareLock);
+
+ return vacrels;
+}
+
+/*
+ * vacuum_set_xid_limits() -- compute oldestXmin and freeze cutoff points
+ *
+ * Input parameters are the target relation, applicable freeze age settings.
+ *
+ * The output parameters are:
+ * - oldestXmin is the Xid below which tuples deleted by any xact (that
+ * committed) should be considered DEAD, not just RECENTLY_DEAD.
+ * - oldestMxact is the Mxid below which MultiXacts are definitely not
+ * seen as visible by any running transaction.
+ * - freezeLimit is the Xid below which all Xids are definitely replaced by
+ * FrozenTransactionId during aggressive vacuums.
+ * - multiXactCutoff is the value below which all MultiXactIds are definitely
+ * removed from Xmax during aggressive vacuums.
+ *
+ * Return value indicates if vacuumlazy.c caller should make its VACUUM
+ * operation aggressive. An aggressive VACUUM must advance relfrozenxid up to
+ * FreezeLimit (at a minimum), and relminmxid up to multiXactCutoff (at a
+ * minimum).
+ *
+ * oldestXmin and oldestMxact are the most recent values that can ever be
+ * passed to vac_update_relstats() as frozenxid and minmulti arguments by our
+ * vacuumlazy.c caller later on. These values should be passed when it turns
+ * out that VACUUM will leave no unfrozen XIDs/XMIDs behind in the table.
+ */
+bool
+vacuum_set_xid_limits(Relation rel,
+ int freeze_min_age,
+ int freeze_table_age,
+ int multixact_freeze_min_age,
+ int multixact_freeze_table_age,
+ TransactionId *oldestXmin,
+ MultiXactId *oldestMxact,
+ TransactionId *freezeLimit,
+ MultiXactId *multiXactCutoff)
+{
+ int freezemin;
+ int mxid_freezemin;
+ int effective_multixact_freeze_max_age;
+ TransactionId limit;
+ TransactionId safeLimit;
+ MultiXactId mxactLimit;
+ MultiXactId safeMxactLimit;
+ int freezetable;
+
+ /*
+ * We can always ignore processes running lazy vacuum. This is because we
+ * use these values only for deciding which tuples we must keep in the
+ * tables. Since lazy vacuum doesn't write its XID anywhere (usually no
+ * XID assigned), it's safe to ignore it. In theory it could be
+ * problematic to ignore lazy vacuums in a full vacuum, but keep in mind
+ * that only one vacuum process can be working on a particular table at
+ * any time, and that each vacuum is always an independent transaction.
+ */
+ *oldestXmin = GetOldestNonRemovableTransactionId(rel);
+
+ if (OldSnapshotThresholdActive())
+ {
+ TransactionId limit_xmin;
+ TimestampTz limit_ts;
+
+ if (TransactionIdLimitedForOldSnapshots(*oldestXmin, rel,
+ &limit_xmin, &limit_ts))
+ {
+ /*
+ * TODO: We should only set the threshold if we are pruning on the
+ * basis of the increased limits. Not as crucial here as it is
+ * for opportunistic pruning (which often happens at a much higher
+ * frequency), but would still be a significant improvement.
+ */
+ SetOldSnapshotThresholdTimestamp(limit_ts, limit_xmin);
+ *oldestXmin = limit_xmin;
+ }
+ }
+
+ Assert(TransactionIdIsNormal(*oldestXmin));
+
+ /*
+ * Determine the minimum freeze age to use: as specified by the caller, or
+ * vacuum_freeze_min_age, but in any case not more than half
+ * autovacuum_freeze_max_age, so that autovacuums to prevent XID
+ * wraparound won't occur too frequently.
+ */
+ freezemin = freeze_min_age;
+ if (freezemin < 0)
+ freezemin = vacuum_freeze_min_age;
+ freezemin = Min(freezemin, autovacuum_freeze_max_age / 2);
+ Assert(freezemin >= 0);
+
+ /*
+ * Compute the cutoff XID, being careful not to generate a "permanent" XID
+ */
+ limit = *oldestXmin - freezemin;
+ if (!TransactionIdIsNormal(limit))
+ limit = FirstNormalTransactionId;
+
+ /*
+ * If oldestXmin is very far back (in practice, more than
+ * autovacuum_freeze_max_age / 2 XIDs old), complain and force a minimum
+ * freeze age of zero.
+ */
+ safeLimit = ReadNextTransactionId() - autovacuum_freeze_max_age;
+ if (!TransactionIdIsNormal(safeLimit))
+ safeLimit = FirstNormalTransactionId;
+
+ if (TransactionIdPrecedes(limit, safeLimit))
+ {
+ ereport(WARNING,
+ (errmsg("oldest xmin is far in the past"),
+ errhint("Close open transactions soon to avoid wraparound problems.\n"
+ "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
+ limit = *oldestXmin;
+ }
+
+ *freezeLimit = limit;
+
+ /*
+ * Compute the multixact age for which freezing is urgent. This is
+ * normally autovacuum_multixact_freeze_max_age, but may be less if we are
+ * short of multixact member space.
+ */
+ effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
+
+ /*
+ * Determine the minimum multixact freeze age to use: as specified by
+ * caller, or vacuum_multixact_freeze_min_age, but in any case not more
+ * than half effective_multixact_freeze_max_age, so that autovacuums to
+ * prevent MultiXact wraparound won't occur too frequently.
+ */
+ mxid_freezemin = multixact_freeze_min_age;
+ if (mxid_freezemin < 0)
+ mxid_freezemin = vacuum_multixact_freeze_min_age;
+ mxid_freezemin = Min(mxid_freezemin,
+ effective_multixact_freeze_max_age / 2);
+ Assert(mxid_freezemin >= 0);
+
+ /* Remember for caller */
+ *oldestMxact = GetOldestMultiXactId();
+
+ /* compute the cutoff multi, being careful to generate a valid value */
+ mxactLimit = *oldestMxact - mxid_freezemin;
+ if (mxactLimit < FirstMultiXactId)
+ mxactLimit = FirstMultiXactId;
+
+ safeMxactLimit =
+ ReadNextMultiXactId() - effective_multixact_freeze_max_age;
+ if (safeMxactLimit < FirstMultiXactId)
+ safeMxactLimit = FirstMultiXactId;
+
+ if (MultiXactIdPrecedes(mxactLimit, safeMxactLimit))
+ {
+ ereport(WARNING,
+ (errmsg("oldest multixact is far in the past"),
+ errhint("Close open transactions with multixacts soon to avoid wraparound problems.")));
+ /* Use the safe limit, unless an older mxact is still running */
+ if (MultiXactIdPrecedes(*oldestMxact, safeMxactLimit))
+ mxactLimit = *oldestMxact;
+ else
+ mxactLimit = safeMxactLimit;
+ }
+
+ *multiXactCutoff = mxactLimit;
+
+ /*
+ * Done setting output parameters; just need to figure out if caller needs
+ * to do an aggressive VACUUM or not.
+ *
+ * Determine the table freeze age to use: as specified by the caller, or
+ * vacuum_freeze_table_age, but in any case not more than
+ * autovacuum_freeze_max_age * 0.95, so that if you have e.g nightly
+ * VACUUM schedule, the nightly VACUUM gets a chance to freeze tuples
+ * before anti-wraparound autovacuum is launched.
+ */
+ freezetable = freeze_table_age;
+ if (freezetable < 0)
+ freezetable = vacuum_freeze_table_age;
+ freezetable = Min(freezetable, autovacuum_freeze_max_age * 0.95);
+ Assert(freezetable >= 0);
+
+ /*
+ * Compute XID limit causing an aggressive vacuum, being careful not to
+ * generate a "permanent" XID
+ */
+ limit = ReadNextTransactionId() - freezetable;
+ if (!TransactionIdIsNormal(limit))
+ limit = FirstNormalTransactionId;
+ if (TransactionIdPrecedesOrEquals(rel->rd_rel->relfrozenxid,
+ limit))
+ return true;
+
+ /*
+ * Similar to the above, determine the table freeze age to use for
+ * multixacts: as specified by the caller, or
+ * vacuum_multixact_freeze_table_age, but in any case not more than
+ * autovacuum_multixact_freeze_table_age * 0.95, so that if you have e.g.
+ * nightly VACUUM schedule, the nightly VACUUM gets a chance to freeze
+ * multixacts before anti-wraparound autovacuum is launched.
+ */
+ freezetable = multixact_freeze_table_age;
+ if (freezetable < 0)
+ freezetable = vacuum_multixact_freeze_table_age;
+ freezetable = Min(freezetable,
+ effective_multixact_freeze_max_age * 0.95);
+ Assert(freezetable >= 0);
+
+ /*
+ * Compute MultiXact limit causing an aggressive vacuum, being careful to
+ * generate a valid MultiXact value
+ */
+ mxactLimit = ReadNextMultiXactId() - freezetable;
+ if (mxactLimit < FirstMultiXactId)
+ mxactLimit = FirstMultiXactId;
+ if (MultiXactIdPrecedesOrEquals(rel->rd_rel->relminmxid,
+ mxactLimit))
+ return true;
+
+ return false;
+}
+
+/*
+ * vacuum_xid_failsafe_check() -- Used by VACUUM's wraparound failsafe
+ * mechanism to determine if its table's relfrozenxid and relminmxid are now
+ * dangerously far in the past.
+ *
+ * Input parameters are the target relation's relfrozenxid and relminmxid.
+ *
+ * When we return true, VACUUM caller triggers the failsafe.
+ */
+bool
+vacuum_xid_failsafe_check(TransactionId relfrozenxid, MultiXactId relminmxid)
+{
+	TransactionId xid_limit;
+	MultiXactId mxid_limit;
+	int			failsafe_age;
+
+	Assert(TransactionIdIsNormal(relfrozenxid));
+	Assert(MultiXactIdIsValid(relminmxid));
+
+	/*
+	 * Work out the XID age beyond which the failsafe triggers.  It is never
+	 * allowed to be less than autovacuum_freeze_max_age * 1.05.
+	 */
+	failsafe_age = Max(vacuum_failsafe_age, autovacuum_freeze_max_age * 1.05);
+
+	xid_limit = ReadNextTransactionId() - failsafe_age;
+	if (!TransactionIdIsNormal(xid_limit))
+		xid_limit = FirstNormalTransactionId;
+
+	/* Trigger the failsafe when the table's relfrozenxid is too old */
+	if (TransactionIdPrecedes(relfrozenxid, xid_limit))
+		return true;
+
+	/*
+	 * Same idea for multixacts: the cutoff age is never less than
+	 * autovacuum_multixact_freeze_max_age * 1.05.
+	 */
+	failsafe_age = Max(vacuum_multixact_failsafe_age,
+					   autovacuum_multixact_freeze_max_age * 1.05);
+
+	mxid_limit = ReadNextMultiXactId() - failsafe_age;
+	if (mxid_limit < FirstMultiXactId)
+		mxid_limit = FirstMultiXactId;
+
+	/* Trigger the failsafe when the table's relminmxid is too old */
+	if (MultiXactIdPrecedes(relminmxid, mxid_limit))
+		return true;
+
+	return false;
+}
+
+/*
+ * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
+ *
+ * When the whole relation was scanned, the observed live-tuple count is
+ * authoritative and is returned as-is.  With only partial coverage we must
+ * not extrapolate blindly, since VACUUM may have visited a quite nonrandom
+ * subset of pages; instead, the previous pg_class.reltuples/relpages ratio
+ * serves as the assumed tuple density for the pages that were not scanned.
+ *
+ * Note: scanned_tuples should count only *live* tuples, since
+ * pg_class.reltuples is defined that way.
+ */
+double
+vac_estimate_reltuples(Relation relation,
+					   BlockNumber total_pages,
+					   BlockNumber scanned_pages,
+					   double scanned_tuples)
+{
+	BlockNumber prev_pages = relation->rd_rel->relpages;
+	double		prev_tuples = relation->rd_rel->reltuples;
+	double		prev_density;
+	double		pages_unscanned;
+	double		estimated_tuples;
+
+	/* Full-table scan: the observed count is exact */
+	if (scanned_pages >= total_pages)
+		return scanned_tuples;
+
+	/*
+	 * Guard against gradual density drift when successive VACUUM commands
+	 * keep scanning the same few pages (a vacuumlazy.c implementation
+	 * artifact, e.g. its tendency to always scan the last heap page) while
+	 * nothing in the table really changes.
+	 *
+	 * If the table is _exactly_ the size the existing pg_class entry claims
+	 * and fewer than 2% of its pages were scanned, keep the stored reltuples
+	 * value.  Likewise keep it when no more than a single page was scanned.
+	 *
+	 * (Note: the stored value we return here might be -1, i.e. unknown.)
+	 */
+	if (prev_pages == total_pages &&
+		scanned_pages < (double) total_pages * 0.02)
+		return prev_tuples;
+	if (scanned_pages <= 1)
+		return prev_tuples;
+
+	/*
+	 * With no usable old density (unknown reltuples, or zero relpages),
+	 * all we can do is scale the observed density up to total_pages.
+	 */
+	if (prev_tuples < 0 || prev_pages == 0)
+		return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
+
+	/*
+	 * Normal case: turn the old measurement into a tuples-per-page density,
+	 * apply it to the unscanned pages, and add the tuples actually seen in
+	 * the scanned pages.
+	 */
+	prev_density = prev_tuples / prev_pages;
+	pages_unscanned = (double) total_pages - (double) scanned_pages;
+	estimated_tuples = prev_density * pages_unscanned + scanned_tuples;
+	return floor(estimated_tuples + 0.5);
+}
+
+
+/*
+ * vac_update_relstats() -- update statistics for one relation
+ *
+ * Update the whole-relation statistics that are kept in its pg_class
+ * row. There are additional stats that will be updated if we are
+ * doing ANALYZE, but we always update these stats. This routine works
+ * for both index and heap relation entries in pg_class.
+ *
+ * We violate transaction semantics here by overwriting the rel's
+ * existing pg_class tuple with the new values. This is reasonably
+ * safe as long as we're sure that the new values are correct whether or
+ * not this transaction commits. The reason for doing this is that if
+ * we updated these tuples in the usual way, vacuuming pg_class itself
+ * wouldn't work very well --- by the time we got done with a vacuum
+ * cycle, most of the tuples in pg_class would've been obsoleted. Of
+ * course, this only works for fixed-size not-null columns, but these are.
+ *
+ * Another reason for doing it this way is that when we are in a lazy
+ * VACUUM and have PROC_IN_VACUUM set, we mustn't do any regular updates.
+ * Somebody vacuuming pg_class might think they could delete a tuple
+ * marked with xmin = our xid.
+ *
+ * In addition to fundamentally nontransactional statistics such as
+ * relpages and relallvisible, we try to maintain certain lazily-updated
+ * DDL flags such as relhasindex, by clearing them if no longer correct.
+ * It's safe to do this in VACUUM, which can't run in parallel with
+ * CREATE INDEX/RULE/TRIGGER and can't be part of a transaction block.
+ * However, it's *not* safe to do it in an ANALYZE that's within an
+ * outer transaction, because for example the current transaction might
+ * have dropped the last index; then we'd think relhasindex should be
+ * cleared, but if the transaction later rolls back this would be wrong.
+ * So we refrain from updating the DDL flags if we're inside an outer
+ * transaction. This is OK since postponing the flag maintenance is
+ * always allowable.
+ *
+ * Note: num_tuples should count only *live* tuples, since
+ * pg_class.reltuples is defined that way.
+ *
+ * This routine is shared by VACUUM and ANALYZE.
+ */
+void
+vac_update_relstats(Relation relation,
+					BlockNumber num_pages, double num_tuples,
+					BlockNumber num_all_visible_pages,
+					bool hasindex, TransactionId frozenxid,
+					MultiXactId minmulti,
+					bool *frozenxid_updated, bool *minmulti_updated,
+					bool in_outer_xact)
+{
+	Oid			relid = RelationGetRelid(relation);
+	Relation	rd;
+	HeapTuple	ctup;
+	Form_pg_class pgcform;
+	bool		dirty,
+				futurexid,
+				futuremxid;
+	TransactionId oldfrozenxid;
+	MultiXactId oldminmulti;
+
+	/* Open pg_class; its row for this rel will be overwritten in place */
+	rd = table_open(RelationRelationId, RowExclusiveLock);
+
+	/* Fetch a copy of the tuple to scribble on */
+	ctup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
+	if (!HeapTupleIsValid(ctup))
+		elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
+			 relid);
+	pgcform = (Form_pg_class) GETSTRUCT(ctup);
+
+	/* Apply statistical updates, if any, to copied tuple */
+
+	dirty = false;
+	if (pgcform->relpages != (int32) num_pages)
+	{
+		pgcform->relpages = (int32) num_pages;
+		dirty = true;
+	}
+	if (pgcform->reltuples != (float4) num_tuples)
+	{
+		pgcform->reltuples = (float4) num_tuples;
+		dirty = true;
+	}
+	if (pgcform->relallvisible != (int32) num_all_visible_pages)
+	{
+		pgcform->relallvisible = (int32) num_all_visible_pages;
+		dirty = true;
+	}
+
+	/* Apply DDL updates, but not inside an outer transaction (see above) */
+
+	if (!in_outer_xact)
+	{
+		/*
+		 * If we didn't find any indexes, reset relhasindex.
+		 */
+		if (pgcform->relhasindex && !hasindex)
+		{
+			pgcform->relhasindex = false;
+			dirty = true;
+		}
+
+		/* We also clear relhasrules and relhastriggers if needed */
+		if (pgcform->relhasrules && relation->rd_rules == NULL)
+		{
+			pgcform->relhasrules = false;
+			dirty = true;
+		}
+		if (pgcform->relhastriggers && relation->trigdesc == NULL)
+		{
+			pgcform->relhastriggers = false;
+			dirty = true;
+		}
+	}
+
+	/*
+	 * Update relfrozenxid, unless caller passed InvalidTransactionId
+	 * indicating it has no new data.
+	 *
+	 * Ordinarily, we don't let relfrozenxid go backwards.  However, if the
+	 * stored relfrozenxid is "in the future" then it seems best to assume
+	 * it's corrupt, and overwrite with the oldest remaining XID in the table.
+	 * This should match vac_update_datfrozenxid() concerning what we consider
+	 * to be "in the future".
+	 */
+	oldfrozenxid = pgcform->relfrozenxid;
+	futurexid = false;
+	if (frozenxid_updated)
+		*frozenxid_updated = false;
+	if (TransactionIdIsNormal(frozenxid) && oldfrozenxid != frozenxid)
+	{
+		bool		update = false;
+
+		/* normal forward advance */
+		if (TransactionIdPrecedes(oldfrozenxid, frozenxid))
+			update = true;
+		/* stored value is past nextXid: treat as corrupt and overwrite */
+		else if (TransactionIdPrecedes(ReadNextTransactionId(), oldfrozenxid))
+			futurexid = update = true;
+
+		if (update)
+		{
+			pgcform->relfrozenxid = frozenxid;
+			dirty = true;
+			if (frozenxid_updated)
+				*frozenxid_updated = true;
+		}
+	}
+
+	/* Similarly for relminmxid */
+	oldminmulti = pgcform->relminmxid;
+	futuremxid = false;
+	if (minmulti_updated)
+		*minmulti_updated = false;
+	if (MultiXactIdIsValid(minmulti) && oldminmulti != minmulti)
+	{
+		bool		update = false;
+
+		if (MultiXactIdPrecedes(oldminmulti, minmulti))
+			update = true;
+		else if (MultiXactIdPrecedes(ReadNextMultiXactId(), oldminmulti))
+			futuremxid = update = true;
+
+		if (update)
+		{
+			pgcform->relminmxid = minmulti;
+			dirty = true;
+			if (minmulti_updated)
+				*minmulti_updated = true;
+		}
+	}
+
+	/* If anything changed, write out the tuple (nontransactionally). */
+	if (dirty)
+		heap_inplace_update(rd, ctup);
+
+	table_close(rd, RowExclusiveLock);
+
+	/* Report overwritten "future" values only after closing pg_class */
+	if (futurexid)
+		ereport(WARNING,
+				(errcode(ERRCODE_DATA_CORRUPTED),
+				 errmsg_internal("overwrote invalid relfrozenxid value %u with new value %u for table \"%s\"",
+								 oldfrozenxid, frozenxid,
+								 RelationGetRelationName(relation))));
+	if (futuremxid)
+		ereport(WARNING,
+				(errcode(ERRCODE_DATA_CORRUPTED),
+				 errmsg_internal("overwrote invalid relminmxid value %u with new value %u for table \"%s\"",
+								 oldminmulti, minmulti,
+								 RelationGetRelationName(relation))));
+}
+
+
+/*
+ * vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
+ *
+ * Update pg_database's datfrozenxid entry for our database to be the
+ * minimum of the pg_class.relfrozenxid values.
+ *
+ * Similarly, update our datminmxid to be the minimum of the
+ * pg_class.relminmxid values.
+ *
+ * If we are able to advance either pg_database value, also try to
+ * truncate pg_xact and pg_multixact.
+ *
+ * We violate transaction semantics here by overwriting the database's
+ * existing pg_database tuple with the new values. This is reasonably
+ * safe since the new values are correct whether or not this transaction
+ * commits. As with vac_update_relstats, this avoids leaving dead tuples
+ * behind after a VACUUM.
+ */
+void
+vac_update_datfrozenxid(void)
+{
+	HeapTuple	tuple;
+	Form_pg_database dbform;
+	Relation	relation;
+	SysScanDesc scan;
+	HeapTuple	classTup;
+	TransactionId newFrozenXid;
+	MultiXactId newMinMulti;
+	TransactionId lastSaneFrozenXid;
+	MultiXactId lastSaneMinMulti;
+	bool		bogus = false;
+	bool		dirty = false;
+	ScanKeyData key[1];
+
+	/*
+	 * Restrict this task to one backend per database.  This avoids race
+	 * conditions that would move datfrozenxid or datminmxid backward.  It
+	 * avoids calling vac_truncate_clog() with a datfrozenxid preceding a
+	 * datfrozenxid passed to an earlier vac_truncate_clog() call.
+	 */
+	LockDatabaseFrozenIds(ExclusiveLock);
+
+	/*
+	 * Initialize the "min" calculation with
+	 * GetOldestNonRemovableTransactionId(), which is a reasonable
+	 * approximation to the minimum relfrozenxid for not-yet-committed
+	 * pg_class entries for new tables; see AddNewRelationTuple().  So we
+	 * cannot produce a wrong minimum by starting with this.
+	 */
+	newFrozenXid = GetOldestNonRemovableTransactionId(NULL);
+
+	/*
+	 * Similarly, initialize the MultiXact "min" with the value that would be
+	 * used on pg_class for new tables.  See AddNewRelationTuple().
+	 */
+	newMinMulti = GetOldestMultiXactId();
+
+	/*
+	 * Identify the latest relfrozenxid and relminmxid values that we could
+	 * validly see during the scan.  These are conservative values, but it's
+	 * not really worth trying to be more exact.
+	 */
+	lastSaneFrozenXid = ReadNextTransactionId();
+	lastSaneMinMulti = ReadNextMultiXactId();
+
+	/*
+	 * We must seqscan pg_class to find the minimum Xid, because there is no
+	 * index that can help us here.
+	 */
+	relation = table_open(RelationRelationId, AccessShareLock);
+
+	/* InvalidOid index => plain sequential scan of pg_class */
+	scan = systable_beginscan(relation, InvalidOid, false,
+							  NULL, 0, NULL);
+
+	while ((classTup = systable_getnext(scan)) != NULL)
+	{
+		Form_pg_class classForm = (Form_pg_class) GETSTRUCT(classTup);
+
+		/*
+		 * Only consider relations able to hold unfrozen XIDs (anything else
+		 * should have InvalidTransactionId in relfrozenxid anyway).
+		 */
+		if (classForm->relkind != RELKIND_RELATION &&
+			classForm->relkind != RELKIND_MATVIEW &&
+			classForm->relkind != RELKIND_TOASTVALUE)
+		{
+			Assert(!TransactionIdIsValid(classForm->relfrozenxid));
+			Assert(!MultiXactIdIsValid(classForm->relminmxid));
+			continue;
+		}
+
+		/*
+		 * Some table AMs might not need per-relation xid / multixid horizons.
+		 * It therefore seems reasonable to allow relfrozenxid and relminmxid
+		 * to not be set (i.e. set to their respective Invalid*Id)
+		 * independently.  Thus validate and compute horizon for each only if
+		 * set.
+		 *
+		 * If things are working properly, no relation should have a
+		 * relfrozenxid or relminmxid that is "in the future".  However, such
+		 * cases have been known to arise due to bugs in pg_upgrade.  If we
+		 * see any entries that are "in the future", chicken out and don't do
+		 * anything.  This ensures we won't truncate clog & multixact SLRUs
+		 * before those relations have been scanned and cleaned up.
+		 */
+
+		if (TransactionIdIsValid(classForm->relfrozenxid))
+		{
+			Assert(TransactionIdIsNormal(classForm->relfrozenxid));
+
+			/* check for values in the future */
+			if (TransactionIdPrecedes(lastSaneFrozenXid, classForm->relfrozenxid))
+			{
+				bogus = true;
+				break;
+			}
+
+			/* determine new horizon */
+			if (TransactionIdPrecedes(classForm->relfrozenxid, newFrozenXid))
+				newFrozenXid = classForm->relfrozenxid;
+		}
+
+		if (MultiXactIdIsValid(classForm->relminmxid))
+		{
+			/* check for values in the future */
+			if (MultiXactIdPrecedes(lastSaneMinMulti, classForm->relminmxid))
+			{
+				bogus = true;
+				break;
+			}
+
+			/* determine new horizon */
+			if (MultiXactIdPrecedes(classForm->relminmxid, newMinMulti))
+				newMinMulti = classForm->relminmxid;
+		}
+	}
+
+	/* we're done with pg_class */
+	systable_endscan(scan);
+	table_close(relation, AccessShareLock);
+
+	/* chicken out if bogus data found */
+	if (bogus)
+		return;
+
+	Assert(TransactionIdIsNormal(newFrozenXid));
+	Assert(MultiXactIdIsValid(newMinMulti));
+
+	/* Now fetch the pg_database tuple we need to update. */
+	relation = table_open(DatabaseRelationId, RowExclusiveLock);
+
+	/*
+	 * Get the pg_database tuple to scribble on.  Note that this does not
+	 * directly rely on the syscache to avoid issues with flattened toast
+	 * values for the in-place update.
+	 */
+	ScanKeyInit(&key[0],
+				Anum_pg_database_oid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(MyDatabaseId));
+
+	scan = systable_beginscan(relation, DatabaseOidIndexId, true,
+							  NULL, 1, key);
+	tuple = systable_getnext(scan);
+	/* copy so we can scribble on it outside the scan; validity checked below */
+	tuple = heap_copytuple(tuple);
+	systable_endscan(scan);
+
+	if (!HeapTupleIsValid(tuple))
+		elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
+
+	dbform = (Form_pg_database) GETSTRUCT(tuple);
+
+	/*
+	 * As in vac_update_relstats(), we ordinarily don't want to let
+	 * datfrozenxid go backward; but if it's "in the future" then it must be
+	 * corrupt and it seems best to overwrite it.
+	 */
+	if (dbform->datfrozenxid != newFrozenXid &&
+		(TransactionIdPrecedes(dbform->datfrozenxid, newFrozenXid) ||
+		 TransactionIdPrecedes(lastSaneFrozenXid, dbform->datfrozenxid)))
+	{
+		dbform->datfrozenxid = newFrozenXid;
+		dirty = true;
+	}
+	else
+		newFrozenXid = dbform->datfrozenxid;
+
+	/* Ditto for datminmxid */
+	if (dbform->datminmxid != newMinMulti &&
+		(MultiXactIdPrecedes(dbform->datminmxid, newMinMulti) ||
+		 MultiXactIdPrecedes(lastSaneMinMulti, dbform->datminmxid)))
+	{
+		dbform->datminmxid = newMinMulti;
+		dirty = true;
+	}
+	else
+		newMinMulti = dbform->datminmxid;
+
+	/* nontransactional in-place overwrite, as explained in header comment */
+	if (dirty)
+		heap_inplace_update(relation, tuple);
+
+	heap_freetuple(tuple);
+	table_close(relation, RowExclusiveLock);
+
+	/*
+	 * If we were able to advance datfrozenxid or datminmxid, see if we can
+	 * truncate pg_xact and/or pg_multixact.  Also do it if the shared
+	 * XID-wrap-limit info is stale, since this action will update that too.
+	 */
+	if (dirty || ForceTransactionIdLimitUpdate())
+		vac_truncate_clog(newFrozenXid, newMinMulti,
+						  lastSaneFrozenXid, lastSaneMinMulti);
+}
+
+
+/*
+ * vac_truncate_clog() -- attempt to truncate the commit log
+ *
+ * Scan pg_database to determine the system-wide oldest datfrozenxid,
+ * and use it to truncate the transaction commit log (pg_xact).
+ * Also update the XID wrap limit info maintained by varsup.c.
+ * Likewise for datminmxid.
+ *
+ * The passed frozenXID and minMulti are the updated values for my own
+ * pg_database entry. They're used to initialize the "min" calculations.
+ * The caller also passes the "last sane" XID and MXID, since it has
+ * those at hand already.
+ *
+ * This routine is only invoked when we've managed to change our
+ * DB's datfrozenxid/datminmxid values, or we found that the shared
+ * XID-wrap-limit info is stale.
+ */
+static void
+vac_truncate_clog(TransactionId frozenXID,
+				  MultiXactId minMulti,
+				  TransactionId lastSaneFrozenXid,
+				  MultiXactId lastSaneMinMulti)
+{
+	TransactionId nextXID = ReadNextTransactionId();
+	Relation	relation;
+	TableScanDesc scan;
+	HeapTuple	tuple;
+	Oid			oldestxid_datoid;
+	Oid			minmulti_datoid;
+	bool		bogus = false;
+	bool		frozenAlreadyWrapped = false;
+
+	/* Restrict task to one backend per cluster; see SimpleLruTruncate(). */
+	LWLockAcquire(WrapLimitsVacuumLock, LW_EXCLUSIVE);
+
+	/* init oldest datoids to sync with my frozenXID/minMulti values */
+	oldestxid_datoid = MyDatabaseId;
+	minmulti_datoid = MyDatabaseId;
+
+	/*
+	 * Scan pg_database to compute the minimum datfrozenxid/datminmxid
+	 *
+	 * Since vac_update_datfrozenxid updates datfrozenxid/datminmxid in-place,
+	 * the values could change while we look at them.  Fetch each one just
+	 * once to ensure sane behavior of the comparison logic.  (Here, as in
+	 * many other places, we assume that fetching or updating an XID in shared
+	 * storage is atomic.)
+	 *
+	 * Note: we need not worry about a race condition with new entries being
+	 * inserted by CREATE DATABASE.  Any such entry will have a copy of some
+	 * existing DB's datfrozenxid, and that source DB cannot be ours because
+	 * of the interlock against copying a DB containing an active backend.
+	 * Hence the new entry will not reduce the minimum.  Also, if two VACUUMs
+	 * concurrently modify the datfrozenxid's of different databases, the
+	 * worst possible outcome is that pg_xact is not truncated as aggressively
+	 * as it could be.
+	 */
+	relation = table_open(DatabaseRelationId, AccessShareLock);
+
+	scan = table_beginscan_catalog(relation, 0, NULL);
+
+	while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+	{
+		/* volatile: the row may be updated in place under us (see above) */
+		volatile FormData_pg_database *dbform = (Form_pg_database) GETSTRUCT(tuple);
+		TransactionId datfrozenxid = dbform->datfrozenxid;
+		TransactionId datminmxid = dbform->datminmxid;
+
+		Assert(TransactionIdIsNormal(datfrozenxid));
+		Assert(MultiXactIdIsValid(datminmxid));
+
+		/*
+		 * If database is in the process of getting dropped, or has been
+		 * interrupted while doing so, no connections to it are possible
+		 * anymore.  Therefore we don't need to take it into account here.
+		 * Which is good, because it can't be processed by autovacuum either.
+		 */
+		if (database_is_invalid_form((Form_pg_database) dbform))
+		{
+			elog(DEBUG2,
+				 "skipping invalid database \"%s\" while computing relfrozenxid",
+				 NameStr(dbform->datname));
+			continue;
+		}
+
+		/*
+		 * If things are working properly, no database should have a
+		 * datfrozenxid or datminmxid that is "in the future".  However, such
+		 * cases have been known to arise due to bugs in pg_upgrade.  If we
+		 * see any entries that are "in the future", chicken out and don't do
+		 * anything.  This ensures we won't truncate clog before those
+		 * databases have been scanned and cleaned up.  (We will issue the
+		 * "already wrapped" warning if appropriate, though.)
+		 */
+		if (TransactionIdPrecedes(lastSaneFrozenXid, datfrozenxid) ||
+			MultiXactIdPrecedes(lastSaneMinMulti, datminmxid))
+			bogus = true;
+
+		if (TransactionIdPrecedes(nextXID, datfrozenxid))
+			frozenAlreadyWrapped = true;
+		else if (TransactionIdPrecedes(datfrozenxid, frozenXID))
+		{
+			frozenXID = datfrozenxid;
+			oldestxid_datoid = dbform->oid;
+		}
+
+		if (MultiXactIdPrecedes(datminmxid, minMulti))
+		{
+			minMulti = datminmxid;
+			minmulti_datoid = dbform->oid;
+		}
+	}
+
+	table_endscan(scan);
+
+	table_close(relation, AccessShareLock);
+
+	/*
+	 * Do not truncate CLOG if we seem to have suffered wraparound already;
+	 * the computed minimum XID might be bogus.  This case should now be
+	 * impossible due to the defenses in GetNewTransactionId, but we keep the
+	 * test anyway.
+	 */
+	if (frozenAlreadyWrapped)
+	{
+		ereport(WARNING,
+				(errmsg("some databases have not been vacuumed in over 2 billion transactions"),
+				 errdetail("You might have already suffered transaction-wraparound data loss.")));
+		LWLockRelease(WrapLimitsVacuumLock);
+		return;
+	}
+
+	/* chicken out if data is bogus in any other way */
+	if (bogus)
+	{
+		LWLockRelease(WrapLimitsVacuumLock);
+		return;
+	}
+
+	/*
+	 * Advance the oldest value for commit timestamps before truncating, so
+	 * that if a user requests a timestamp for a transaction we're truncating
+	 * away right after this point, they get NULL instead of an ugly "file not
+	 * found" error from slru.c.  This doesn't matter for xact/multixact
+	 * because they are not subject to arbitrary lookups from users.
+	 */
+	AdvanceOldestCommitTsXid(frozenXID);
+
+	/*
+	 * Truncate CLOG, multixact and CommitTs to the oldest computed value.
+	 */
+	TruncateCLOG(frozenXID, oldestxid_datoid);
+	TruncateCommitTs(frozenXID);
+	TruncateMultiXact(minMulti, minmulti_datoid);
+
+	/*
+	 * Update the wrap limit for GetNewTransactionId and creation of new
+	 * MultiXactIds.  Note: these functions will also signal the postmaster
+	 * for an(other) autovac cycle if needed.  XXX should we avoid possibly
+	 * signaling twice?
+	 */
+	SetTransactionIdLimit(frozenXID, oldestxid_datoid);
+	SetMultiXactIdLimit(minMulti, minmulti_datoid, false);
+
+	LWLockRelease(WrapLimitsVacuumLock);
+}
+
+
+/*
+ * vacuum_rel() -- vacuum one heap relation
+ *
+ * relid identifies the relation to vacuum. If relation is supplied,
+ * use the name therein for reporting any failure to open/lock the rel;
+ * do not use it once we've successfully opened the rel, since it might
+ * be stale.
+ *
+ * Returns true if it's okay to proceed with a requested ANALYZE
+ * operation on this table.
+ *
+ * Doing one heap at a time incurs extra overhead, since we need to
+ * check that the heap exists again just before we vacuum it. The
+ * reason that we do this is so that vacuuming can be spread across
+ * many small transactions. Otherwise, two-phase locking would require
+ * us to lock the entire database during one pass of the vacuum cleaner.
+ *
+ * At entry and exit, we are not inside a transaction.
+ */
+static bool
+vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params)
+{
+ LOCKMODE lmode;
+ Relation rel;
+ LockRelId lockrelid;
+ Oid toast_relid;
+ Oid save_userid;
+ int save_sec_context;
+ int save_nestlevel;
+
+ Assert(params != NULL);
+
+ /* Begin a transaction for vacuuming this relation */
+ StartTransactionCommand();
+
+ if (!(params->options & VACOPT_FULL))
+ {
+ /*
+ * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
+ * other concurrent VACUUMs know that they can ignore this one while
+ * determining their OldestXmin. (The reason we don't set it during a
+ * full VACUUM is exactly that we may have to run user-defined
+ * functions for functional indexes, and we want to make sure that if
+ * they use the snapshot set above, any tuples it requires can't get
+ * removed from other tables. An index function that depends on the
+ * contents of other tables is arguably broken, but we won't break it
+ * here by violating transaction semantics.)
+ *
+ * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
+ * autovacuum; it's used to avoid canceling a vacuum that was invoked
+ * in an emergency.
+ *
+ * Note: these flags remain set until CommitTransaction or
+ * AbortTransaction. We don't want to clear them until we reset
+ * MyProc->xid/xmin, otherwise GetOldestNonRemovableTransactionId()
+ * might appear to go backwards, which is probably Not Good. (We also
+ * set PROC_IN_VACUUM *before* taking our own snapshot, so that our
+ * xmin doesn't become visible ahead of setting the flag.)
+ */
+ LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+ MyProc->statusFlags |= PROC_IN_VACUUM;
+ if (params->is_wraparound)
+ MyProc->statusFlags |= PROC_VACUUM_FOR_WRAPAROUND;
+ ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags;
+ LWLockRelease(ProcArrayLock);
+ }
+
+ /*
+ * Need to acquire a snapshot to prevent pg_subtrans from being truncated,
+ * cutoff xids in local memory wrapping around, and to have updated xmin
+ * horizons.
+ */
+ PushActiveSnapshot(GetTransactionSnapshot());
+
+ /*
+ * Check for user-requested abort. Note we want this to be inside a
+ * transaction, so xact.c doesn't issue useless WARNING.
+ */
+ CHECK_FOR_INTERRUPTS();
+
+ /*
+ * Determine the type of lock we want --- hard exclusive lock for a FULL
+ * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either
+ * way, we can be sure that no other backend is vacuuming the same table.
+ */
+ lmode = (params->options & VACOPT_FULL) ?
+ AccessExclusiveLock : ShareUpdateExclusiveLock;
+
+ /* open the relation and get the appropriate lock on it */
+ rel = vacuum_open_relation(relid, relation, params->options,
+ params->log_min_duration >= 0, lmode);
+
+ /* leave if relation could not be opened or locked */
+ if (!rel)
+ {
+ PopActiveSnapshot();
+ CommitTransactionCommand();
+ return false;
+ }
+
+ /*
+ * Check if relation needs to be skipped based on ownership. This check
+ * happens also when building the relation list to vacuum for a manual
+ * operation, and needs to be done additionally here as VACUUM could
+ * happen across multiple transactions where relation ownership could have
+ * changed in-between. Make sure to only generate logs for VACUUM in this
+ * case.
+ */
+ if (!vacuum_is_relation_owner(RelationGetRelid(rel),
+ rel->rd_rel,
+ params->options & VACOPT_VACUUM))
+ {
+ relation_close(rel, lmode);
+ PopActiveSnapshot();
+ CommitTransactionCommand();
+ return false;
+ }
+
+ /*
+ * Check that it's of a vacuumable relkind.
+ */
+ if (rel->rd_rel->relkind != RELKIND_RELATION &&
+ rel->rd_rel->relkind != RELKIND_MATVIEW &&
+ rel->rd_rel->relkind != RELKIND_TOASTVALUE &&
+ rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+ {
+ ereport(WARNING,
+ (errmsg("skipping \"%s\" --- cannot vacuum non-tables or special system tables",
+ RelationGetRelationName(rel))));
+ relation_close(rel, lmode);
+ PopActiveSnapshot();
+ CommitTransactionCommand();
+ return false;
+ }
+
+ /*
+ * Silently ignore tables that are temp tables of other backends ---
+ * trying to vacuum these will lead to great unhappiness, since their
+ * contents are probably not up-to-date on disk. (We don't throw a
+ * warning here; it would just lead to chatter during a database-wide
+ * VACUUM.)
+ */
+ if (RELATION_IS_OTHER_TEMP(rel))
+ {
+ relation_close(rel, lmode);
+ PopActiveSnapshot();
+ CommitTransactionCommand();
+ return false;
+ }
+
+ /*
+ * Silently ignore partitioned tables as there is no work to be done. The
+ * useful work is on their child partitions, which have been queued up for
+ * us separately.
+ */
+ if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ {
+ relation_close(rel, lmode);
+ PopActiveSnapshot();
+ CommitTransactionCommand();
+ /* It's OK to proceed with ANALYZE on this table */
+ return true;
+ }
+
+ /*
+ * Get a session-level lock too. This will protect our access to the
+ * relation across multiple transactions, so that we can vacuum the
+ * relation's TOAST table (if any) secure in the knowledge that no one is
+ * deleting the parent relation.
+ *
+ * NOTE: this cannot block, even if someone else is waiting for access,
+ * because the lock manager knows that both lock requests are from the
+ * same process.
+ */
+ lockrelid = rel->rd_lockInfo.lockRelId;
+ LockRelationIdForSession(&lockrelid, lmode);
+
+ /*
+ * Set index_cleanup option based on index_cleanup reloption if it wasn't
+ * specified in VACUUM command, or when running in an autovacuum worker
+ */
+ if (params->index_cleanup == VACOPTVALUE_UNSPECIFIED)
+ {
+ StdRdOptIndexCleanup vacuum_index_cleanup;
+
+ if (rel->rd_options == NULL)
+ vacuum_index_cleanup = STDRD_OPTION_VACUUM_INDEX_CLEANUP_AUTO;
+ else
+ vacuum_index_cleanup =
+ ((StdRdOptions *) rel->rd_options)->vacuum_index_cleanup;
+
+ if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_AUTO)
+ params->index_cleanup = VACOPTVALUE_AUTO;
+ else if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_ON)
+ params->index_cleanup = VACOPTVALUE_ENABLED;
+ else
+ {
+ Assert(vacuum_index_cleanup ==
+ STDRD_OPTION_VACUUM_INDEX_CLEANUP_OFF);
+ params->index_cleanup = VACOPTVALUE_DISABLED;
+ }
+ }
+
+ /*
+ * Set truncate option based on truncate reloption if it wasn't specified
+ * in VACUUM command, or when running in an autovacuum worker
+ */
+ if (params->truncate == VACOPTVALUE_UNSPECIFIED)
+ {
+ if (rel->rd_options == NULL ||
+ ((StdRdOptions *) rel->rd_options)->vacuum_truncate)
+ params->truncate = VACOPTVALUE_ENABLED;
+ else
+ params->truncate = VACOPTVALUE_DISABLED;
+ }
+
+ /*
+ * Remember the relation's TOAST relation for later, if the caller asked
+ * us to process it. In VACUUM FULL, though, the toast table is
+ * automatically rebuilt by cluster_rel so we shouldn't recurse to it.
+ */
+ if ((params->options & VACOPT_PROCESS_TOAST) != 0 &&
+ (params->options & VACOPT_FULL) == 0)
+ toast_relid = rel->rd_rel->reltoastrelid;
+ else
+ toast_relid = InvalidOid;
+
+ /*
+ * Switch to the table owner's userid, so that any index functions are run
+ * as that user. Also lock down security-restricted operations and
+ * arrange to make GUC variable changes local to this command. (This is
+ * unnecessary, but harmless, for lazy VACUUM.)
+ */
+ GetUserIdAndSecContext(&save_userid, &save_sec_context);
+ SetUserIdAndSecContext(rel->rd_rel->relowner,
+ save_sec_context | SECURITY_RESTRICTED_OPERATION);
+ save_nestlevel = NewGUCNestLevel();
+
+ /*
+ * Do the actual work --- either FULL or "lazy" vacuum
+ */
+ if (params->options & VACOPT_FULL)
+ {
+ ClusterParams cluster_params = {0};
+
+ /* close relation before vacuuming, but hold lock until commit */
+ relation_close(rel, NoLock);
+ rel = NULL;
+
+ if ((params->options & VACOPT_VERBOSE) != 0)
+ cluster_params.options |= CLUOPT_VERBOSE;
+
+ /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
+ cluster_rel(relid, InvalidOid, &cluster_params);
+ }
+ else
+ table_relation_vacuum(rel, params, vac_strategy);
+
+ /* Roll back any GUC changes executed by index functions */
+ AtEOXact_GUC(false, save_nestlevel);
+
+ /* Restore userid and security context */
+ SetUserIdAndSecContext(save_userid, save_sec_context);
+
+ /* all done with this class, but hold lock until commit */
+ if (rel)
+ relation_close(rel, NoLock);
+
+ /*
+ * Complete the transaction and free all temporary memory used.
+ */
+ PopActiveSnapshot();
+ CommitTransactionCommand();
+
+ /*
+ * If the relation has a secondary toast rel, vacuum that too while we
+ * still hold the session lock on the main table. Note however that
+ * "analyze" will not get done on the toast table. This is good, because
+ * the toaster always uses hardcoded index access and statistics are
+ * totally unimportant for toast relations.
+ */
+ if (toast_relid != InvalidOid)
+ vacuum_rel(toast_relid, NULL, params);
+
+ /*
+ * Now release the session-level lock on the main table.
+ */
+ UnlockRelationIdForSession(&lockrelid, lmode);
+
+ /* Report that we really did it. */
+ return true;
+}
+
+
+/*
+ * Open all the vacuumable indexes of the given relation, obtaining the
+ * specified kind of lock on each. Return an array of Relation pointers for
+ * the indexes into *Irel, and the number of indexes into *nindexes.
+ *
+ * We consider an index vacuumable if it is marked insertable (indisready).
+ * If it isn't, probably a CREATE INDEX CONCURRENTLY command failed early in
+ * execution, and what we have is too corrupt to be processable. We will
+ * vacuum even if the index isn't indisvalid; this is important because in a
+ * unique index, uniqueness checks will be performed anyway and had better not
+ * hit dangling index pointers.
+ */
+void
+vac_open_indexes(Relation relation, LOCKMODE lockmode,
+ int *nindexes, Relation **Irel)
+{
+ List *indexoidlist;
+ ListCell *indexoidscan;
+ int i;
+
+ Assert(lockmode != NoLock);
+
+ indexoidlist = RelationGetIndexList(relation);
+
+ /* allocate enough memory for all indexes */
+ i = list_length(indexoidlist);
+
+ if (i > 0)
+ *Irel = (Relation *) palloc(i * sizeof(Relation));
+ else
+ *Irel = NULL;
+
+ /* collect just the ready indexes */
+ i = 0;
+ foreach(indexoidscan, indexoidlist)
+ {
+ Oid indexoid = lfirst_oid(indexoidscan);
+ Relation indrel;
+
+ indrel = index_open(indexoid, lockmode);
+ if (indrel->rd_index->indisready)
+ (*Irel)[i++] = indrel;
+ else
+ index_close(indrel, lockmode);
+ }
+
+ *nindexes = i;
+
+ list_free(indexoidlist);
+}
+
+/*
+ * Release the resources acquired by vac_open_indexes. Optionally release
+ * the locks (say NoLock to keep 'em).
+ */
+void
+vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
+{
+ if (Irel == NULL)
+ return;
+
+ while (nindexes--)
+ {
+ Relation ind = Irel[nindexes];
+
+ index_close(ind, lockmode);
+ }
+ pfree(Irel);
+}
+
+/*
+ * vacuum_delay_point --- check for interrupts and cost-based delay.
+ *
+ * This should be called in each major loop of VACUUM processing,
+ * typically once per page processed.
+ */
+void
+vacuum_delay_point(void)
+{
+ double msec = 0;
+
+ /* Always check for interrupts */
+ CHECK_FOR_INTERRUPTS();
+
+ if (!VacuumCostActive || InterruptPending)
+ return;
+
+ /*
+ * For parallel vacuum, the delay is computed based on the shared cost
+ * balance. See compute_parallel_delay.
+ */
+ if (VacuumSharedCostBalance != NULL)
+ msec = compute_parallel_delay();
+ else if (VacuumCostBalance >= VacuumCostLimit)
+ msec = VacuumCostDelay * VacuumCostBalance / VacuumCostLimit;
+
+ /* Nap if appropriate */
+ if (msec > 0)
+ {
+ if (msec > VacuumCostDelay * 4)
+ msec = VacuumCostDelay * 4;
+
+ pgstat_report_wait_start(WAIT_EVENT_VACUUM_DELAY);
+ pg_usleep(msec * 1000);
+ pgstat_report_wait_end();
+
+ /*
+ * We don't want to ignore postmaster death during very long vacuums
+ * with vacuum_cost_delay configured. We can't use the usual
+ * WaitLatch() approach here because we want microsecond-based sleep
+ * durations above.
+ */
+ if (IsUnderPostmaster && !PostmasterIsAlive())
+ exit(1);
+
+ VacuumCostBalance = 0;
+
+ /* update balance values for workers */
+ AutoVacuumUpdateDelay();
+
+ /* Might have gotten an interrupt while sleeping */
+ CHECK_FOR_INTERRUPTS();
+ }
+}
+
/*
 * Computes the vacuum delay for parallel workers.
 *
 * The basic idea of a cost-based delay for parallel vacuum is to allow each
 * worker to sleep in proportion to the share of work it's done.  We achieve
 * this by giving all parallel vacuum workers, including the leader process,
 * a shared view of the cost-related parameters (mainly VacuumCostBalance).
 * Each worker updates the shared balance as it incurs cost, and then
 * decides, based on that, whether it needs to sleep.  The sleep time for a
 * worker is computed from the cost it has itself incurred
 * (VacuumCostBalanceLocal), and VacuumSharedCostBalance is then reduced by
 * that amount.  This avoids putting to sleep workers that have done less
 * I/O than others, ensuring that workers doing more I/O get throttled more.
 *
 * We allow a worker to sleep only if it has performed I/O above a certain
 * threshold, which is calculated based on the number of active workers
 * (VacuumActiveNWorkers), and the overall cost balance exceeds the
 * VacuumCostLimit set by the system.  Testing reveals that we achieve the
 * required throttling if we force a worker that has done more than 50% of
 * its share of work to sleep.
 */
static double
compute_parallel_delay(void)
{
	double		msec = 0;
	uint32		shared_balance;
	int			nworkers;

	/* Parallel vacuum must be active */
	Assert(VacuumSharedCostBalance);

	nworkers = pg_atomic_read_u32(VacuumActiveNWorkers);

	/* At least count itself */
	Assert(nworkers >= 1);

	/* Update the shared cost balance value atomically */
	shared_balance = pg_atomic_add_fetch_u32(VacuumSharedCostBalance, VacuumCostBalance);

	/* Compute the total local balance for the current worker */
	VacuumCostBalanceLocal += VacuumCostBalance;

	/* Sleep only past the limit, and only past half of our own fair share */
	if ((shared_balance >= VacuumCostLimit) &&
		(VacuumCostBalanceLocal > 0.5 * ((double) VacuumCostLimit / nworkers)))
	{
		/* Compute sleep time based on the local cost balance */
		msec = VacuumCostDelay * VacuumCostBalanceLocal / VacuumCostLimit;
		pg_atomic_sub_fetch_u32(VacuumSharedCostBalance, VacuumCostBalanceLocal);
		VacuumCostBalanceLocal = 0;
	}

	/*
	 * Reset the local balance as we have already accumulated it into the
	 * shared value above.
	 */
	VacuumCostBalance = 0;

	return msec;
}
+
+/*
+ * A wrapper function of defGetBoolean().
+ *
+ * This function returns VACOPTVALUE_ENABLED and VACOPTVALUE_DISABLED instead
+ * of true and false.
+ */
+static VacOptValue
+get_vacoptval_from_boolean(DefElem *def)
+{
+ return defGetBoolean(def) ? VACOPTVALUE_ENABLED : VACOPTVALUE_DISABLED;
+}
+
+/*
+ * vac_bulkdel_one_index() -- bulk-deletion for index relation.
+ *
+ * Returns bulk delete stats derived from input stats
+ */
+IndexBulkDeleteResult *
+vac_bulkdel_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat,
+ VacDeadItems *dead_items)
+{
+ /* Do bulk deletion */
+ istat = index_bulk_delete(ivinfo, istat, vac_tid_reaped,
+ (void *) dead_items);
+
+ ereport(ivinfo->message_level,
+ (errmsg("scanned index \"%s\" to remove %d row versions",
+ RelationGetRelationName(ivinfo->index),
+ dead_items->num_items)));
+
+ return istat;
+}
+
+/*
+ * vac_cleanup_one_index() -- do post-vacuum cleanup for index relation.
+ *
+ * Returns bulk delete stats derived from input stats
+ */
+IndexBulkDeleteResult *
+vac_cleanup_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat)
+{
+ istat = index_vacuum_cleanup(ivinfo, istat);
+
+ if (istat)
+ ereport(ivinfo->message_level,
+ (errmsg("index \"%s\" now contains %.0f row versions in %u pages",
+ RelationGetRelationName(ivinfo->index),
+ istat->num_index_tuples,
+ istat->num_pages),
+ errdetail("%.0f index row versions were removed.\n"
+ "%u index pages were newly deleted.\n"
+ "%u index pages are currently deleted, of which %u are currently reusable.",
+ istat->tuples_removed,
+ istat->pages_newly_deleted,
+ istat->pages_deleted, istat->pages_free)));
+
+ return istat;
+}
+
+/*
+ * Returns the total required space for VACUUM's dead_items array given a
+ * max_items value.
+ */
+Size
+vac_max_items_to_alloc_size(int max_items)
+{
+ Assert(max_items <= MAXDEADITEMS(MaxAllocSize));
+
+ return offsetof(VacDeadItems, items) + sizeof(ItemPointerData) * max_items;
+}
+
+/*
+ * vac_tid_reaped() -- is a particular tid deletable?
+ *
+ * This has the right signature to be an IndexBulkDeleteCallback.
+ *
+ * Assumes dead_items array is sorted (in ascending TID order).
+ */
+static bool
+vac_tid_reaped(ItemPointer itemptr, void *state)
+{
+ VacDeadItems *dead_items = (VacDeadItems *) state;
+ int64 litem,
+ ritem,
+ item;
+ ItemPointer res;
+
+ litem = itemptr_encode(&dead_items->items[0]);
+ ritem = itemptr_encode(&dead_items->items[dead_items->num_items - 1]);
+ item = itemptr_encode(itemptr);
+
+ /*
+ * Doing a simple bound check before bsearch() is useful to avoid the
+ * extra cost of bsearch(), especially if dead items on the heap are
+ * concentrated in a certain range. Since this function is called for
+ * every index tuple, it pays to be really fast.
+ */
+ if (item < litem || item > ritem)
+ return false;
+
+ res = (ItemPointer) bsearch((void *) itemptr,
+ (void *) dead_items->items,
+ dead_items->num_items,
+ sizeof(ItemPointerData),
+ vac_cmp_itemptr);
+
+ return (res != NULL);
+}
+
+/*
+ * Comparator routines for use with qsort() and bsearch().
+ */
+static int
+vac_cmp_itemptr(const void *left, const void *right)
+{
+ BlockNumber lblk,
+ rblk;
+ OffsetNumber loff,
+ roff;
+
+ lblk = ItemPointerGetBlockNumber((ItemPointer) left);
+ rblk = ItemPointerGetBlockNumber((ItemPointer) right);
+
+ if (lblk < rblk)
+ return -1;
+ if (lblk > rblk)
+ return 1;
+
+ loff = ItemPointerGetOffsetNumber((ItemPointer) left);
+ roff = ItemPointerGetOffsetNumber((ItemPointer) right);
+
+ if (loff < roff)
+ return -1;
+ if (loff > roff)
+ return 1;
+
+ return 0;
+}
diff --git a/src/backend/commands/vacuumparallel.c b/src/backend/commands/vacuumparallel.c
new file mode 100644
index 0000000..f26d796
--- /dev/null
+++ b/src/backend/commands/vacuumparallel.c
@@ -0,0 +1,1074 @@
+/*-------------------------------------------------------------------------
+ *
+ * vacuumparallel.c
+ * Support routines for parallel vacuum execution.
+ *
+ * This file contains routines that are intended to support setting up, using,
+ * and tearing down a ParallelVacuumState.
+ *
+ * In a parallel vacuum, we perform both index bulk deletion and index cleanup
+ * with parallel worker processes. Individual indexes are processed by one
+ * vacuum process.  ParallelVacuumState contains shared information as well as
+ * the memory space for storing dead items allocated in the DSM segment. We
+ * launch parallel worker processes at the start of parallel index
+ * bulk-deletion and index cleanup and once all indexes are processed, the
+ * parallel worker processes exit. Each time we process indexes in parallel,
+ * the parallel context is re-initialized so that the same DSM can be used for
+ * multiple passes of index bulk-deletion and index cleanup.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/commands/vacuumparallel.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/amapi.h"
+#include "access/table.h"
+#include "access/xact.h"
+#include "catalog/index.h"
+#include "commands/vacuum.h"
+#include "optimizer/paths.h"
+#include "pgstat.h"
+#include "storage/bufmgr.h"
+#include "tcop/tcopprot.h"
+#include "utils/lsyscache.h"
+#include "utils/rel.h"
+
+/*
+ * DSM keys for parallel vacuum. Unlike other parallel execution code, since
+ * we don't need to worry about DSM keys conflicting with plan_node_id we can
+ * use small integers.
+ */
+#define PARALLEL_VACUUM_KEY_SHARED 1
+#define PARALLEL_VACUUM_KEY_DEAD_ITEMS 2
+#define PARALLEL_VACUUM_KEY_QUERY_TEXT 3
+#define PARALLEL_VACUUM_KEY_BUFFER_USAGE 4
+#define PARALLEL_VACUUM_KEY_WAL_USAGE 5
+#define PARALLEL_VACUUM_KEY_INDEX_STATS 6
+
/*
 * Shared information among parallel workers.  So this is allocated in the
 * DSM segment.
 */
typedef struct PVShared
{
	/*
	 * Target table relid and log level (for messages about parallel workers
	 * launched during VACUUM VERBOSE).  These fields are not modified during
	 * the parallel vacuum.
	 */
	Oid			relid;
	int			elevel;

	/*
	 * Fields for both index vacuum and cleanup.
	 *
	 * reltuples is the total number of input heap tuples.  We set either old
	 * live tuples in the index vacuum case or the new live tuples in the
	 * index cleanup case.
	 *
	 * estimated_count is true if reltuples is an estimated value.  (Note
	 * that reltuples could be -1 in this case, indicating we have no idea.)
	 */
	double		reltuples;
	bool		estimated_count;

	/*
	 * In single process vacuum we could consume more memory during index
	 * vacuuming or cleanup apart from the memory for heap scanning.  In
	 * parallel vacuum, since individual vacuum workers can consume memory
	 * equal to maintenance_work_mem, the new maintenance_work_mem for each
	 * worker is set such that the parallel operation doesn't consume more
	 * memory than single process vacuum.
	 */
	int			maintenance_work_mem_worker;

	/*
	 * Shared vacuum cost balance.  During parallel vacuum,
	 * VacuumSharedCostBalance points to this value and it accumulates the
	 * balance of each parallel vacuum worker.
	 */
	pg_atomic_uint32 cost_balance;

	/*
	 * Number of active parallel workers.  This is used for computing the
	 * minimum threshold of the vacuum cost balance before a worker sleeps
	 * for cost-based delay.
	 */
	pg_atomic_uint32 active_nworkers;

	/*
	 * Counter for vacuuming and cleanup.  Reset to 0 before each parallel
	 * index-processing pass (see parallel_vacuum_process_all_indexes);
	 * presumably fetch-and-incremented by participants to claim the next
	 * index -- confirm against the per-index worker loop.
	 */
	pg_atomic_uint32 idx;
} PVShared;
+
/*
 * Status used during parallel index vacuum or cleanup.  The leader moves
 * every index from INITIAL to one of the NEED_* states before launching a
 * pass (see parallel_vacuum_process_all_indexes).
 */
typedef enum PVIndVacStatus
{
	PARALLEL_INDVAC_STATUS_INITIAL = 0, /* required state between passes */
	PARALLEL_INDVAC_STATUS_NEED_BULKDELETE, /* scheduled for index bulk-deletion */
	PARALLEL_INDVAC_STATUS_NEED_CLEANUP,	/* scheduled for index cleanup */
	PARALLEL_INDVAC_STATUS_COMPLETED
} PVIndVacStatus;
+
/*
 * Struct for index vacuum statistics of an index that is used for parallel
 * vacuum.  This includes the status of parallel index vacuum as well as
 * index statistics.
 */
typedef struct PVIndStats
{
	/*
	 * The following two fields are set by the leader process before
	 * executing parallel index vacuum or parallel index cleanup.  These
	 * fields are not fixed for the entire VACUUM operation.  They are only
	 * fixed for an individual parallel index vacuum and cleanup.
	 *
	 * parallel_workers_can_process is true if both leader and worker can
	 * process the index, otherwise only the leader can process it.
	 */
	PVIndVacStatus status;
	bool		parallel_workers_can_process;

	/*
	 * Individual worker or leader stores the result of index vacuum or
	 * cleanup here.
	 */
	bool		istat_updated;	/* are the stats updated? */
	IndexBulkDeleteResult istat;	/* valid only when istat_updated is true */
} PVIndStats;
+
/*
 * Struct for maintaining a parallel vacuum state.  typedef appears in
 * vacuum.h.
 */
struct ParallelVacuumState
{
	/* NULL for worker processes */
	ParallelContext *pcxt;

	/* Target indexes */
	Relation   *indrels;
	int			nindexes;

	/* Shared information among parallel vacuum workers */
	PVShared   *shared;

	/*
	 * Shared index statistics among parallel vacuum workers.  The array
	 * element is allocated for every index, even those indexes where
	 * parallel index vacuuming is unsafe or not worthwhile (e.g.,
	 * will_parallel_vacuum[] is false).  During parallel vacuum,
	 * IndexBulkDeleteResult of each index is kept in DSM and is copied into
	 * local memory at the end of parallel vacuum.
	 */
	PVIndStats *indstats;

	/* Shared dead items space among parallel vacuum workers */
	VacDeadItems *dead_items;

	/* Points to buffer usage area in DSM */
	BufferUsage *buffer_usage;

	/* Points to WAL usage area in DSM */
	WalUsage   *wal_usage;

	/*
	 * False if the index is a totally unsuitable target for all parallel
	 * processing.  For example, the index could be smaller than the
	 * min_parallel_index_scan_size cutoff.
	 */
	bool	   *will_parallel_vacuum;

	/*
	 * The number of indexes that support parallel index bulk-deletion and
	 * parallel index cleanup respectively.
	 */
	int			nindexes_parallel_bulkdel;
	int			nindexes_parallel_cleanup;
	int			nindexes_parallel_condcleanup;

	/* Buffer access strategy used by leader process */
	BufferAccessStrategy bstrategy;

	/*
	 * Error reporting state.  The error callback is set only for worker
	 * processes during parallel index vacuum.
	 */
	char	   *relnamespace;
	char	   *relname;
	char	   *indname;
	PVIndVacStatus status;
};
+
+static int parallel_vacuum_compute_workers(Relation *indrels, int nindexes, int nrequested,
+ bool *will_parallel_vacuum);
+static void parallel_vacuum_process_all_indexes(ParallelVacuumState *pvs, int num_index_scans,
+ bool vacuum);
+static void parallel_vacuum_process_safe_indexes(ParallelVacuumState *pvs);
+static void parallel_vacuum_process_unsafe_indexes(ParallelVacuumState *pvs);
+static void parallel_vacuum_process_one_index(ParallelVacuumState *pvs, Relation indrel,
+ PVIndStats *indstats);
+static bool parallel_vacuum_index_is_parallel_safe(Relation indrel, int num_index_scans,
+ bool vacuum);
+static void parallel_vacuum_error_callback(void *arg);
+
/*
 * Try to enter parallel mode and create a parallel context.  Then initialize
 * shared memory state.
 *
 * On success, return parallel vacuum state.  Otherwise return NULL.
 *
 * rel/indrels/nindexes describe the target table and its indexes;
 * nrequested_workers is the user-requested parallel degree (0 means
 * compute it automatically); max_items sizes the shared dead-items array;
 * elevel is the log level for messages about parallel workers; bstrategy
 * is the buffer access strategy used by the leader.
 */
ParallelVacuumState *
parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes,
					 int nrequested_workers, int max_items,
					 int elevel, BufferAccessStrategy bstrategy)
{
	ParallelVacuumState *pvs;
	ParallelContext *pcxt;
	PVShared   *shared;
	VacDeadItems *dead_items;
	PVIndStats *indstats;
	BufferUsage *buffer_usage;
	WalUsage   *wal_usage;
	bool	   *will_parallel_vacuum;
	Size		est_indstats_len;
	Size		est_shared_len;
	Size		est_dead_items_len;
	int			nindexes_mwm = 0;
	int			parallel_workers = 0;
	int			querylen;

	/*
	 * A parallel vacuum must be requested and there must be indexes on the
	 * relation
	 */
	Assert(nrequested_workers >= 0);
	Assert(nindexes > 0);

	/*
	 * Compute the number of parallel vacuum workers to launch
	 */
	will_parallel_vacuum = (bool *) palloc0(sizeof(bool) * nindexes);
	parallel_workers = parallel_vacuum_compute_workers(indrels, nindexes,
													   nrequested_workers,
													   will_parallel_vacuum);
	if (parallel_workers <= 0)
	{
		/* Can't perform vacuum in parallel -- return NULL */
		pfree(will_parallel_vacuum);
		return NULL;
	}

	pvs = (ParallelVacuumState *) palloc0(sizeof(ParallelVacuumState));
	pvs->indrels = indrels;
	pvs->nindexes = nindexes;
	pvs->will_parallel_vacuum = will_parallel_vacuum;
	pvs->bstrategy = bstrategy;

	EnterParallelMode();
	pcxt = CreateParallelContext("postgres", "parallel_vacuum_main",
								 parallel_workers);
	Assert(pcxt->nworkers > 0);
	pvs->pcxt = pcxt;

	/* Estimate size for index vacuum stats -- PARALLEL_VACUUM_KEY_INDEX_STATS */
	est_indstats_len = mul_size(sizeof(PVIndStats), nindexes);
	shm_toc_estimate_chunk(&pcxt->estimator, est_indstats_len);
	shm_toc_estimate_keys(&pcxt->estimator, 1);

	/* Estimate size for shared information -- PARALLEL_VACUUM_KEY_SHARED */
	est_shared_len = sizeof(PVShared);
	shm_toc_estimate_chunk(&pcxt->estimator, est_shared_len);
	shm_toc_estimate_keys(&pcxt->estimator, 1);

	/* Estimate size for dead_items -- PARALLEL_VACUUM_KEY_DEAD_ITEMS */
	est_dead_items_len = vac_max_items_to_alloc_size(max_items);
	shm_toc_estimate_chunk(&pcxt->estimator, est_dead_items_len);
	shm_toc_estimate_keys(&pcxt->estimator, 1);

	/*
	 * Estimate space for BufferUsage and WalUsage --
	 * PARALLEL_VACUUM_KEY_BUFFER_USAGE and PARALLEL_VACUUM_KEY_WAL_USAGE.
	 *
	 * If there are no extensions loaded that care, we could skip this.  We
	 * have no way of knowing whether anyone's looking at pgBufferUsage or
	 * pgWalUsage, so do it unconditionally.
	 */
	shm_toc_estimate_chunk(&pcxt->estimator,
						   mul_size(sizeof(BufferUsage), pcxt->nworkers));
	shm_toc_estimate_keys(&pcxt->estimator, 1);
	shm_toc_estimate_chunk(&pcxt->estimator,
						   mul_size(sizeof(WalUsage), pcxt->nworkers));
	shm_toc_estimate_keys(&pcxt->estimator, 1);

	/* Finally, estimate PARALLEL_VACUUM_KEY_QUERY_TEXT space */
	if (debug_query_string)
	{
		querylen = strlen(debug_query_string);
		shm_toc_estimate_chunk(&pcxt->estimator, querylen + 1);
		shm_toc_estimate_keys(&pcxt->estimator, 1);
	}
	else
		querylen = 0;			/* keep compiler quiet */

	InitializeParallelDSM(pcxt);

	/* Prepare index vacuum stats */
	indstats = (PVIndStats *) shm_toc_allocate(pcxt->toc, est_indstats_len);
	MemSet(indstats, 0, est_indstats_len);
	for (int i = 0; i < nindexes; i++)
	{
		Relation	indrel = indrels[i];
		uint8		vacoptions = indrel->rd_indam->amparallelvacuumoptions;

		/*
		 * Cleanup option should be either disabled, always performing in
		 * parallel or conditionally performing in parallel.
		 */
		Assert(((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) == 0) ||
			   ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) == 0));
		Assert(vacoptions <= VACUUM_OPTION_MAX_VALID_VALUE);

		if (!will_parallel_vacuum[i])
			continue;

		if (indrel->rd_indam->amusemaintenanceworkmem)
			nindexes_mwm++;

		/*
		 * Remember the number of indexes that support parallel operation for
		 * each phase.
		 */
		if ((vacoptions & VACUUM_OPTION_PARALLEL_BULKDEL) != 0)
			pvs->nindexes_parallel_bulkdel++;
		if ((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) != 0)
			pvs->nindexes_parallel_cleanup++;
		if ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) != 0)
			pvs->nindexes_parallel_condcleanup++;
	}
	shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_INDEX_STATS, indstats);
	pvs->indstats = indstats;

	/* Prepare shared information */
	shared = (PVShared *) shm_toc_allocate(pcxt->toc, est_shared_len);
	MemSet(shared, 0, est_shared_len);
	shared->relid = RelationGetRelid(rel);
	shared->elevel = elevel;
	/* Divide maintenance_work_mem among the workers that will use it */
	shared->maintenance_work_mem_worker =
		(nindexes_mwm > 0) ?
		maintenance_work_mem / Min(parallel_workers, nindexes_mwm) :
		maintenance_work_mem;

	pg_atomic_init_u32(&(shared->cost_balance), 0);
	pg_atomic_init_u32(&(shared->active_nworkers), 0);
	pg_atomic_init_u32(&(shared->idx), 0);

	shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_SHARED, shared);
	pvs->shared = shared;

	/* Prepare the dead_items space */
	dead_items = (VacDeadItems *) shm_toc_allocate(pcxt->toc,
												   est_dead_items_len);
	dead_items->max_items = max_items;
	dead_items->num_items = 0;
	MemSet(dead_items->items, 0, sizeof(ItemPointerData) * max_items);
	shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_DEAD_ITEMS, dead_items);
	pvs->dead_items = dead_items;

	/*
	 * Allocate space for each worker's BufferUsage and WalUsage; no need to
	 * initialize
	 */
	buffer_usage = shm_toc_allocate(pcxt->toc,
									mul_size(sizeof(BufferUsage), pcxt->nworkers));
	shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_BUFFER_USAGE, buffer_usage);
	pvs->buffer_usage = buffer_usage;
	wal_usage = shm_toc_allocate(pcxt->toc,
								 mul_size(sizeof(WalUsage), pcxt->nworkers));
	shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_WAL_USAGE, wal_usage);
	pvs->wal_usage = wal_usage;

	/* Store query string for workers */
	if (debug_query_string)
	{
		char	   *sharedquery;

		sharedquery = (char *) shm_toc_allocate(pcxt->toc, querylen + 1);
		memcpy(sharedquery, debug_query_string, querylen + 1);
		/* memcpy already copied the NUL; this is belt-and-braces */
		sharedquery[querylen] = '\0';
		shm_toc_insert(pcxt->toc,
					   PARALLEL_VACUUM_KEY_QUERY_TEXT, sharedquery);
	}

	/* Success -- return parallel vacuum state */
	return pvs;
}
+
+/*
+ * Destroy the parallel context, and end parallel mode.
+ *
+ * Since writes are not allowed during parallel mode, copy the
+ * updated index statistics from DSM into local memory and then later use that
+ * to update the index statistics. One might think that we can exit from
+ * parallel mode, update the index statistics and then destroy parallel
+ * context, but that won't be safe (see ExitParallelMode).
+ */
+void
+parallel_vacuum_end(ParallelVacuumState *pvs, IndexBulkDeleteResult **istats)
+{
+ Assert(!IsParallelWorker());
+
+ /* Copy the updated statistics */
+ for (int i = 0; i < pvs->nindexes; i++)
+ {
+ PVIndStats *indstats = &(pvs->indstats[i]);
+
+ if (indstats->istat_updated)
+ {
+ istats[i] = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
+ memcpy(istats[i], &indstats->istat, sizeof(IndexBulkDeleteResult));
+ }
+ else
+ istats[i] = NULL;
+ }
+
+ DestroyParallelContext(pvs->pcxt);
+ ExitParallelMode();
+
+ pfree(pvs->will_parallel_vacuum);
+ pfree(pvs);
+}
+
+/* Returns the dead items space */
+VacDeadItems *
+parallel_vacuum_get_dead_items(ParallelVacuumState *pvs)
+{
+ return pvs->dead_items;
+}
+
+/*
+ * Do parallel index bulk-deletion with parallel workers.
+ */
+void
+parallel_vacuum_bulkdel_all_indexes(ParallelVacuumState *pvs, long num_table_tuples,
+ int num_index_scans)
+{
+ Assert(!IsParallelWorker());
+
+ /*
+ * We can only provide an approximate value of num_heap_tuples, at least
+ * for now.
+ */
+ pvs->shared->reltuples = num_table_tuples;
+ pvs->shared->estimated_count = true;
+
+ parallel_vacuum_process_all_indexes(pvs, num_index_scans, true);
+}
+
+/*
+ * Do parallel index cleanup with parallel workers.
+ */
+void
+parallel_vacuum_cleanup_all_indexes(ParallelVacuumState *pvs, long num_table_tuples,
+ int num_index_scans, bool estimated_count)
+{
+ Assert(!IsParallelWorker());
+
+ /*
+ * We can provide a better estimate of total number of surviving tuples
+ * (we assume indexes are more interested in that than in the number of
+ * nominally live tuples).
+ */
+ pvs->shared->reltuples = num_table_tuples;
+ pvs->shared->estimated_count = estimated_count;
+
+ parallel_vacuum_process_all_indexes(pvs, num_index_scans, false);
+}
+
+/*
+ * Compute the number of parallel worker processes to request. Both index
+ * vacuum and index cleanup can be executed with parallel workers.
+ * The index is eligible for parallel vacuum iff its size is greater than
+ * min_parallel_index_scan_size as invoking workers for very small indexes
+ * can hurt performance.
+ *
+ * nrequested is the number of parallel workers that user requested. If
+ * nrequested is 0, we compute the parallel degree based on nindexes, that is
+ * the number of indexes that support parallel vacuum. This function also
+ * sets will_parallel_vacuum to remember indexes that participate in parallel
+ * vacuum.
+ */
+static int
+parallel_vacuum_compute_workers(Relation *indrels, int nindexes, int nrequested,
+ bool *will_parallel_vacuum)
+{
+ int nindexes_parallel = 0;
+ int nindexes_parallel_bulkdel = 0;
+ int nindexes_parallel_cleanup = 0;
+ int parallel_workers;
+
+ /*
+ * We don't allow performing parallel operation in standalone backend or
+ * when parallelism is disabled.
+ */
+ if (!IsUnderPostmaster || max_parallel_maintenance_workers == 0)
+ return 0;
+
+ /*
+ * Compute the number of indexes that can participate in parallel vacuum.
+ */
+ for (int i = 0; i < nindexes; i++)
+ {
+ Relation indrel = indrels[i];
+ uint8 vacoptions = indrel->rd_indam->amparallelvacuumoptions;
+
+ /* Skip index that is not a suitable target for parallel index vacuum */
+ if (vacoptions == VACUUM_OPTION_NO_PARALLEL ||
+ RelationGetNumberOfBlocks(indrel) < min_parallel_index_scan_size)
+ continue;
+
+ will_parallel_vacuum[i] = true;
+
+ if ((vacoptions & VACUUM_OPTION_PARALLEL_BULKDEL) != 0)
+ nindexes_parallel_bulkdel++;
+ if (((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) != 0) ||
+ ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) != 0))
+ nindexes_parallel_cleanup++;
+ }
+
+ nindexes_parallel = Max(nindexes_parallel_bulkdel,
+ nindexes_parallel_cleanup);
+
+ /* The leader process takes one index */
+ nindexes_parallel--;
+
+ /* No index supports parallel vacuum */
+ if (nindexes_parallel <= 0)
+ return 0;
+
+ /* Compute the parallel degree */
+ parallel_workers = (nrequested > 0) ?
+ Min(nrequested, nindexes_parallel) : nindexes_parallel;
+
+ /* Cap by max_parallel_maintenance_workers */
+ parallel_workers = Min(parallel_workers, max_parallel_maintenance_workers);
+
+ return parallel_workers;
+}
+
/*
 * Perform index vacuum or index cleanup with parallel workers. This function
 * must be used by the parallel vacuum leader process.
 *
 * 'num_index_scans' is the number of times the indexes have been processed
 * so far in this VACUUM; it controls whether the parallel context must be
 * reinitialized (to relaunch workers) and whether conditionally
 * parallel-aware indexes may still be handed to workers.  'vacuum' selects
 * bulk-deletion (true) or cleanup (false).
 */
static void
parallel_vacuum_process_all_indexes(ParallelVacuumState *pvs, int num_index_scans,
									bool vacuum)
{
	int			nworkers;
	PVIndVacStatus new_status;

	Assert(!IsParallelWorker());

	if (vacuum)
	{
		new_status = PARALLEL_INDVAC_STATUS_NEED_BULKDELETE;

		/* Determine the number of parallel workers to launch */
		nworkers = pvs->nindexes_parallel_bulkdel;
	}
	else
	{
		new_status = PARALLEL_INDVAC_STATUS_NEED_CLEANUP;

		/* Determine the number of parallel workers to launch */
		nworkers = pvs->nindexes_parallel_cleanup;

		/* Add conditionally parallel-aware indexes if this is the first call */
		if (num_index_scans == 0)
			nworkers += pvs->nindexes_parallel_condcleanup;
	}

	/* The leader process will participate */
	nworkers--;

	/*
	 * It is possible that parallel context is initialized with fewer workers
	 * than the number of indexes that need a separate worker in the current
	 * phase, so we need to consider it. See
	 * parallel_vacuum_compute_workers().
	 */
	nworkers = Min(nworkers, pvs->pcxt->nworkers);

	/*
	 * Set index vacuum status and mark whether parallel vacuum worker can
	 * process it.
	 */
	for (int i = 0; i < pvs->nindexes; i++)
	{
		PVIndStats *indstats = &(pvs->indstats[i]);

		Assert(indstats->status == PARALLEL_INDVAC_STATUS_INITIAL);
		indstats->status = new_status;
		indstats->parallel_workers_can_process =
			(pvs->will_parallel_vacuum[i] &&
			 parallel_vacuum_index_is_parallel_safe(pvs->indrels[i],
													num_index_scans,
													vacuum));
	}

	/* Reset the parallel index processing counter */
	pg_atomic_write_u32(&(pvs->shared->idx), 0);

	/* Setup the shared cost-based vacuum delay and launch workers */
	if (nworkers > 0)
	{
		/* Reinitialize parallel context to relaunch parallel workers */
		if (num_index_scans > 0)
			ReinitializeParallelDSM(pvs->pcxt);

		/*
		 * Set up shared cost balance and the number of active workers for
		 * vacuum delay. We need to do this before launching workers as
		 * otherwise, they might not see the updated values for these
		 * parameters.
		 */
		pg_atomic_write_u32(&(pvs->shared->cost_balance), VacuumCostBalance);
		pg_atomic_write_u32(&(pvs->shared->active_nworkers), 0);

		/*
		 * The number of workers can vary between bulkdelete and cleanup
		 * phase.
		 */
		ReinitializeParallelWorkers(pvs->pcxt, nworkers);

		LaunchParallelWorkers(pvs->pcxt);

		if (pvs->pcxt->nworkers_launched > 0)
		{
			/*
			 * Reset the local cost values for leader backend as we have
			 * already accumulated the remaining balance of heap.
			 */
			VacuumCostBalance = 0;
			VacuumCostBalanceLocal = 0;

			/* Enable shared cost balance for leader backend */
			VacuumSharedCostBalance = &(pvs->shared->cost_balance);
			VacuumActiveNWorkers = &(pvs->shared->active_nworkers);
		}

		if (vacuum)
			ereport(pvs->shared->elevel,
					(errmsg(ngettext("launched %d parallel vacuum worker for index vacuuming (planned: %d)",
									 "launched %d parallel vacuum workers for index vacuuming (planned: %d)",
									 pvs->pcxt->nworkers_launched),
							pvs->pcxt->nworkers_launched, nworkers)));
		else
			ereport(pvs->shared->elevel,
					(errmsg(ngettext("launched %d parallel vacuum worker for index cleanup (planned: %d)",
									 "launched %d parallel vacuum workers for index cleanup (planned: %d)",
									 pvs->pcxt->nworkers_launched),
							pvs->pcxt->nworkers_launched, nworkers)));
	}

	/* Vacuum the indexes that can be processed by only leader process */
	parallel_vacuum_process_unsafe_indexes(pvs);

	/*
	 * Join as a parallel worker.  The leader process alone processes all
	 * parallel-safe indexes in the case where no workers are launched.
	 */
	parallel_vacuum_process_safe_indexes(pvs);

	/*
	 * Next, accumulate buffer and WAL usage. (This must wait for the workers
	 * to finish, or we might get incomplete data.)
	 */
	if (nworkers > 0)
	{
		/* Wait for all vacuum workers to finish */
		WaitForParallelWorkersToFinish(pvs->pcxt);

		for (int i = 0; i < pvs->pcxt->nworkers_launched; i++)
			InstrAccumParallelQuery(&pvs->buffer_usage[i], &pvs->wal_usage[i]);
	}

	/*
	 * Reset all index status back to initial (while checking that we have
	 * vacuumed all indexes).
	 */
	for (int i = 0; i < pvs->nindexes; i++)
	{
		PVIndStats *indstats = &(pvs->indstats[i]);

		if (indstats->status != PARALLEL_INDVAC_STATUS_COMPLETED)
			elog(ERROR, "parallel index vacuum on index \"%s\" is not completed",
				 RelationGetRelationName(pvs->indrels[i]));

		indstats->status = PARALLEL_INDVAC_STATUS_INITIAL;
	}

	/*
	 * Carry the shared balance value to heap scan and disable shared costing
	 */
	if (VacuumSharedCostBalance)
	{
		VacuumCostBalance = pg_atomic_read_u32(VacuumSharedCostBalance);
		VacuumSharedCostBalance = NULL;
		VacuumActiveNWorkers = NULL;
	}
}
+
/*
 * Index vacuum/cleanup routine used by the leader process and parallel
 * vacuum worker processes to vacuum the indexes in parallel.
 *
 * Participants claim indexes via the shared atomic counter pvs->shared->idx,
 * so each index is handed to exactly one process.
 */
static void
parallel_vacuum_process_safe_indexes(ParallelVacuumState *pvs)
{
	/*
	 * Increment the active worker count if we are able to launch any worker.
	 */
	if (VacuumActiveNWorkers)
		pg_atomic_add_fetch_u32(VacuumActiveNWorkers, 1);

	/* Loop until all indexes are vacuumed */
	for (;;)
	{
		int			idx;
		PVIndStats *indstats;

		/* Get an index number to process (atomic fetch-and-increment) */
		idx = pg_atomic_fetch_add_u32(&(pvs->shared->idx), 1);

		/* Done for all indexes? */
		if (idx >= pvs->nindexes)
			break;

		indstats = &(pvs->indstats[idx]);

		/*
		 * Skip vacuuming index that is unsafe for workers or has an
		 * unsuitable target for parallel index vacuum (this is vacuumed in
		 * parallel_vacuum_process_unsafe_indexes() by the leader).
		 */
		if (!indstats->parallel_workers_can_process)
			continue;

		/* Do vacuum or cleanup of the index */
		parallel_vacuum_process_one_index(pvs, pvs->indrels[idx], indstats);
	}

	/*
	 * We have completed the index vacuum so decrement the active worker
	 * count.
	 */
	if (VacuumActiveNWorkers)
		pg_atomic_sub_fetch_u32(VacuumActiveNWorkers, 1);
}
+
/*
 * Perform parallel vacuuming of indexes in leader process.
 *
 * Handles index vacuuming (or index cleanup) for indexes that are not
 * parallel safe.  It's possible that this will vary for a given index, based
 * on details like whether we're performing index cleanup right now.
 *
 * Also performs vacuuming of smaller indexes that fell under the size cutoff
 * enforced by parallel_vacuum_compute_workers().
 */
static void
parallel_vacuum_process_unsafe_indexes(ParallelVacuumState *pvs)
{
	Assert(!IsParallelWorker());

	/*
	 * Increment the active worker count if we are able to launch any worker.
	 */
	if (VacuumActiveNWorkers)
		pg_atomic_add_fetch_u32(VacuumActiveNWorkers, 1);

	for (int i = 0; i < pvs->nindexes; i++)
	{
		PVIndStats *indstats = &(pvs->indstats[i]);

		/* Skip indexes that are safe for workers */
		if (indstats->parallel_workers_can_process)
			continue;

		/* Do vacuum or cleanup of the index */
		parallel_vacuum_process_one_index(pvs, pvs->indrels[i], indstats);
	}

	/*
	 * We have completed the index vacuum so decrement the active worker
	 * count.
	 */
	if (VacuumActiveNWorkers)
		pg_atomic_sub_fetch_u32(VacuumActiveNWorkers, 1);
}
+
/*
 * Vacuum or cleanup index either by leader process or by one of the worker
 * process. After vacuuming the index this function copies the index
 * statistics returned from ambulkdelete and amvacuumcleanup to the DSM
 * segment.
 */
static void
parallel_vacuum_process_one_index(ParallelVacuumState *pvs, Relation indrel,
								  PVIndStats *indstats)
{
	IndexBulkDeleteResult *istat = NULL;
	IndexBulkDeleteResult *istat_res;
	IndexVacuumInfo ivinfo;

	/*
	 * Update the pointer to the corresponding bulk-deletion result if someone
	 * has already updated it
	 */
	if (indstats->istat_updated)
		istat = &(indstats->istat);

	/* Fill in the index-vacuum request passed to the access method */
	ivinfo.index = indrel;
	ivinfo.analyze_only = false;
	ivinfo.report_progress = false;
	ivinfo.message_level = DEBUG2;
	ivinfo.estimated_count = pvs->shared->estimated_count;
	ivinfo.num_heap_tuples = pvs->shared->reltuples;
	ivinfo.strategy = pvs->bstrategy;

	/* Update error traceback information */
	pvs->indname = pstrdup(RelationGetRelationName(indrel));
	pvs->status = indstats->status;

	/* Dispatch on the requested phase; note elog(ERROR) does not return */
	switch (indstats->status)
	{
		case PARALLEL_INDVAC_STATUS_NEED_BULKDELETE:
			istat_res = vac_bulkdel_one_index(&ivinfo, istat, pvs->dead_items);
			break;
		case PARALLEL_INDVAC_STATUS_NEED_CLEANUP:
			istat_res = vac_cleanup_one_index(&ivinfo, istat);
			break;
		default:
			elog(ERROR, "unexpected parallel vacuum index status %d for index \"%s\"",
				 indstats->status,
				 RelationGetRelationName(indrel));
	}

	/*
	 * Copy the index bulk-deletion result returned from ambulkdelete and
	 * amvacuumcleanup to the DSM segment if it's the first cycle because they
	 * allocate locally and it's possible that an index will be vacuumed by a
	 * different vacuum process the next cycle. Copying the result normally
	 * happens only the first time an index is vacuumed. For any additional
	 * vacuum pass, we directly point to the result on the DSM segment and
	 * pass it to vacuum index APIs so that workers can update it directly.
	 *
	 * Since all vacuum workers write the bulk-deletion result at different
	 * slots we can write them without locking.
	 */
	if (!indstats->istat_updated && istat_res != NULL)
	{
		memcpy(&(indstats->istat), istat_res, sizeof(IndexBulkDeleteResult));
		indstats->istat_updated = true;

		/* Free the locally-allocated bulk-deletion result */
		pfree(istat_res);
	}

	/*
	 * Update the status to completed. No need to lock here since each worker
	 * touches different indexes.
	 */
	indstats->status = PARALLEL_INDVAC_STATUS_COMPLETED;

	/* Reset error traceback information */
	pvs->status = PARALLEL_INDVAC_STATUS_COMPLETED;
	pfree(pvs->indname);
	pvs->indname = NULL;
}
+
+/*
+ * Returns false, if the given index can't participate in the next execution of
+ * parallel index vacuum or parallel index cleanup.
+ */
+static bool
+parallel_vacuum_index_is_parallel_safe(Relation indrel, int num_index_scans,
+ bool vacuum)
+{
+ uint8 vacoptions;
+
+ vacoptions = indrel->rd_indam->amparallelvacuumoptions;
+
+ /* In parallel vacuum case, check if it supports parallel bulk-deletion */
+ if (vacuum)
+ return ((vacoptions & VACUUM_OPTION_PARALLEL_BULKDEL) != 0);
+
+ /* Not safe, if the index does not support parallel cleanup */
+ if (((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) == 0) &&
+ ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) == 0))
+ return false;
+
+ /*
+ * Not safe, if the index supports parallel cleanup conditionally, but we
+ * have already processed the index (for bulkdelete). We do this to avoid
+ * the need to invoke workers when parallel index cleanup doesn't need to
+ * scan the index. See the comments for option
+ * VACUUM_OPTION_PARALLEL_COND_CLEANUP to know when indexes support
+ * parallel cleanup conditionally.
+ */
+ if (num_index_scans > 0 &&
+ ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) != 0))
+ return false;
+
+ return true;
+}
+
/*
 * Perform work within a launched parallel process.
 *
 * Since parallel vacuum workers perform only index vacuum or index cleanup,
 * we don't need to report progress information.
 *
 * 'toc' gives access to the shared state set up by the leader; each piece is
 * looked up by its PARALLEL_VACUUM_KEY_* entry.
 */
void
parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
{
	ParallelVacuumState pvs;
	Relation	rel;
	Relation   *indrels;
	PVIndStats *indstats;
	PVShared   *shared;
	VacDeadItems *dead_items;
	BufferUsage *buffer_usage;
	WalUsage   *wal_usage;
	int			nindexes;
	char	   *sharedquery;
	ErrorContextCallback errcallback;

	/*
	 * A parallel vacuum worker must have only PROC_IN_VACUUM flag since we
	 * don't support parallel vacuum for autovacuum as of now.
	 */
	Assert(MyProc->statusFlags == PROC_IN_VACUUM);

	elog(DEBUG1, "starting parallel vacuum worker");

	shared = (PVShared *) shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_SHARED, false);

	/* Set debug_query_string for individual workers */
	sharedquery = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_QUERY_TEXT, true);
	debug_query_string = sharedquery;
	pgstat_report_activity(STATE_RUNNING, debug_query_string);

	/*
	 * Open table.  The lock mode is the same as the leader process.  It's
	 * okay because the lock mode does not conflict among the parallel
	 * workers.
	 */
	rel = table_open(shared->relid, ShareUpdateExclusiveLock);

	/*
	 * Open all indexes. indrels are sorted in order by OID, which should be
	 * matched to the leader's one.
	 */
	vac_open_indexes(rel, RowExclusiveLock, &nindexes, &indrels);
	Assert(nindexes > 0);

	if (shared->maintenance_work_mem_worker > 0)
		maintenance_work_mem = shared->maintenance_work_mem_worker;

	/* Set index statistics */
	indstats = (PVIndStats *) shm_toc_lookup(toc,
											 PARALLEL_VACUUM_KEY_INDEX_STATS,
											 false);

	/* Set dead_items space */
	dead_items = (VacDeadItems *) shm_toc_lookup(toc,
												 PARALLEL_VACUUM_KEY_DEAD_ITEMS,
												 false);

	/* Set cost-based vacuum delay */
	VacuumCostActive = (VacuumCostDelay > 0);
	VacuumCostBalance = 0;
	VacuumPageHit = 0;
	VacuumPageMiss = 0;
	VacuumPageDirty = 0;
	VacuumCostBalanceLocal = 0;
	VacuumSharedCostBalance = &(shared->cost_balance);
	VacuumActiveNWorkers = &(shared->active_nworkers);

	/* Set parallel vacuum state */
	pvs.indrels = indrels;
	pvs.nindexes = nindexes;
	pvs.indstats = indstats;
	pvs.shared = shared;
	pvs.dead_items = dead_items;
	pvs.relnamespace = get_namespace_name(RelationGetNamespace(rel));
	pvs.relname = pstrdup(RelationGetRelationName(rel));

	/* These fields will be filled during index vacuum or cleanup */
	pvs.indname = NULL;
	pvs.status = PARALLEL_INDVAC_STATUS_INITIAL;

	/* Each parallel VACUUM worker gets its own access strategy */
	pvs.bstrategy = GetAccessStrategy(BAS_VACUUM);

	/* Setup error traceback support for ereport() */
	errcallback.callback = parallel_vacuum_error_callback;
	errcallback.arg = &pvs;
	errcallback.previous = error_context_stack;
	error_context_stack = &errcallback;

	/* Prepare to track buffer usage during parallel execution */
	InstrStartParallelQuery();

	/* Process indexes to perform vacuum/cleanup */
	parallel_vacuum_process_safe_indexes(&pvs);

	/* Report buffer/WAL usage during parallel execution */
	buffer_usage = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_BUFFER_USAGE, false);
	wal_usage = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_WAL_USAGE, false);
	InstrEndParallelQuery(&buffer_usage[ParallelWorkerNumber],
						  &wal_usage[ParallelWorkerNumber]);

	/* Pop the error context stack */
	error_context_stack = errcallback.previous;

	vac_close_indexes(nindexes, indrels, RowExclusiveLock);
	table_close(rel, ShareUpdateExclusiveLock);
	FreeAccessStrategy(pvs.bstrategy);
}
+
+/*
+ * Error context callback for errors occurring during parallel index vacuum.
+ * The error context messages should match the messages set in the lazy vacuum
+ * error context. If you change this function, change vacuum_error_callback()
+ * as well.
+ */
+static void
+parallel_vacuum_error_callback(void *arg)
+{
+ ParallelVacuumState *errinfo = arg;
+
+ switch (errinfo->status)
+ {
+ case PARALLEL_INDVAC_STATUS_NEED_BULKDELETE:
+ errcontext("while vacuuming index \"%s\" of relation \"%s.%s\"",
+ errinfo->indname,
+ errinfo->relnamespace,
+ errinfo->relname);
+ break;
+ case PARALLEL_INDVAC_STATUS_NEED_CLEANUP:
+ errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"",
+ errinfo->indname,
+ errinfo->relnamespace,
+ errinfo->relname);
+ break;
+ case PARALLEL_INDVAC_STATUS_INITIAL:
+ case PARALLEL_INDVAC_STATUS_COMPLETED:
+ default:
+ return;
+ }
+}
diff --git a/src/backend/commands/variable.c b/src/backend/commands/variable.c
new file mode 100644
index 0000000..e5ddcda
--- /dev/null
+++ b/src/backend/commands/variable.c
@@ -0,0 +1,935 @@
+/*-------------------------------------------------------------------------
+ *
+ * variable.c
+ * Routines for handling specialized SET variables.
+ *
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/commands/variable.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <ctype.h>
+
+#include "access/htup_details.h"
+#include "access/parallel.h"
+#include "access/xact.h"
+#include "access/xlog.h"
+#include "catalog/pg_authid.h"
+#include "commands/variable.h"
+#include "mb/pg_wchar.h"
+#include "miscadmin.h"
+#include "utils/acl.h"
+#include "utils/builtins.h"
+#include "utils/snapmgr.h"
+#include "utils/syscache.h"
+#include "utils/timestamp.h"
+#include "utils/varlena.h"
+
/*
 * DATESTYLE
 */

/*
 * check_datestyle: GUC check_hook for datestyle
 *
 * Parses a comma-separated list of style/order keywords, replaces *newval
 * with a canonical malloc'd string (as GUC requires for check hooks), and
 * passes the parsed {DateStyle, DateOrder} pair to assign_datestyle via a
 * malloc'd two-int array in *extra.  Returns false with GUC_check_errdetail
 * set on bad or conflicting input.
 */
bool
check_datestyle(char **newval, void **extra, GucSource source)
{
	int			newDateStyle = DateStyle;
	int			newDateOrder = DateOrder;
	bool		have_style = false;
	bool		have_order = false;
	bool		ok = true;
	char	   *rawstring;
	int		   *myextra;
	char	   *result;
	List	   *elemlist;
	ListCell   *l;

	/* Need a modifiable copy of string */
	rawstring = pstrdup(*newval);

	/* Parse string into list of identifiers */
	if (!SplitIdentifierString(rawstring, ',', &elemlist))
	{
		/* syntax error in list */
		GUC_check_errdetail("List syntax is invalid.");
		pfree(rawstring);
		list_free(elemlist);
		return false;
	}

	foreach(l, elemlist)
	{
		char	   *tok = (char *) lfirst(l);

		/* Ugh. Somebody ought to write a table driven version -- mjl */

		if (pg_strcasecmp(tok, "ISO") == 0)
		{
			if (have_style && newDateStyle != USE_ISO_DATES)
				ok = false;		/* conflicting styles */
			newDateStyle = USE_ISO_DATES;
			have_style = true;
		}
		else if (pg_strcasecmp(tok, "SQL") == 0)
		{
			if (have_style && newDateStyle != USE_SQL_DATES)
				ok = false;		/* conflicting styles */
			newDateStyle = USE_SQL_DATES;
			have_style = true;
		}
		else if (pg_strncasecmp(tok, "POSTGRES", 8) == 0)
		{
			if (have_style && newDateStyle != USE_POSTGRES_DATES)
				ok = false;		/* conflicting styles */
			newDateStyle = USE_POSTGRES_DATES;
			have_style = true;
		}
		else if (pg_strcasecmp(tok, "GERMAN") == 0)
		{
			if (have_style && newDateStyle != USE_GERMAN_DATES)
				ok = false;		/* conflicting styles */
			newDateStyle = USE_GERMAN_DATES;
			have_style = true;
			/* GERMAN also sets DMY, unless explicitly overridden */
			if (!have_order)
				newDateOrder = DATEORDER_DMY;
		}
		else if (pg_strcasecmp(tok, "YMD") == 0)
		{
			if (have_order && newDateOrder != DATEORDER_YMD)
				ok = false;		/* conflicting orders */
			newDateOrder = DATEORDER_YMD;
			have_order = true;
		}
		else if (pg_strcasecmp(tok, "DMY") == 0 ||
				 pg_strncasecmp(tok, "EURO", 4) == 0)
		{
			if (have_order && newDateOrder != DATEORDER_DMY)
				ok = false;		/* conflicting orders */
			newDateOrder = DATEORDER_DMY;
			have_order = true;
		}
		else if (pg_strcasecmp(tok, "MDY") == 0 ||
				 pg_strcasecmp(tok, "US") == 0 ||
				 pg_strncasecmp(tok, "NONEURO", 7) == 0)
		{
			if (have_order && newDateOrder != DATEORDER_MDY)
				ok = false;		/* conflicting orders */
			newDateOrder = DATEORDER_MDY;
			have_order = true;
		}
		else if (pg_strcasecmp(tok, "DEFAULT") == 0)
		{
			/*
			 * Easiest way to get the current DEFAULT state is to fetch the
			 * DEFAULT string from guc.c and recursively parse it.
			 *
			 * We can't simply "return check_datestyle(...)" because we need
			 * to handle constructs like "DEFAULT, ISO".
			 */
			char	   *subval;
			void	   *subextra = NULL;

			/* strdup, not pstrdup: check hooks traffic in malloc'd storage */
			subval = strdup(GetConfigOptionResetString("datestyle"));
			if (!subval)
			{
				ok = false;
				break;
			}
			if (!check_datestyle(&subval, &subextra, source))
			{
				free(subval);
				ok = false;
				break;
			}
			/* Adopt only the components not already given explicitly */
			myextra = (int *) subextra;
			if (!have_style)
				newDateStyle = myextra[0];
			if (!have_order)
				newDateOrder = myextra[1];
			free(subval);
			free(subextra);
		}
		else
		{
			GUC_check_errdetail("Unrecognized key word: \"%s\".", tok);
			pfree(rawstring);
			list_free(elemlist);
			return false;
		}
	}

	pfree(rawstring);
	list_free(elemlist);

	if (!ok)
	{
		GUC_check_errdetail("Conflicting \"datestyle\" specifications.");
		return false;
	}

	/*
	 * Prepare the canonical string to return.  GUC wants it malloc'd.
	 */
	result = (char *) malloc(32);
	if (!result)
		return false;

	switch (newDateStyle)
	{
		case USE_ISO_DATES:
			strcpy(result, "ISO");
			break;
		case USE_SQL_DATES:
			strcpy(result, "SQL");
			break;
		case USE_GERMAN_DATES:
			strcpy(result, "German");
			break;
		default:
			strcpy(result, "Postgres");
			break;
	}
	switch (newDateOrder)
	{
		case DATEORDER_YMD:
			strcat(result, ", YMD");
			break;
		case DATEORDER_DMY:
			strcat(result, ", DMY");
			break;
		default:
			strcat(result, ", MDY");
			break;
	}

	free(*newval);
	*newval = result;

	/*
	 * Set up the "extra" struct actually used by assign_datestyle.
	 */
	myextra = (int *) malloc(2 * sizeof(int));
	if (!myextra)
		return false;
	myextra[0] = newDateStyle;
	myextra[1] = newDateOrder;
	*extra = (void *) myextra;

	return true;
}
+
+/*
+ * assign_datestyle: GUC assign_hook for datestyle
+ */
+void
+assign_datestyle(const char *newval, void *extra)
+{
+ int *myextra = (int *) extra;
+
+ DateStyle = myextra[0];
+ DateOrder = myextra[1];
+}
+
+
/*
 * TIMEZONE
 */

/*
 * check_timezone: GUC check_hook for timezone
 *
 * Accepts a time zone name, a numeric UTC offset in hours, or (for SQL spec
 * compliance only) INTERVAL 'x'.  On success, stashes the loaded pg_tz
 * pointer in a malloc'd *extra for assign_timezone to install.
 */
bool
check_timezone(char **newval, void **extra, GucSource source)
{
	pg_tz	   *new_tz;
	long		gmtoffset;
	char	   *endptr;
	double		hours;

	if (pg_strncasecmp(*newval, "interval", 8) == 0)
	{
		/*
		 * Support INTERVAL 'foo'.  This is for SQL spec compliance, not
		 * because it has any actual real-world usefulness.
		 */
		const char *valueptr = *newval;
		char	   *val;
		Interval   *interval;

		valueptr += 8;
		while (isspace((unsigned char) *valueptr))
			valueptr++;
		if (*valueptr++ != '\'')
			return false;
		val = pstrdup(valueptr);
		/* Check and remove trailing quote */
		endptr = strchr(val, '\'');
		if (!endptr || endptr[1] != '\0')
		{
			pfree(val);
			return false;
		}
		*endptr = '\0';

		/*
		 * Try to parse it.  XXX an invalid interval format will result in
		 * ereport(ERROR), which is not desirable for GUC.  We did what we
		 * could to guard against this in flatten_set_variable_args, but a
		 * string coming in from postgresql.conf might contain anything.
		 */
		interval = DatumGetIntervalP(DirectFunctionCall3(interval_in,
														 CStringGetDatum(val),
														 ObjectIdGetDatum(InvalidOid),
														 Int32GetDatum(-1)));

		pfree(val);
		if (interval->month != 0)
		{
			GUC_check_errdetail("Cannot specify months in time zone interval.");
			pfree(interval);
			return false;
		}
		if (interval->day != 0)
		{
			GUC_check_errdetail("Cannot specify days in time zone interval.");
			pfree(interval);
			return false;
		}

		/* Here we change from SQL to Unix sign convention */
		gmtoffset = -(interval->time / USECS_PER_SEC);
		new_tz = pg_tzset_offset(gmtoffset);

		pfree(interval);
	}
	else
	{
		/*
		 * Try it as a numeric number of hours (possibly fractional).
		 */
		hours = strtod(*newval, &endptr);
		if (endptr != *newval && *endptr == '\0')
		{
			/* Here we change from SQL to Unix sign convention */
			gmtoffset = -hours * SECS_PER_HOUR;
			new_tz = pg_tzset_offset(gmtoffset);
		}
		else
		{
			/*
			 * Otherwise assume it is a timezone name, and try to load it.
			 */
			new_tz = pg_tzset(*newval);

			if (!new_tz)
			{
				/* Doesn't seem to be any great value in errdetail here */
				return false;
			}

			if (!pg_tz_acceptable(new_tz))
			{
				GUC_check_errmsg("time zone \"%s\" appears to use leap seconds",
								 *newval);
				GUC_check_errdetail("PostgreSQL does not support leap seconds.");
				return false;
			}
		}
	}

	/* Test for failure in pg_tzset_offset, which we assume is out-of-range */
	if (!new_tz)
	{
		GUC_check_errdetail("UTC timezone offset is out of range.");
		return false;
	}

	/*
	 * Pass back data for assign_timezone to use
	 */
	*extra = malloc(sizeof(pg_tz *));
	if (!*extra)
		return false;
	*((pg_tz **) *extra) = new_tz;

	return true;
}
+
+/*
+ * assign_timezone: GUC assign_hook for timezone
+ */
+void
+assign_timezone(const char *newval, void *extra)
+{
+ session_timezone = *((pg_tz **) extra);
+}
+
+/*
+ * show_timezone: GUC show_hook for timezone
+ */
+const char *
+show_timezone(void)
+{
+ const char *tzn;
+
+ /* Always show the zone's canonical name */
+ tzn = pg_get_timezone_name(session_timezone);
+
+ if (tzn != NULL)
+ return tzn;
+
+ return "unknown";
+}
+
+
+/*
+ * LOG_TIMEZONE
+ *
+ * For log_timezone, we don't support the interval-based methods of setting a
+ * zone, which are only there for SQL spec compliance not because they're
+ * actually useful.
+ */
+
+/*
+ * check_log_timezone: GUC check_hook for log_timezone
+ */
+bool
+check_log_timezone(char **newval, void **extra, GucSource source)
+{
+ pg_tz *new_tz;
+
+ /*
+ * Assume it is a timezone name, and try to load it.
+ */
+ new_tz = pg_tzset(*newval);
+
+ if (!new_tz)
+ {
+ /* Doesn't seem to be any great value in errdetail here */
+ return false;
+ }
+
+ if (!pg_tz_acceptable(new_tz))
+ {
+ GUC_check_errmsg("time zone \"%s\" appears to use leap seconds",
+ *newval);
+ GUC_check_errdetail("PostgreSQL does not support leap seconds.");
+ return false;
+ }
+
+ /*
+ * Pass back data for assign_log_timezone to use
+ */
+ *extra = malloc(sizeof(pg_tz *));
+ if (!*extra)
+ return false;
+ *((pg_tz **) *extra) = new_tz;
+
+ return true;
+}
+
+/*
+ * assign_log_timezone: GUC assign_hook for log_timezone
+ */
+void
+assign_log_timezone(const char *newval, void *extra)
+{
+ log_timezone = *((pg_tz **) extra);
+}
+
+/*
+ * show_log_timezone: GUC show_hook for log_timezone
+ */
+const char *
+show_log_timezone(void)
+{
+ const char *tzn;
+
+ /* Always show the zone's canonical name */
+ tzn = pg_get_timezone_name(log_timezone);
+
+ if (tzn != NULL)
+ return tzn;
+
+ return "unknown";
+}
+
+
+/*
+ * SET TRANSACTION READ ONLY and SET TRANSACTION READ WRITE
+ *
+ * We allow idempotent changes (r/w -> r/w and r/o -> r/o) at any time, and
+ * we also always allow changes from read-write to read-only. However,
+ * read-only may be changed to read-write only when in a top-level transaction
+ * that has not yet taken an initial snapshot. Can't do it in a hot standby,
+ * either.
+ *
+ * If we are not in a transaction at all, just allow the change; it means
+ * nothing since XactReadOnly will be reset by the next StartTransaction().
+ * The IsTransactionState() test protects us against trying to check
+ * RecoveryInProgress() in contexts where shared memory is not accessible.
+ * (Similarly, if we're restoring state in a parallel worker, just allow
+ * the change.)
+ */
+bool
+check_transaction_read_only(bool *newval, void **extra, GucSource source)
+{
+ if (*newval == false && XactReadOnly && IsTransactionState() && !InitializingParallelWorker)
+ {
+ /* Can't go to r/w mode inside a r/o transaction */
+ if (IsSubTransaction())
+ {
+ GUC_check_errcode(ERRCODE_ACTIVE_SQL_TRANSACTION);
+ GUC_check_errmsg("cannot set transaction read-write mode inside a read-only transaction");
+ return false;
+ }
+ /* Top level transaction can't change to r/w after first snapshot. */
+ if (FirstSnapshotSet)
+ {
+ GUC_check_errcode(ERRCODE_ACTIVE_SQL_TRANSACTION);
+ GUC_check_errmsg("transaction read-write mode must be set before any query");
+ return false;
+ }
+ /* Can't go to r/w mode while recovery is still active */
+ if (RecoveryInProgress())
+ {
+ GUC_check_errcode(ERRCODE_FEATURE_NOT_SUPPORTED);
+ GUC_check_errmsg("cannot set transaction read-write mode during recovery");
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/*
+ * SET TRANSACTION ISOLATION LEVEL
+ *
+ * We allow idempotent changes at any time, but otherwise this can only be
+ * changed in a toplevel transaction that has not yet taken a snapshot.
+ *
+ * As in check_transaction_read_only, allow it if not inside a transaction.
+ */
+bool
+check_XactIsoLevel(int *newval, void **extra, GucSource source)
+{
+ int newXactIsoLevel = *newval;
+
+ if (newXactIsoLevel != XactIsoLevel && IsTransactionState())
+ {
+ if (FirstSnapshotSet)
+ {
+ GUC_check_errcode(ERRCODE_ACTIVE_SQL_TRANSACTION);
+ GUC_check_errmsg("SET TRANSACTION ISOLATION LEVEL must be called before any query");
+ return false;
+ }
+ /* We ignore a subtransaction setting it to the existing value. */
+ if (IsSubTransaction())
+ {
+ GUC_check_errcode(ERRCODE_ACTIVE_SQL_TRANSACTION);
+ GUC_check_errmsg("SET TRANSACTION ISOLATION LEVEL must not be called in a subtransaction");
+ return false;
+ }
+ /* Can't go to serializable mode while recovery is still active */
+ if (newXactIsoLevel == XACT_SERIALIZABLE && RecoveryInProgress())
+ {
+ GUC_check_errcode(ERRCODE_FEATURE_NOT_SUPPORTED);
+ GUC_check_errmsg("cannot use serializable mode in a hot standby");
+ GUC_check_errhint("You can use REPEATABLE READ instead.");
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/*
+ * SET TRANSACTION [NOT] DEFERRABLE
+ */
+
+bool
+check_transaction_deferrable(bool *newval, void **extra, GucSource source)
+{
+ if (IsSubTransaction())
+ {
+ GUC_check_errcode(ERRCODE_ACTIVE_SQL_TRANSACTION);
+ GUC_check_errmsg("SET TRANSACTION [NOT] DEFERRABLE cannot be called within a subtransaction");
+ return false;
+ }
+ if (FirstSnapshotSet)
+ {
+ GUC_check_errcode(ERRCODE_ACTIVE_SQL_TRANSACTION);
+ GUC_check_errmsg("SET TRANSACTION [NOT] DEFERRABLE must be called before any query");
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * Random number seed
+ *
+ * We can't roll back the random sequence on error, and we don't want
+ * config file reloads to affect it, so we only want interactive SET SEED
+ * commands to set it. We use the "extra" storage to ensure that rollbacks
+ * don't try to do the operation again.
+ */
+
+bool
+check_random_seed(double *newval, void **extra, GucSource source)
+{
+ *extra = malloc(sizeof(int));
+ if (!*extra)
+ return false;
+ /* Arm the assign only if source of value is an interactive SET */
+ *((int *) *extra) = (source >= PGC_S_INTERACTIVE);
+
+ return true;
+}
+
+void
+assign_random_seed(double newval, void *extra)
+{
+ /* We'll do this at most once for any setting of the GUC variable */
+ if (*((int *) extra))
+ DirectFunctionCall1(setseed, Float8GetDatum(newval));
+ *((int *) extra) = 0;
+}
+
const char *
show_random_seed(void)
{
	/* The seed cannot be read back, so display a fixed placeholder. */
	return "unavailable";
}
+
+
/*
 * SET CLIENT_ENCODING
 */

/*
 * check_client_encoding: GUC check_hook for client_encoding.  Validates the
 * encoding name, canonicalizes *newval (malloc'd, per GUC convention), and
 * stores the encoding ID in *extra for assign_client_encoding.
 */
bool
check_client_encoding(char **newval, void **extra, GucSource source)
{
	int			encoding;
	const char *canonical_name;

	/* Look up the encoding by name */
	encoding = pg_valid_client_encoding(*newval);
	if (encoding < 0)
		return false;

	/* Get the canonical name (no aliases, uniform case) */
	canonical_name = pg_encoding_to_char(encoding);

	/*
	 * If we are not within a transaction then PrepareClientEncoding will not
	 * be able to look up the necessary conversion procs.  If we are still
	 * starting up, it will return "OK" anyway, and InitializeClientEncoding
	 * will fix things once initialization is far enough along.  After
	 * startup, we'll fail.  This would only happen if someone tries to change
	 * client_encoding in postgresql.conf and then SIGHUP existing sessions.
	 * It seems like a bad idea for client_encoding to change that way anyhow,
	 * so we don't go out of our way to support it.
	 *
	 * Note: in the postmaster, or any other process that never calls
	 * InitializeClientEncoding, PrepareClientEncoding will always succeed,
	 * and so will SetClientEncoding; but they won't do anything, which is OK.
	 */
	if (PrepareClientEncoding(encoding) < 0)
	{
		if (IsTransactionState())
		{
			/* Must be a genuine no-such-conversion problem */
			GUC_check_errcode(ERRCODE_FEATURE_NOT_SUPPORTED);
			GUC_check_errdetail("Conversion between %s and %s is not supported.",
								canonical_name,
								GetDatabaseEncodingName());
		}
		else
		{
			/* Provide a useful complaint */
			GUC_check_errdetail("Cannot change \"client_encoding\" now.");
		}
		return false;
	}

	/*
	 * Replace the user-supplied string with the encoding's canonical name.
	 * This gets rid of aliases and case-folding variations.
	 *
	 * XXX Although canonicalizing seems like a good idea in the abstract, it
	 * breaks pre-9.1 JDBC drivers, which expect that if they send "UNICODE"
	 * as the client_encoding setting then it will read back the same way.  As
	 * a workaround, don't replace the string if it's "UNICODE".  Remove that
	 * hack when pre-9.1 JDBC drivers are no longer in use.
	 */
	if (strcmp(*newval, canonical_name) != 0 &&
		strcmp(*newval, "UNICODE") != 0)
	{
		/* GUC strings are malloc'd, so use free/strdup not pfree/pstrdup */
		free(*newval);
		*newval = strdup(canonical_name);
		if (!*newval)
			return false;
	}

	/*
	 * Save the encoding's ID in *extra, for use by assign_client_encoding.
	 */
	*extra = malloc(sizeof(int));
	if (!*extra)
		return false;
	*((int *) *extra) = encoding;

	return true;
}
+
+void
+assign_client_encoding(const char *newval, void *extra)
+{
+	int			enc_id = *((int *) extra);
+
+	/*
+	 * Parallel workers always talk to the leader using the database
+	 * encoding, never the client's, so client_encoding changes need
+	 * special treatment there.
+	 */
+	if (IsParallelWorker())
+	{
+		/*
+		 * While the worker is starting up, silently accept the leader's
+		 * client_encoding so that anyone inspecting the GUC in the worker
+		 * sees the same value as in the leader.
+		 */
+		if (InitializingParallelWorker)
+			return;
+
+		/*
+		 * Any later change (for example from a SET clause attached to a
+		 * function definition) cannot be made effective inside the worker,
+		 * so reject it outright.
+		 */
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TRANSACTION_STATE),
+				 errmsg("cannot change client_encoding during a parallel operation")));
+	}
+
+	/* PrepareClientEncoding vetted this already, so failure is unexpected */
+	if (SetClientEncoding(enc_id) < 0)
+		elog(LOG, "SetClientEncoding(%d) failed", enc_id);
+}
+
+
+/*
+ * SET SESSION AUTHORIZATION
+ */
+
+typedef struct
+{
+	/* This is the "extra" state for both SESSION AUTHORIZATION and ROLE */
+	Oid			roleid;			/* OID of the role to assume; InvalidOid for SET ROLE NONE */
+	bool		is_superuser;	/* cached rolsuper flag of that role */
+} role_auth_extra;
+
+/*
+ * check_hook for "session_authorization": look up the role and capture its
+ * OID and superuser flag in *extra for assign_session_authorization.
+ */
+bool
+check_session_authorization(char **newval, void **extra, GucSource source)
+{
+	HeapTuple	tuple;
+	Oid			roleoid;
+	bool		superuser_flag;
+	role_auth_extra *state;
+
+	/* The boot_val default is NULL; nothing to check or record for it */
+	if (*newval == NULL)
+		return true;
+
+	/*
+	 * Catalog lookups are impossible outside a transaction, so fail in that
+	 * case.  A consequence is that session_authorization cannot be set from
+	 * postgresql.conf, which seems fine, so we don't work hard to avoid it.
+	 */
+	if (!IsTransactionState())
+		return false;
+
+	/* Fetch the pg_authid row for the requested role name */
+	tuple = SearchSysCache1(AUTHNAME, PointerGetDatum(*newval));
+	if (!HeapTupleIsValid(tuple))
+	{
+		/*
+		 * For source == PGC_S_TEST an unknown role draws only a NOTICE,
+		 * not a hard failure.  See comments in guc.h.
+		 */
+		if (source == PGC_S_TEST)
+		{
+			ereport(NOTICE,
+					(errcode(ERRCODE_UNDEFINED_OBJECT),
+					 errmsg("role \"%s\" does not exist", *newval)));
+			return true;
+		}
+		GUC_check_errmsg("role \"%s\" does not exist", *newval);
+		return false;
+	}
+
+	{
+		Form_pg_authid authform = (Form_pg_authid) GETSTRUCT(tuple);
+
+		roleoid = authform->oid;
+		superuser_flag = authform->rolsuper;
+	}
+	ReleaseSysCache(tuple);
+
+	/* Package what assign_session_authorization will need */
+	state = (role_auth_extra *) malloc(sizeof(role_auth_extra));
+	if (!state)
+		return false;
+	state->roleid = roleoid;
+	state->is_superuser = superuser_flag;
+	*extra = (void *) state;
+
+	return true;
+}
+
+void
+assign_session_authorization(const char *newval, void *extra)
+{
+	role_auth_extra *state = (role_auth_extra *) extra;
+
+	/* A NULL "extra" corresponds to the boot_val default of NULL */
+	if (state == NULL)
+		return;
+
+	SetSessionAuthorization(state->roleid, state->is_superuser);
+}
+
+
+/*
+ * SET ROLE
+ *
+ * The SQL spec requires "SET ROLE NONE" to unset the role, so we hardwire
+ * a translation of "none" to InvalidOid. Otherwise this is much like
+ * SET SESSION AUTHORIZATION.
+ */
+extern char *role_string; /* in guc.c */
+
+/*
+ * check_hook for "role": resolve the proposed role name ("none" maps to
+ * InvalidOid), verify that the session user may assume it, and stash the
+ * role OID and superuser flag in *extra for assign_role to apply.
+ */
+bool
+check_role(char **newval, void **extra, GucSource source)
+{
+	HeapTuple	roleTup;
+	Oid			roleid;
+	bool		is_superuser;
+	role_auth_extra *myextra;
+	Form_pg_authid roleform;
+
+	if (strcmp(*newval, "none") == 0)
+	{
+		/* hardwired translation */
+		roleid = InvalidOid;
+		is_superuser = false;
+	}
+	else
+	{
+		if (!IsTransactionState())
+		{
+			/*
+			 * Can't do catalog lookups, so fail.  The result of this is that
+			 * role cannot be set in postgresql.conf, which seems like a good
+			 * thing anyway, so we don't work hard to avoid it.
+			 */
+			return false;
+		}
+
+		/*
+		 * When source == PGC_S_TEST, we don't throw a hard error for a
+		 * nonexistent user name or insufficient privileges, only a NOTICE.
+		 * See comments in guc.h.
+		 */
+
+		/* Look up the username */
+		roleTup = SearchSysCache1(AUTHNAME, PointerGetDatum(*newval));
+		if (!HeapTupleIsValid(roleTup))
+		{
+			if (source == PGC_S_TEST)
+			{
+				ereport(NOTICE,
+						(errcode(ERRCODE_UNDEFINED_OBJECT),
+						 errmsg("role \"%s\" does not exist", *newval)));
+				return true;
+			}
+			GUC_check_errmsg("role \"%s\" does not exist", *newval);
+			return false;
+		}
+
+		roleform = (Form_pg_authid) GETSTRUCT(roleTup);
+		roleid = roleform->oid;
+		is_superuser = roleform->rolsuper;
+
+		ReleaseSysCache(roleTup);
+
+		/*
+		 * Verify that session user is allowed to become this role, but skip
+		 * this in parallel mode, where we must blindly recreate the parallel
+		 * leader's state.
+		 */
+		if (!InitializingParallelWorker &&
+			!is_member_of_role(GetSessionUserId(), roleid))
+		{
+			if (source == PGC_S_TEST)
+			{
+				ereport(NOTICE,
+						(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+						 errmsg("permission will be denied to set role \"%s\"",
+								*newval)));
+				return true;
+			}
+			GUC_check_errcode(ERRCODE_INSUFFICIENT_PRIVILEGE);
+			GUC_check_errmsg("permission denied to set role \"%s\"",
+							 *newval);
+			return false;
+		}
+	}
+
+	/* Set up "extra" struct for assign_role to use */
+	myextra = (role_auth_extra *) malloc(sizeof(role_auth_extra));
+	if (!myextra)
+		return false;
+	myextra->roleid = roleid;
+	myextra->is_superuser = is_superuser;
+	*extra = (void *) myextra;
+
+	return true;
+}
+
+void
+assign_role(const char *newval, void *extra)
+{
+	role_auth_extra *state = (role_auth_extra *) extra;
+
+	/* Apply the role OID and superuser flag that check_role cached */
+	SetCurrentRoleId(state->roleid, state->is_superuser);
+}
+
+const char *
+show_role(void)
+{
+	/*
+	 * If no SET ROLE is in effect, report "none".  This kluge is needed
+	 * because SET SESSION AUTHORIZATION logically resets SET ROLE to NONE,
+	 * yet assign_session_authorization cannot update the GUC role variable
+	 * (it lacks the info required to call set_config_option).
+	 */
+	if (!OidIsValid(GetCurrentRoleId()))
+		return "none";
+
+	/* A role is active; the GUC string is trustworthy */
+	if (role_string != NULL)
+		return role_string;
+	return "none";
+}
diff --git a/src/backend/commands/view.c b/src/backend/commands/view.c
new file mode 100644
index 0000000..b5a0fc0
--- /dev/null
+++ b/src/backend/commands/view.c
@@ -0,0 +1,604 @@
+/*-------------------------------------------------------------------------
+ *
+ * view.c
+ * use rewrite rules to construct views
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/commands/view.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/relation.h"
+#include "access/xact.h"
+#include "catalog/namespace.h"
+#include "commands/defrem.h"
+#include "commands/tablecmds.h"
+#include "commands/view.h"
+#include "miscadmin.h"
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "parser/analyze.h"
+#include "parser/parse_relation.h"
+#include "rewrite/rewriteDefine.h"
+#include "rewrite/rewriteHandler.h"
+#include "rewrite/rewriteManip.h"
+#include "rewrite/rewriteSupport.h"
+#include "utils/acl.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
+
+static void checkViewTupleDesc(TupleDesc newdesc, TupleDesc olddesc);
+
+/*---------------------------------------------------------------------
+ * DefineVirtualRelation
+ *
+ * Create a view relation and use the rules system to store the query
+ * for the view.
+ *
+ * EventTriggerAlterTableStart must have been called already.
+ *---------------------------------------------------------------------
+ */
+static ObjectAddress
+DefineVirtualRelation(RangeVar *relation, List *tlist, bool replace,
+					  List *options, Query *viewParse)
+{
+	Oid			viewOid;
+	LOCKMODE	lockmode;
+	CreateStmt *createStmt = makeNode(CreateStmt);
+	List	   *attrList;
+	ListCell   *t;
+
+	/*
+	 * create a list of ColumnDef nodes based on the names and types of the
+	 * (non-junk) targetlist items from the view's SELECT list.
+	 */
+	attrList = NIL;
+	foreach(t, tlist)
+	{
+		TargetEntry *tle = (TargetEntry *) lfirst(t);
+
+		if (!tle->resjunk)
+		{
+			ColumnDef  *def = makeColumnDef(tle->resname,
+											exprType((Node *) tle->expr),
+											exprTypmod((Node *) tle->expr),
+											exprCollation((Node *) tle->expr));
+
+			/*
+			 * It's possible that the column is of a collatable type but the
+			 * collation could not be resolved, so double-check.
+			 */
+			if (type_is_collatable(exprType((Node *) tle->expr)))
+			{
+				if (!OidIsValid(def->collOid))
+					ereport(ERROR,
+							(errcode(ERRCODE_INDETERMINATE_COLLATION),
+							 errmsg("could not determine which collation to use for view column \"%s\"",
+									def->colname),
+							 errhint("Use the COLLATE clause to set the collation explicitly.")));
+			}
+			else
+				Assert(!OidIsValid(def->collOid));
+
+			attrList = lappend(attrList, def);
+		}
+	}
+
+	/*
+	 * Look up, check permissions on, and lock the creation namespace; also
+	 * check for a preexisting view with the same name.  This will also set
+	 * relation->relpersistence to RELPERSISTENCE_TEMP if the selected
+	 * namespace is temporary.
+	 */
+	/* Take the strong lock only if we might modify an existing view */
+	lockmode = replace ? AccessExclusiveLock : NoLock;
+	(void) RangeVarGetAndCheckCreationNamespace(relation, lockmode, &viewOid);
+
+	if (OidIsValid(viewOid) && replace)
+	{
+		Relation	rel;
+		TupleDesc	descriptor;
+		List	   *atcmds = NIL;
+		AlterTableCmd *atcmd;
+		ObjectAddress address;
+
+		/* Relation is already locked, but we must build a relcache entry. */
+		rel = relation_open(viewOid, NoLock);
+
+		/* Make sure it *is* a view. */
+		if (rel->rd_rel->relkind != RELKIND_VIEW)
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("\"%s\" is not a view",
+							RelationGetRelationName(rel))));
+
+		/* Also check it's not in use already */
+		CheckTableNotInUse(rel, "CREATE OR REPLACE VIEW");
+
+		/*
+		 * Due to the namespace visibility rules for temporary objects, we
+		 * should only end up replacing a temporary view with another
+		 * temporary view, and similarly for permanent views.
+		 */
+		Assert(relation->relpersistence == rel->rd_rel->relpersistence);
+
+		/*
+		 * Create a tuple descriptor to compare against the existing view, and
+		 * verify that the old column list is an initial prefix of the new
+		 * column list.
+		 */
+		descriptor = BuildDescForRelation(attrList);
+		checkViewTupleDesc(descriptor, rel->rd_att);
+
+		/*
+		 * If new attributes have been added, we must add pg_attribute entries
+		 * for them.  It is convenient (although overkill) to use the ALTER
+		 * TABLE ADD COLUMN infrastructure for this.
+		 *
+		 * Note that we must do this before updating the query for the view,
+		 * since the rules system requires that the correct view columns be in
+		 * place when defining the new rules.
+		 *
+		 * Also note that ALTER TABLE doesn't run parse transformation on
+		 * AT_AddColumnToView commands.  The ColumnDef we supply must be ready
+		 * to execute as-is.
+		 */
+		if (list_length(attrList) > rel->rd_att->natts)
+		{
+			ListCell   *c;
+			int			skip = rel->rd_att->natts;
+
+			/* Build ADD COLUMN commands only for the newly-added columns */
+			foreach(c, attrList)
+			{
+				if (skip > 0)
+				{
+					skip--;
+					continue;
+				}
+				atcmd = makeNode(AlterTableCmd);
+				atcmd->subtype = AT_AddColumnToView;
+				atcmd->def = (Node *) lfirst(c);
+				atcmds = lappend(atcmds, atcmd);
+			}
+
+			/* EventTriggerAlterTableStart called by ProcessUtilitySlow */
+			AlterTableInternal(viewOid, atcmds, true);
+
+			/* Make the new view columns visible */
+			CommandCounterIncrement();
+		}
+
+		/*
+		 * Update the query for the view.
+		 *
+		 * Note that we must do this before updating the view options, because
+		 * the new options may not be compatible with the old view query (for
+		 * example if we attempt to add the WITH CHECK OPTION, we require that
+		 * the new view be automatically updatable, but the old view may not
+		 * have been).
+		 */
+		StoreViewQuery(viewOid, viewParse, replace);
+
+		/* Make the new view query visible */
+		CommandCounterIncrement();
+
+		/*
+		 * Update the view's options.
+		 *
+		 * The new options list replaces the existing options list, even if
+		 * it's empty.
+		 */
+		atcmd = makeNode(AlterTableCmd);
+		atcmd->subtype = AT_ReplaceRelOptions;
+		atcmd->def = (Node *) options;
+		/* fresh one-element list; any ADD COLUMN commands ran above */
+		atcmds = list_make1(atcmd);
+
+		/* EventTriggerAlterTableStart called by ProcessUtilitySlow */
+		AlterTableInternal(viewOid, atcmds, true);
+
+		/*
+		 * There is very little to do here to update the view's dependencies.
+		 * Most view-level dependency relationships, such as those on the
+		 * owner, schema, and associated composite type, aren't changing.
+		 * Because we don't allow changing type or collation of an existing
+		 * view column, those dependencies of the existing columns don't
+		 * change either, while the AT_AddColumnToView machinery took care of
+		 * adding such dependencies for new view columns.  The dependencies of
+		 * the view's query could have changed arbitrarily, but that was dealt
+		 * with inside StoreViewQuery.  What remains is only to check that
+		 * view replacement is allowed when we're creating an extension.
+		 */
+		ObjectAddressSet(address, RelationRelationId, viewOid);
+
+		recordDependencyOnCurrentExtension(&address, true);
+
+		/*
+		 * Seems okay, so return the OID of the pre-existing view.
+		 */
+		relation_close(rel, NoLock);	/* keep the lock! */
+
+		return address;
+	}
+	else
+	{
+		ObjectAddress address;
+
+		/*
+		 * Set the parameters for keys/inheritance etc. All of these are
+		 * uninteresting for views...
+		 */
+		createStmt->relation = relation;
+		createStmt->tableElts = attrList;
+		createStmt->inhRelations = NIL;
+		createStmt->constraints = NIL;
+		createStmt->options = options;
+		createStmt->oncommit = ONCOMMIT_NOOP;
+		createStmt->tablespacename = NULL;
+		createStmt->if_not_exists = false;
+
+		/*
+		 * Create the relation (this will error out if there's an existing
+		 * view, so we don't need more code to complain if "replace" is
+		 * false).
+		 */
+		address = DefineRelation(createStmt, RELKIND_VIEW, InvalidOid, NULL,
+								 NULL);
+		Assert(address.objectId != InvalidOid);
+
+		/* Make the new view relation visible */
+		CommandCounterIncrement();
+
+		/* Store the query for the view */
+		StoreViewQuery(address.objectId, viewParse, replace);
+
+		return address;
+	}
+}
+
+/*
+ * Verify that tupledesc associated with proposed new view definition
+ * matches tupledesc of old view. This is basically a cut-down version
+ * of equalTupleDescs(), with code added to generate specific complaints.
+ * Also, we allow the new tupledesc to have more columns than the old.
+ */
+static void
+checkViewTupleDesc(TupleDesc newdesc, TupleDesc olddesc)
+{
+	int			attno;
+
+	/* The new view may append columns at the end, but never lose any */
+	if (newdesc->natts < olddesc->natts)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+				 errmsg("cannot drop columns from view")));
+
+	for (attno = 0; attno < olddesc->natts; attno++)
+	{
+		Form_pg_attribute attnew = TupleDescAttr(newdesc, attno);
+		Form_pg_attribute attold = TupleDescAttr(olddesc, attno);
+
+		/* XXX msg not right, but we don't support DROP COL on view anyway */
+		if (attnew->attisdropped != attold->attisdropped)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+					 errmsg("cannot drop columns from view")));
+
+		if (strcmp(NameStr(attnew->attname), NameStr(attold->attname)) != 0)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+					 errmsg("cannot change name of view column \"%s\" to \"%s\"",
+							NameStr(attold->attname),
+							NameStr(attnew->attname)),
+					 errhint("Use ALTER VIEW ... RENAME COLUMN ... to change name of view column instead.")));
+
+		/*
+		 * Type, typmod, and collation are frozen, because Vars in other
+		 * views or rules referencing this view may embed these properties.
+		 * All other column attributes may differ freely.
+		 */
+		if (attnew->atttypid != attold->atttypid ||
+			attnew->atttypmod != attold->atttypmod)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+					 errmsg("cannot change data type of view column \"%s\" from %s to %s",
+							NameStr(attold->attname),
+							format_type_with_typemod(attold->atttypid,
+													 attold->atttypmod),
+							format_type_with_typemod(attnew->atttypid,
+													 attnew->atttypmod))));
+
+		/*
+		 * The two attcollations should be both valid or both invalid, so
+		 * calling get_collation_name unconditionally is safe here.
+		 */
+		if (attnew->attcollation != attold->attcollation)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+					 errmsg("cannot change collation of view column \"%s\" from \"%s\" to \"%s\"",
+							NameStr(attold->attname),
+							get_collation_name(attold->attcollation),
+							get_collation_name(attnew->attcollation))));
+	}
+
+	/*
+	 * Constraint fields are deliberately ignored: the new descriptor cannot
+	 * carry constraints, and the only ones possible on the old view are
+	 * defaults, which we are happy to leave in place.
+	 */
+}
+
+static void
+DefineViewRules(Oid viewOid, Query *viewParse, bool replace)
+{
+	/*
+	 * Set up the ON SELECT rule.  Since the query has already been through
+	 * parse analysis, we use DefineQueryRewrite() directly.
+	 *
+	 * The NULL argument is the rule qualification (none here), and the
+	 * trailing booleans presumably select is-instead behavior and whether
+	 * to replace an existing rule -- confirm against DefineQueryRewrite's
+	 * declaration in rewriteDefine.h.
+	 */
+	DefineQueryRewrite(pstrdup(ViewSelectRuleName),
+					   viewOid,
+					   NULL,
+					   CMD_SELECT,
+					   true,
+					   replace,
+					   list_make1(viewParse));
+
+	/*
+	 * Someday: automatic ON INSERT, etc
+	 */
+}
+
+/*---------------------------------------------------------------
+ * UpdateRangeTableOfViewParse
+ *
+ * Update the range table of the given parsetree.
+ * This update consists of adding two new entries IN THE BEGINNING
+ * of the range table (otherwise the rule system will die a slow,
+ * horrible and painful death, and we do not want that now, do we?)
+ * one for the OLD relation and one for the NEW one (both of
+ * them refer in fact to the "view" relation).
+ *
+ * Of course we must also increase the 'varnos' of all the Var nodes
+ * by 2...
+ *
+ * These extra RT entries are not actually used in the query,
+ * except for run-time locking and permission checking.
+ *---------------------------------------------------------------
+ */
+static Query *
+UpdateRangeTableOfViewParse(Oid viewOid, Query *viewParse)
+{
+	Relation	viewRel;
+	ParseState *pstate;
+	ParseNamespaceItem *nsitem;
+	RangeTblEntry *old_rte;
+	RangeTblEntry *new_rte;
+
+	/*
+	 * Work on a copy of the given parsetree.  It's not so much that we care
+	 * about scribbling on the input; rather, the parser has a bad habit of
+	 * emitting multiple links to one subtree for constructs like BETWEEN,
+	 * and OffsetVarNodes must not bump the varno of any Var twice.
+	 * copyObject expands each multiply-referenced subtree into independent
+	 * copies.
+	 */
+	viewParse = copyObject(viewParse);
+
+	/* Dummy ParseState, needed only for addRangeTableEntryForRelation */
+	pstate = make_parsestate(NULL);
+
+	/* addRangeTableEntryForRelation requires the rel to be open */
+	viewRel = relation_open(viewOid, AccessShareLock);
+
+	/*
+	 * Build the two extra range table entries -- OLD first, then NEW; both
+	 * actually refer to the view relation itself.
+	 */
+	nsitem = addRangeTableEntryForRelation(pstate, viewRel,
+										   AccessShareLock,
+										   makeAlias("old", NIL),
+										   false, false);
+	old_rte = nsitem->p_rte;
+	nsitem = addRangeTableEntryForRelation(pstate, viewRel,
+										   AccessShareLock,
+										   makeAlias("new", NIL),
+										   false, false);
+	new_rte = nsitem->p_rte;
+
+	/* Must override addRangeTableEntry's default access-check flags */
+	old_rte->requiredPerms = 0;
+	new_rte->requiredPerms = 0;
+
+	/* Prepend OLD and NEW to the query's range table */
+	viewParse->rtable = lcons(old_rte, lcons(new_rte, viewParse->rtable));
+
+	/* Compensate: shift all Var numbers and jointree RT indexes by 2 */
+	OffsetVarNodes((Node *) viewParse, 2, 0);
+
+	relation_close(viewRel, AccessShareLock);
+
+	return viewParse;
+}
+
+/*
+ * DefineView
+ * Execute a CREATE VIEW command.
+ */
+ObjectAddress
+DefineView(ViewStmt *stmt, const char *queryString,
+		   int stmt_location, int stmt_len)
+{
+	RawStmt    *rawstmt;
+	Query	   *viewParse;
+	RangeVar   *view;
+	ListCell   *cell;
+	bool		check_option;
+	ObjectAddress address;
+
+	/*
+	 * Run parse analysis to convert the raw parse tree to a Query.  Note this
+	 * also acquires sufficient locks on the source table(s).
+	 */
+	rawstmt = makeNode(RawStmt);
+	rawstmt->stmt = stmt->query;
+	rawstmt->stmt_location = stmt_location;
+	rawstmt->stmt_len = stmt_len;
+
+	viewParse = parse_analyze_fixedparams(rawstmt, queryString, NULL, 0, NULL);
+
+	/*
+	 * The grammar should ensure that the result is a single SELECT Query.
+	 * However, it doesn't forbid SELECT INTO, so we have to check for that.
+	 */
+	if (!IsA(viewParse, Query))
+		elog(ERROR, "unexpected parse analysis result");
+	if (viewParse->utilityStmt != NULL &&
+		IsA(viewParse->utilityStmt, CreateTableAsStmt))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("views must not contain SELECT INTO")));
+	if (viewParse->commandType != CMD_SELECT)
+		elog(ERROR, "unexpected parse analysis result");
+
+	/*
+	 * Check for unsupported cases.  These tests are redundant with ones in
+	 * DefineQueryRewrite(), but that function will complain about a bogus ON
+	 * SELECT rule, and we'd rather the message complain about a view.
+	 */
+	if (viewParse->hasModifyingCTE)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("views must not contain data-modifying statements in WITH")));
+
+	/*
+	 * If the user specified the WITH CHECK OPTION, add it to the list of
+	 * reloptions.  (Note: this appends to the caller-supplied ViewStmt's
+	 * options list in place.)
+	 */
+	if (stmt->withCheckOption == LOCAL_CHECK_OPTION)
+		stmt->options = lappend(stmt->options,
+								makeDefElem("check_option",
+											(Node *) makeString("local"), -1));
+	else if (stmt->withCheckOption == CASCADED_CHECK_OPTION)
+		stmt->options = lappend(stmt->options,
+								makeDefElem("check_option",
+											(Node *) makeString("cascaded"), -1));
+
+	/*
+	 * Check that the view is auto-updatable if WITH CHECK OPTION was
+	 * specified.
+	 */
+	check_option = false;
+
+	/* scan the (possibly just-augmented) reloptions for check_option */
+	foreach(cell, stmt->options)
+	{
+		DefElem    *defel = (DefElem *) lfirst(cell);
+
+		if (strcmp(defel->defname, "check_option") == 0)
+			check_option = true;
+	}
+
+	/*
+	 * If the check option is specified, look to see if the view is actually
+	 * auto-updatable or not.
+	 */
+	if (check_option)
+	{
+		const char *view_updatable_error =
+		view_query_is_auto_updatable(viewParse, true);
+
+		if (view_updatable_error)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("WITH CHECK OPTION is supported only on automatically updatable views"),
+					 errhint("%s", _(view_updatable_error))));
+	}
+
+	/*
+	 * If a list of column names was given, run through and insert these into
+	 * the actual query tree. - thomas 2000-03-08
+	 */
+	if (stmt->aliases != NIL)
+	{
+		ListCell   *alist_item = list_head(stmt->aliases);
+		ListCell   *targetList;
+
+		foreach(targetList, viewParse->targetList)
+		{
+			TargetEntry *te = lfirst_node(TargetEntry, targetList);
+
+			/* junk columns don't get aliases */
+			if (te->resjunk)
+				continue;
+			te->resname = pstrdup(strVal(lfirst(alist_item)));
+			alist_item = lnext(stmt->aliases, alist_item);
+			if (alist_item == NULL)
+				break;			/* done assigning aliases */
+		}
+
+		/* error out if more aliases were supplied than non-junk columns */
+		if (alist_item != NULL)
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("CREATE VIEW specifies more column "
+							"names than columns")));
+	}
+
+	/* Unlogged views are not sensible. */
+	if (stmt->view->relpersistence == RELPERSISTENCE_UNLOGGED)
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("views cannot be unlogged because they do not have storage")));
+
+	/*
+	 * If the user didn't explicitly ask for a temporary view, check whether
+	 * we need one implicitly.  We allow TEMP to be inserted automatically as
+	 * long as the CREATE command is consistent with that --- no explicit
+	 * schema name.
+	 */
+	view = copyObject(stmt->view);	/* don't corrupt original command */
+	if (view->relpersistence == RELPERSISTENCE_PERMANENT
+		&& isQueryUsingTempRelation(viewParse))
+	{
+		view->relpersistence = RELPERSISTENCE_TEMP;
+		ereport(NOTICE,
+				(errmsg("view \"%s\" will be a temporary view",
+						view->relname)));
+	}
+
+	/*
+	 * Create the view relation
+	 *
+	 * NOTE: if it already exists and replace is false, the xact will be
+	 * aborted.
+	 */
+	address = DefineVirtualRelation(view, viewParse->targetList,
+									stmt->replace, stmt->options, viewParse);
+
+	return address;
+}
+
+/*
+ * Use the rules system to store the query for the view.
+ */
+void
+StoreViewQuery(Oid viewOid, Query *viewParse, bool replace)
+{
+	Query	   *parsetree;
+
+	/*
+	 * The incoming query's range table lacks the "OLD" and "NEW" entries,
+	 * so add them first.
+	 */
+	parsetree = UpdateRangeTableOfViewParse(viewOid, viewParse);
+
+	/* Install the rewrite rules that implement the view */
+	DefineViewRules(viewOid, parsetree, replace);
+}